<a href="https://colab.research.google.com/github/TummalaSharmila/HT/blob/main/HI_A2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit

# Defining the exponential decay function, which models the decay process
def exp_decay(time, N_0, k):
    return N_0 * np.exp(-k * time)

In [None]:
# Defining the function to calculate the half-life of a transcript based on decay data
def calculate_half_life(replicate_data, time_points):
    # Convert data to numeric, forcing errors to NaN
    replicate_data = pd.to_numeric(replicate_data, errors='coerce')
    # Replace non-positive values with NaN to avoid log(0) or negative inputs
    replicate_data[replicate_data <= 0] = np.nan
    # Return NaN if all values are NaN
    if replicate_data.isna().all():
        return np.nan
    # Replace NaNs with the minimum positive value to handle log(0)
    min_positive = replicate_data[replicate_data > 0].min()
    replicate_data.fillna(min_positive, inplace=True)
    # Apply logarithm to the data
    log_data = np.log(replicate_data)
    try:
        # Curve fitting with bounds to ensure positive parameters
        params, _ = curve_fit(exp_decay, time_points, log_data, bounds=(0, np.inf))
        N_0, k = params
        # Calculate half-life only if k is positive
        if k > 0:
            return np.log(2) / k
        else:
            return np.nan
    except:
        # Return NaN if curve fitting fails
        return np.nan

In [None]:
# Load the dataset from a text file, specifying tab as the delimiter
data = pd.read_csv('DecayTimecourse.txt', delimiter='\t')

# Saving the loaded data as a CSV file for further use
data.to_csv('DecayTimecourse.csv', index=False)

In [None]:
# Reloading the CSV file into a DataFrame
data = pd.read_csv("DecayTimecourse.csv")
# Initializing a list to store half-life calculations
half_lives = []
# Defining the time points for the decay measurements
time_points = np.array([0, 5, 10, 15, 20, 30, 40, 50, 60])

In [None]:
# Iterating over each row (transcript) in the dataset to calculate half-lives
for index, row in data.iterrows():
    transcript_id = row['Time course #']
    if transcript_id == 'YORF':
        continue

    replicate_half_lives = []
    for replicate_idx in range(3):
        # Calculating the start column for each replicate based on its position
        start_col = replicate_idx * 9 + 2
        # Extracting the replicate data for the current transcript
        replicate_data = row.iloc[start_col:start_col+9]
        # Calculating the half-life for the current replicate
        half_life = calculate_half_life(replicate_data, time_points)
        if not np.isnan(half_life):
            replicate_half_lives.append(half_life)
    # Calculating the average half-life if valid half-lives were found
    if replicate_half_lives:
        average_half_life = np.mean(replicate_half_lives)
        half_lives.append({'Transcript': transcript_id, 'Half_Life': average_half_life})

In [13]:
# Converting the list of half-lives into a DataFrame
half_lives_df = pd.DataFrame(half_lives)

# Specifying the output path and save the half-lives to a CSV file
output_path = 'Half_Lives_Calculated.csv'
half_lives_df.to_csv(output_path, index=False)

# Printing calculated half-lives
print("Calculated Half-Lives:")
for index, row in half_lives_df.iterrows():
    print(f"Transcript: {row['Transcript']}, Half-Life: {row['Half_Life']}")

print("\nHalf-Lives Calculated and saved to:", output_path)

# Loading the calculated half-lives from the saved CSV
half_lives_df = pd.read_csv('Half_Lives_Calculated.csv')

# Determining the top and bottom 10% thresholds for half-life
top_10_threshold = half_lives_df['Half_Life'].quantile(0.9)
bottom_10_threshold = half_lives_df['Half_Life'].quantile(0.1)

# Filtering transcripts based on the calculated thresholds
high_half_life_transcripts = half_lives_df[half_lives_df['Half_Life'] >= top_10_threshold]
low_half_life_transcripts = half_lives_df[half_lives_df['Half_Life'] <= bottom_10_threshold]

# Extracting gene identifiers for high and low half-life transcripts
high_half_life_genes = high_half_life_transcripts['Transcript'].tolist()
low_half_life_genes = low_half_life_transcripts['Transcript'].tolist()

# Printing the lists of high and low half-life gene identifiers
print(high_half_life_genes)
print(low_half_life_genes)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Transcript: YKR042W, Half-Life: 0.1357162131944521
Transcript: YMR083W, Half-Life: 0.1356274316918891
Transcript: YBR011C, Half-Life: 0.13565195218756634
Transcript: YGR282C, Half-Life: 0.13578130901589322
Transcript: YJL159W, Half-Life: 0.13610584710533546
Transcript: YER177W, Half-Life: 0.1356983273557203
Transcript: YJL158C, Half-Life: 0.13609089190741835
Transcript: YLR056W, Half-Life: 0.13713429323921345
Transcript: YIL051C, Half-Life: 0.13560480676759612
Transcript: YBR078W, Half-Life: 1.7715786825086053
Transcript: YDL137W, Half-Life: 0.13570993713443819
Transcript: YDR050C, Half-Life: 0.13692127010380678
Transcript: YLR043C, Half-Life: 0.2888126110997168
Transcript: YDR077W, Half-Life: 0.9736247139689749
Transcript: YGR285C, Half-Life: 0.1358414103492922
Transcript: YBL077W, Half-Life: 0.19005783251014685
Transcript: YER072W, Half-Life: 0.13719188678301258
Transcript: YLR354C, Half-Life: 0.13528715920042414
Transc

In [14]:
# Saving the lists of high and low half-life gene identifiers to text files
high_life_path = 'high_half_life_genes.txt'
low_life_path = 'low_half_life_genes.txt'

# Writing high half-life genes to a file
with open(high_life_path, 'w') as file:
    for gene in high_half_life_genes:
        file.write(gene + '\n')

# Writing low half-life genes to a file
with open(low_life_path, 'w') as file:
    for gene in low_half_life_genes:
        file.write(gene + '\n')