In [9]:
#Import necessary libraries
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit

# Define a function for exponential decay
def exp_decay(t, a, b):
    return a * np.exp(-b * t)

#Load the dataset
excel_data = pd.read_csv('/content/DecayTimecourse.csv')
#Select columns related to time course data
timecourse_columns = [col for col in excel_data.columns if 'timecourse' in col.lower() or col.startswith('_')]
# Remove rows with all NaN values from selected columns
cleaned_data = excel_data.dropna(subset=timecourse_columns, how='all')

#Reshape the data into a long format
melted_data = pd.melt(cleaned_data, id_vars=['Time course #'], value_vars=timecourse_columns,
                      var_name='Time', value_name='Expression').dropna(subset=['Expression'])

# Extract numerical values from 'Time' column
melted_data['Time'] = melted_data['Time'].str.extract('(\d+)').astype(float)

# Calculate half-life for each gene
results = []
for gene, group in melted_data.groupby('Time course #'):
    if gene == 'YORF':
        continue
    time = group['Time'].values
    expression = group['Expression'].values
    try:
        # Optimize the curve fitting
        popt, _ = curve_fit(exp_decay, time, expression, p0=[1, 0.1], bounds=(0, [np.inf, 1]))
        half_life = np.log(2) / popt[1]
        results.append((gene, half_life))
    except RuntimeError:
        # Handle cases where curve fitting fails
        results.append((gene, np.nan))

# Convert results to DataFram
half_life_df = pd.DataFrame(results, columns=['Gene', 'Half_life'])

# Print gene names and corresponding half-lives
for index, row in half_life_df.iterrows():
    #Check if the half-life is a number (not NaN) before printing
        print(f"Gene: {row['Gene']}, Half-life: {row['Half_life']:.2f} minutes")




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Gene: YDR211W, Half-life: 6.65 minutes
Gene: YDR212W, Half-life: 16.52 minutes
Gene: YDR213W, Half-life: 7.38 minutes
Gene: YDR214W, Half-life: 12249580148820117504.00 minutes
Gene: YDR215C, Half-life: 46.82 minutes
Gene: YDR216W, Half-life: 114.88 minutes
Gene: YDR217C, Half-life: 9.23 minutes
Gene: YDR218C, Half-life: 23.80 minutes
Gene: YDR219C, Half-life: 20.42 minutes
Gene: YDR220C, Half-life: 45.12 minutes
Gene: YDR221W, Half-life: 8.05 minutes
Gene: YDR222W, Half-life: 15.29 minutes
Gene: YDR223W, Half-life: 18.17 minutes
Gene: YDR224C, Half-life: 6.05 minutes
Gene: YDR225W, Half-life: 4.42 minutes
Gene: YDR226W, Half-life: 23.22 minutes
Gene: YDR227W, Half-life: 13.78 minutes
Gene: YDR228C, Half-life: 8.89 minutes
Gene: YDR229W, Half-life: 7.68 minutes
Gene: YDR230W, Half-life: 16.56 minutes
Gene: YDR231C, Half-life: 11.78 minutes
Gene: YDR232W, Half-life: 7.31 minutes
Gene: YDR233C, Half-life: 6.19 minutes
Gene: 

In [10]:
# Calculate the 90th and 10th percentiles of half-life values
top_10_percentile = np.percentile(half_life_df['Half_life'].dropna(), 90)
bottom_10_percentile = np.percentile(half_life_df['Half_life'].dropna(), 10)


# Filter genes with half-lives in the top 10 percentile
top_genes = half_life_df[half_life_df['Half_life'] >= top_10_percentile]
# Filter genes with half-lives in the bottom 10 percentile
bottom_genes = half_life_df[half_life_df['Half_life'] <= bottom_10_percentile]

# Print genes in the top 10 percentile of half-lives
print("\nGenes in the top 10 percentile of half-lives:")
print(top_genes)

# Print genes in the bottom 10 percentile of half-lives
print("\nGenes in the bottom 10 percentile of half-lives:")
print(bottom_genes)






Genes in the top 10 percentile of half-lives:
         Gene     Half_life
4       Q0060  1.263103e+02
5       Q0065  2.034539e+02
6       Q0070  4.944645e+16
7       Q0075  6.368426e+01
9       Q0085  1.608512e+14
...       ...           ...
6129  YPR160W  9.744729e+17
6153  YPR184W  7.791942e+15
6160  YPR191W  6.503511e+01
6162  YPR193C  1.275343e+02
6169  YPR200C  2.543136e+02

[618 rows x 2 columns]

Genes in the bottom 10 percentile of half-lives:
         Gene  Half_life
40    YAL025C   7.655690
58    YAL041W   6.311052
60    YAL043C   6.775450
74    YAL058W   7.612465
97    YAR018C   6.015325
...       ...        ...
6156  YPR187W   4.286532
6159  YPR190C   5.880017
6171  YPR202W   0.693147
6172  YPR203W   2.777644
6173  YPR204W   1.376811

[618 rows x 2 columns]


In [11]:
# Calculate the value corresponding to the 90th percentile of half-life values
top_10_percentile_value = np.percentile(half_life_df['Half_life'].dropna(), 90)

# Filter the DataFrame to get the rows where the half-life is in the top 10 percentile
top_10_percentile_genes = half_life_df[half_life_df['Half_life'] >= top_10_percentile_value]

# Print the genes and their half-lives in the top 10 percentile
print("Genes in the top 10 percentile of half-lives:")
for index, row in top_10_percentile_genes.iterrows():
    print(f"Gene: {row['Gene']}, Half-life: {row['Half_life']}")


Genes in the top 10 percentile of half-lives:
Gene: Q0060, Half-life: 126.31033243343738
Gene: Q0065, Half-life: 203.4538792422616
Gene: Q0070, Half-life: 4.944644662758309e+16
Gene: Q0075, Half-life: 63.68425987828037
Gene: Q0085, Half-life: 160851161455548.6
Gene: Q0110, Half-life: 307.3550145784054
Gene: Q0115, Half-life: 12677061566789.248
Gene: Q0120, Half-life: 8.996543688461602e+16
Gene: Q0130, Half-life: 64.23578601990522
Gene: Q0140, Half-life: 228.5775249960317
Gene: YAL005C, Half-life: 4638470969418.576
Gene: YAL018C, Half-life: 158.72432250595557
Gene: YAL035C-A, Half-life: 63.36218525426081
Gene: YAL038W, Half-life: 377023281816958.9
Gene: YAL049C, Half-life: 136.88923027336713
Gene: YAL054C, Half-life: 84.12153183500055
Gene: YAL058C-A, Half-life: 185.79129664351342
Gene: YAL060W, Half-life: 5.171133031393551e+16
Gene: YAL062W, Half-life: 3716799365182.775
Gene: YAR003W, Half-life: 60007899306222.016
Gene: YAR010C, Half-life: 93.13018876859414
Gene: YAR023C, Half-life: 17

In [12]:
#Calculate the value corresponding to the 10th percentile of half-life values
bottom_10_percentile_value = np.percentile(half_life_df['Half_life'].dropna(), 10)

# Filter the DataFrame to get the rows where the half-life is in the bottom 10 percentile
bottom_10_percentile_genes = half_life_df[half_life_df['Half_life'] <= bottom_10_percentile_value]

# Print the genes and their half-lives in the bottom 10 percentile
print("Genes in the bottom 10 percentile of half-lives:")
for index, row in bottom_10_percentile_genes.iterrows():
    print(f"Gene: {row['Gene']}, Half-life: {row['Half_life']}")


Genes in the bottom 10 percentile of half-lives:
Gene: YAL025C, Half-life: 7.65568974934613
Gene: YAL041W, Half-life: 6.311052219705929
Gene: YAL043C, Half-life: 6.775449508815766
Gene: YAL058W, Half-life: 7.612465464567109
Gene: YAR018C, Half-life: 6.015324593227804
Gene: YBL003C, Half-life: 4.084742528094805
Gene: YBL011W, Half-life: 7.461126386360699
Gene: YBL015W, Half-life: 6.766758075255948
Gene: YBL016W, Half-life: 5.334894730328885
Gene: YBL035C, Half-life: 5.97524776408316
Gene: YBL039C, Half-life: 5.983286150759862
Gene: YBL042C, Half-life: 3.3728346114140146
Gene: YBL063W, Half-life: 2.9232772979032617
Gene: YBL080C, Half-life: 6.170076337968044
Gene: YBL081W, Half-life: 6.174296800067371
Gene: YBL093C, Half-life: 6.9871771718996945
Gene: YBL108W, Half-life: 5.557212004922275
Gene: YBL111C, Half-life: 1.0891347296324354
Gene: YBL112C, Half-life: 3.328618825483001
Gene: YBL113C, Half-life: 1.6356402295766248
Gene: YBR002C, Half-life: 6.2318076967286045
Gene: YBR004C, Half-lif

In [13]:
# Save the DataFrame of genes in the top 10 percentile of half-lives to a CSV file
top_10_percentile_genes.to_csv("/content/top_lives.csv", index=False)

# Save the DataFrame of genes in the bottom 10 percentile of half-lives to a CSV file
bottom_10_percentile_genes.to_csv("/content/bottom_lives.csv", index=False)