In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import CellAlignDTW
from CellAlignDTW.pl import *
import gseapy as gp
import seaborn as sns
from matplotlib.colors import LogNorm

In [None]:
# Read the three input tables, using the first CSV column as the index.
# NOTE(review): summary_df_path, aggregated_curves_path, scores_df_path and
# cluster_ordering are not defined in the visible cells; presumably they are
# injected as notebook parameters -- confirm before a Restart & Run All.
summary_df, gene_curves, scores_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (summary_df_path, aggregated_curves_path, scores_df_path)
)

# cluster_ordering is an underscore-joined string of cluster names.
clusters = cluster_ordering.split("_")

# Fixed display colour for each known cluster name.
color_map = {
    'NSCLC': 'gold',
    'SCLC-A': 'tab:red',
    'SCLC-N': 'tab:cyan',
    'SCLC-AN': 'black',
}
# One colour per cluster, in the order given by cluster_ordering
# (raises KeyError for a cluster name missing from color_map).
colors = [color_map[cluster_name] for cluster_name in clusters]

# Preview the first 20 rows of the scores table.
scores_df.head(20)

In [None]:
# Unpack the four outputs of process_gene_data; later cells use them for the
# clustering plots and to label each gene with a category.
(
    sorted_gene_curve,
    row_colors,
    col_colors,
    categories,
) = CellAlignDTW.process_gene_data(scores_df, gene_curves, colors)

In [None]:

# Call the plotting helper through its module path (as the plot_gene_clusters
# cell does) so this cell does not depend on the `from CellAlignDTW.pl import *`
# wildcard import to resolve the name.
CellAlignDTW.pl.plot_kshape_clustering(sorted_gene_curve, categories)

In [None]:
# Draw the gene-cluster figure from the processed curves and their row/column
# colour annotations; cluster_ordering is passed through unchanged.
CellAlignDTW.pl.plot_gene_clusters(
    sorted_gene_curve,
    row_colors,
    col_colors,
    cluster_ordering,
)

In [None]:
# Pair every gene in the processed curve matrix with its category label.
gene_info = pd.DataFrame({'gene': sorted_gene_curve.index, 'category': categories})

# Enrichr libraries queried for every category.
gene_sets = ['CellMarker_2024', 'CellMarker_Augmented_2021', 'PanglaoDB_Augmented_2021', 'HuBMAP_ASCTplusB_augmented_2022']


def _run_enrichr(gene_list, category):
    """Run Enrichr for one category's genes and tag the result rows.

    Parameters
    ----------
    gene_list : pandas.Series
        Gene identifiers to submit to Enrichr.
    category : str
        Label written to the ``category`` column of the returned table.

    Returns
    -------
    pandas.DataFrame
        A copy of ``enr.results`` with an added ``category`` column; the
        object held by the Enrichr result is not modified.
    """
    enr = gp.enrichr(gene_list=gene_list,
                     gene_sets=gene_sets,
                     organism='human',  # set to the organism the gene symbols come from
                     outdir=None,  # don't write to disk
                     cutoff=1  # NOTE(review): presumably disables p-value filtering -- confirm in gseapy docs
                     )
    return enr.results.assign(category=category)


# Gene lists per category, in the order they are concatenated below.
_gene_lists = {
    category: gene_info.loc[gene_info['category'] == category, 'gene']
    for category in ('early', 'intermediate', 'late')
}
early_gene_list = _gene_lists['early']
intermediate_gene_list = _gene_lists['intermediate']
late_gene_list = _gene_lists['late']

# One Enrichr query per category (this contacts the Enrichr web service).
_enrichment = {
    category: _run_enrichr(genes, category)
    for category, genes in _gene_lists.items()
}
early_results = _enrichment['early']
intermediate_results = _enrichment['intermediate']
late_results = _enrichment['late']

# Stack the three tables; ignore_index gives the combined frame unique row
# labels instead of repeating each table's own 0..n-1 index.
results = pd.concat([early_results, intermediate_results, late_results], ignore_index=True)

In [None]:
# Terms to show on the y-axis. The categorical below is built from the reversed
# list (terms[::-1]).
terms = ['Secretory Cell Lung Human', 'Krt4/13+ Cell Trachea Mouse', 'Basal Cell Of Prostatic urethra - Prostate Gland', 'Cancer Stem cell:Peripheral Blood',
         'Cycling Basal Cell Trachea Mouse', 'Vascular Stem cell:Blood', 'Neural Progenitor Cell Embryonic Prefrontal Cortex Human', 'Neural Stem Cell Brain Mouse',
         'Immature neuron:Undefined', 'Neuroendocrine Cell Trachea Mouse', 'Endothelial Cell Fetal Gonad Human']

# Keep only the selected terms and the columns needed for plotting.
# .copy() makes this an independent frame, so the column assignments below do
# not trigger SettingWithCopyWarning; reset_index gives unique row labels
# (the concatenated `results` can repeat labels across categories).
heatmap_long = (
    results.loc[results['Term'].isin(terms), ['Term', 'category', 'Odds Ratio', 'Adjusted P-value']]
    .copy()
    .reset_index(drop=True)
)
# Ordered categoricals fix the axis order instead of relying on row order.
heatmap_long['Term'] = pd.Categorical(heatmap_long['Term'], categories=terms[::-1], ordered=True)
heatmap_long['category'] = pd.Categorical(heatmap_long['category'], categories=['early', 'intermediate', 'late'], ordered=True)

# Dot plot: marker size encodes the odds ratio, colour the adjusted p-value
# on a log scale.
fig, ax = plt.subplots(figsize=(8, 4))
sns.scatterplot(data=heatmap_long, x='category', y='Term', size='Odds Ratio', sizes=(20, 200),
                hue='Adjusted P-value', palette='viridis_r', hue_norm=LogNorm(), ax=ax)
ax.tick_params(axis='x', labelrotation=45)
ax.set_xlabel('')
ax.set_ylabel('')
# Place the legend outside the axes so it does not cover the markers.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
# Pad half a step around the categorical positions actually present.
ax.set_xlim(-0.5, heatmap_long['category'].nunique() - 0.5)
ax.set_ylim(-0.5, heatmap_long['Term'].nunique() - 0.5)
fig.tight_layout()
plt.show()

In [None]:
# Five lowest-P-value terms per Enrichr library for the early genes.
# Sorting once and taking each group's head keeps the "Gene_set" column in the
# output; groupby(...).apply(...) on the grouping column is deprecated in
# recent pandas releases.
(
    early_results
    .sort_values(by=["Gene_set", "P-value"])
    .groupby("Gene_set")
    .head(5)
    .reset_index(drop=True)
)

In [None]:
# Five lowest-P-value terms per Enrichr library for the intermediate genes.
# Sorting once and taking each group's head keeps the "Gene_set" column in the
# output; groupby(...).apply(...) on the grouping column is deprecated in
# recent pandas releases.
(
    intermediate_results
    .sort_values(by=["Gene_set", "P-value"])
    .groupby("Gene_set")
    .head(5)
    .reset_index(drop=True)
)

In [None]:
# Five lowest-P-value terms per Enrichr library for the late genes.
# Sorting once and taking each group's head keeps the "Gene_set" column in the
# output; groupby(...).apply(...) on the grouping column is deprecated in
# recent pandas releases.
(
    late_results
    .sort_values(by=["Gene_set", "P-value"])
    .groupby("Gene_set")
    .head(5)
    .reset_index(drop=True)
)