In [22]:
import anndata as ad
import scanpy as sc
from tqdm import tqdm
import pickle as pkl
import gc

In [23]:
# Load the sci-Plex 3 AnnData object from disk (path is relative to this notebook's directory).
adata_raw = ad.read_h5ad("../../../data/sciplex3.h5ad")

In [24]:
# Show the AnnData summary (dimensions plus obs/var column names) as loaded, before any filtering.
adata_raw

AnnData object with n_obs × n_vars = 581777 × 58347
    obs: 'cell_type', 'dose', 'dose_character', 'dose_pattern', 'g1s_score', 'g2m_score', 'pathway', 'pathway_level_1', 'pathway_level_2', 'product_dose', 'product_name', 'proliferation_index', 'replicate', 'size_factor', 'target', 'vehicle'
    var: 'id', 'num_cells_expressed-0-0', 'num_cells_expressed-1-0', 'num_cells_expressed-1'

In [25]:
# Gene filter with min_cells=100. The return value is not assigned, so this relies on
# scanpy's in-place default and mutates adata_raw; the summary displayed in the previous
# cell therefore reflects the object BEFORE this filter.
sc.pp.filter_genes(adata_raw, min_cells=100)

## All DEGs

In [26]:
def calculate_degs(adata_pp, method='wilcoxon'):
    """Find differentially expressed genes (DEGs) for every drug condition vs. its Vehicle control.

    Works on a copy of ``adata_pp`` (the input is not modified): genes are filtered with
    ``min_cells = 10``, counts are normalised with ``sc.pp.normalize_total`` and
    log1p-transformed. Each cell is labelled ``"<cell_type>_<product_name>_<dose>"`` and every
    non-Vehicle label is tested against the Vehicle cells of the same cell type with
    ``sc.tl.rank_genes_groups``.

    Parameters
    ----------
    adata_pp : AnnData
        Must provide ``obs`` columns ``cell_type``, ``product_name`` and ``dose``.
        NOTE(review): presumably ``X`` holds raw counts, since it is normalised here - confirm.
    method : str
        Passed through as ``method`` to ``sc.tl.rank_genes_groups``.

    Returns
    -------
    dict
        Condition label -> list of gene names that pass ``pval_cutoff = 0.05`` in
        ``sc.get.rank_genes_groups_df`` and have ``|logfoldchanges| >= 0.1``.
        Vehicle conditions, and conditions whose cell type has no Vehicle cells, get no entry.
    """
    adata = adata_pp.copy()
    sc.pp.filter_genes(adata, min_cells = 10)
    sc.pp.normalize_total(adata, inplace=True)
    sc.pp.log1p(adata)
    adata.obs['drug_celltype_dose'] = adata.obs['cell_type'].astype(str) + "_" + adata.obs['product_name'].astype(
        str) + "_" + adata.obs['dose'].astype(str)

    results = dict()

    # The Vehicle reference depends only on the cell type, so it is extracted once per
    # cell type instead of re-copying the whole cell-type subset for every condition.
    vehicle_refs = dict()

    for condition in tqdm(adata.obs.drug_celltype_dose.unique()):
        cond_mask = (adata.obs.drug_celltype_dose == condition).to_numpy()
        cond_obs = adata.obs.loc[cond_mask]
        if cond_obs.shape[0] == 0:
            continue

        # Decide from the metadata alone, before copying any expression data.
        if cond_obs.product_name.iloc[0] == "Vehicle":
            continue

        cell_type = cond_obs.cell_type.iloc[0]
        if cell_type not in vehicle_refs:
            ref_mask = ((adata.obs.cell_type == cell_type)
                        & (adata.obs.product_name == "Vehicle")).to_numpy()
            vehicle_refs[cell_type] = adata[ref_mask].copy()
        adata_ref = vehicle_refs[cell_type]

        # Without control cells there is nothing to compare against; previously this
        # surfaced as an IndexError when taking the first reference label.
        if adata_ref.n_obs == 0:
            print(condition, "has no Vehicle reference cells, skipping...")
            continue

        ref_cond = list(adata_ref.obs.drug_celltype_dose.unique())[0]

        adata_cond = adata[cond_mask].copy()
        # concat builds a new object, so the cached reference is never modified by the test.
        adata_comb = ad.concat([adata_ref, adata_cond])

        sc.tl.rank_genes_groups(adata_comb, groupby='drug_celltype_dose', reference=ref_cond, method=method)
        # NOTE(review): scanpy documents pval_cutoff as acting on adjusted p-values - confirm
        # for the installed version.
        df = sc.get.rank_genes_groups_df(adata_comb, group=condition, pval_cutoff = 0.05)

        # Keep genes with an absolute log fold change of at least 0.1 (either direction).
        df = df[df['logfoldchanges'].abs() >= 0.1]

        results[condition] = list(df['names'])

        # Release the per-condition objects promptly; the cached references are kept.
        del adata_cond
        del adata_comb
        gc.collect()

    return results
        

In [27]:
# Long-running: the recorded progress bar shows 2259 conditions taking about 1h45.
all_degs = calculate_degs(adata_raw)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [1:45:12<00:00,  2.79s/it]


In [32]:
# NOTE(review): this renders the full condition -> gene-list dict; consider showing only a
# few entries to keep the notebook output short.
all_degs

{'A549_Enzastaurin (LY317615)_1000.0': ['LINC00486',
  'PDE4B',
  'MT-ATP6',
  'MT-ND4',
  'MT-RNR1',
  'MT-RNR2',
  'MT-CO3',
  'MT-CO1'],
 'A549_Raltitrexed_10.0': [],
 'A549_Lenalidomide (CC-5013)_10.0': [],
 'A549_Divalproex Sodium_1000.0': [],
 'A549_MLN8054_100.0': ['MDM2',
  'NEAT1',
  'PAPPA',
  'LINC01021',
  'MT-CO3',
  'MT-ATP6',
  'MT-ND4',
  'PTPRJ',
  'MIR34AHG',
  'UBE2H',
  'TNS3',
  'ITGB5',
  'MYOF',
  'ALDH3A1',
  'ELF3',
  'MKLN1',
  'IGFL2-AS1',
  'NFAT5',
  'MALAT1',
  'ARL15',
  'MKI67'],
 'A549_Sodium Phenylbutyrate_1000.0': ['MT-CO3',
  'MT-CO1',
  'MT-ND5',
  'MT-ATP8'],
 'A549_Celecoxib_100.0': [],
 'A549_Linifanib (ABT-869)_100.0': [],
 'A549_Thalidomide_1000.0': ['HSP90AA1', 'GAPDH'],
 'A549_SNS-314_1000.0': ['MDM2',
  'NEAT1',
  'MALAT1',
  'IMMP2L',
  'TNS3',
  'PAPPA',
  'BTBD11',
  'ANXA4',
  'MYOF',
  'BMPR2'],
 'A549_Linifanib (ABT-869)_10000.0': ['KRT8',
  'KRT18',
  'ACTN4',
  'MT-ND5',
  'SPTBN1',
  'CALM1',
  'AL445584.2',
  'MAP1B',
  'MT-CO1',
 

In [33]:
# Persist the condition -> DEG-list dict next to the notebook.
with open("./all_degs_raw.pkl", 'wb') as f:
    pkl.dump(all_degs, f)

In [34]:
# Union of the DEG lists of all conditions, as a duplicate-free list (order is arbitrary).
gene_degs = list(set().union(*all_degs.values()))

In [35]:
# Number of distinct genes that are a DEG in at least one condition.
len(gene_degs)

6818

## Feature agreement across pp conditions

In [36]:
def get_feature_agreement(adata_pp,
                          methods=('cell_ranger', 'seurat_v3', 'seurat'),
                          feature_numbers=(500, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000),
                          seed=0):
    """Collect the genes selected by each feature-selection method at each feature count.

    ``adata_pp`` itself is never modified. For the scanpy flavours the data is copied once
    per method; ``seurat`` and ``cell_ranger`` are run on ``normalize_total`` + ``log1p``
    data, ``seurat_v3`` on the data as passed in. Only gene names are returned, so the
    expression matrix is never subset.

    Parameters
    ----------
    adata_pp : AnnData
        Input data. NOTE(review): presumably raw counts, since it is normalised here and
        ``seurat_v3`` runs on it unnormalised - confirm.
    methods : iterable of str
        Any of ``'cell_ranger'``, ``'seurat_v3'``, ``'seurat'`` (passed as ``flavor`` to
        ``sc.pp.highly_variable_genes``) or ``'random'`` (a uniform random gene sample).
    feature_numbers : iterable of int
        Numbers of genes to select (``n_top_genes``, or the sample size for ``'random'``).
    seed : int
        Seed of the private random generator used by the ``'random'`` method only.

    Returns
    -------
    list of dict
        One record per (method, feature number), methods in the outer loop, with keys
        ``feature_selection_method``, ``n_features`` and ``genes`` (names in ``var`` order).
        Empty when ``adata_pp`` has no observations.

    Raises
    ------
    ValueError
        If ``methods`` contains an unsupported name (this previously recorded every gene).
    """
    import random  # local import: only the optional "random" baseline needs it

    supported = ('cell_ranger', 'seurat_v3', 'seurat', 'random')
    unsupported = [name for name in methods if name not in supported]
    if unsupported:
        raise ValueError(
            "Unknown feature selection method(s): %s; expected one of %s"
            % (unsupported, list(supported)))

    results = list()
    if adata_pp.n_obs == 0:
        return results

    rng = random.Random(seed)

    for feature_select_method in methods:
        adata = None
        if feature_select_method != "random":
            # One working copy per method; the selection below only rewrites var
            # annotations, so the copy can be reused for every feature number.
            adata = adata_pp.copy()
            if feature_select_method != "seurat_v3":
                sc.pp.normalize_total(adata, inplace=True)
                sc.pp.log1p(adata)

        for feature_number in feature_numbers:
            print("Runnign HVG Selection for:", feature_select_method, feature_number)

            if feature_select_method == "random":
                sampled = set(rng.sample(list(adata_pp.var_names), feature_number))
                # Report the sample in var order, like a column subset would.
                genes = [gene for gene in adata_pp.var_names if gene in sampled]
            else:
                sc.pp.highly_variable_genes(adata, flavor=feature_select_method, n_top_genes=feature_number)
                genes = list(adata.var.loc[adata.var['highly_variable']].index)

            results.append({
                "feature_selection_method": feature_select_method,
                "n_features": feature_number,
                "genes": genes
            })

        del adata

    return results

In [37]:
# Selected gene lists for every (feature selection method, number of features) combination.
feature_agreement = get_feature_agreement(adata_raw)

Runnign HVG Selection for: cell_ranger 500
Runnign HVG Selection for: cell_ranger 1000
Runnign HVG Selection for: cell_ranger 2000
Runnign HVG Selection for: cell_ranger 3000
Runnign HVG Selection for: cell_ranger 4000
Runnign HVG Selection for: cell_ranger 5000
Runnign HVG Selection for: cell_ranger 6000
Runnign HVG Selection for: cell_ranger 7000
Runnign HVG Selection for: cell_ranger 8000
Runnign HVG Selection for: seurat_v3 500
Runnign HVG Selection for: seurat_v3 1000
Runnign HVG Selection for: seurat_v3 2000
Runnign HVG Selection for: seurat_v3 3000
Runnign HVG Selection for: seurat_v3 4000
Runnign HVG Selection for: seurat_v3 5000
Runnign HVG Selection for: seurat_v3 6000
Runnign HVG Selection for: seurat_v3 7000
Runnign HVG Selection for: seurat_v3 8000
Runnign HVG Selection for: seurat 500
Runnign HVG Selection for: seurat 1000
Runnign HVG Selection for: seurat 2000
Runnign HVG Selection for: seurat 3000
Runnign HVG Selection for: seurat 4000
Runnign HVG Selection for: seurat 

In [38]:
# NOTE(review): this renders every selected gene list in full; consider showing a summary instead.
feature_agreement

[{'feature_selection_method': 'cell_ranger',
  'n_features': 500,
  'genes': ['CFH',
   'SLC4A1',
   'THSD7A',
   'ACSM3',
   'PRKAR2B',
   'ETV1',
   'TENM1',
   'IGF1',
   'CNTN1',
   'CYP24A1',
   'CPS1',
   'GRAMD1B',
   'ABCC2',
   'GCLM',
   'VIM',
   'AGPAT4',
   'TNFRSF1B',
   'ANK1',
   'VCAN',
   'MSR1',
   'CP',
   'ARHGAP6',
   'CELF2',
   'KCNH2',
   'DCBLD2',
   'SLC2A3',
   'BCAT1',
   'LIMCH1',
   'FAM107B',
   'CD84',
   'ACSL4',
   'TGFBR3',
   'FRMPD1',
   'OSBPL3',
   'MYO3B',
   'RHOBTB1',
   'FCGR2B',
   'TP63',
   'IGF2BP2',
   'ST6GAL1',
   'CA12',
   'TXK',
   'SEMA3A',
   'ARHGAP15',
   'MCAM',
   'ARG2',
   'MEF2C',
   'PTPRC',
   'FYB1',
   'SLCO1A2',
   'AKR1B1',
   'CEACAM6',
   'FTL',
   'CASS4',
   'HEPH',
   'LYZ',
   'LAMB1',
   'SEL1L3',
   'NLRP1',
   'ESR1',
   'SORBS1',
   'PCSK5',
   'MAPK1',
   'TPTEP1',
   'GRAP2',
   'DHRS2',
   'MID1',
   'GATA1',
   'TIMP1',
   'GABRE',
   'FLT1',
   'SLC7A5',
   'ACSBG1',
   'SCG3',
   'CALB1',
   'NCALD',
 

In [43]:
# Persist the per-combination gene lists next to the notebook.
with open("./genes_per_pp_condition_raw.pkl", 'wb') as f:
    pkl.dump(feature_agreement, f)

## DEGs included in each pp condition

In [44]:
def inspect_degs_overlap(adata_pp, all_degs, feature_sets=None):
    """Measure how many DEGs of each condition survive each feature selection.

    Parameters
    ----------
    adata_pp : AnnData
        Only ``obs`` (columns ``cell_type``, ``product_name``, ``dose``) is read here; the
        object is also handed to ``get_feature_agreement`` when ``feature_sets`` is omitted.
    all_degs : dict
        Condition label ``"<cell_type>_<product_name>_<dose>"`` -> list of DEG names.
        Conditions missing from this dict are reported and skipped.
    feature_sets : list of dict, optional
        Records with keys ``feature_selection_method``, ``n_features`` and ``genes``, i.e.
        the output of ``get_feature_agreement``. Pass an already computed result to avoid
        running the feature selection a second time; when omitted it is computed here.

    Returns
    -------
    list of dict
        One record per (feature set, condition) with keys ``cell_type``, ``drug``,
        ``dosage``, ``feature_selection_method``, ``n_features``, ``all_degs``,
        ``all_genes``, ``common_genes``, ``pct_included`` (``None`` when the condition has
        no DEGs) and ``number_of_degs``.
    """
    if feature_sets is None:
        # Same selection grid this function used to re-implement inline.
        feature_sets = get_feature_agreement(adata_pp)

    obs = adata_pp.obs
    condition_labels = (obs['cell_type'].astype(str) + "_" + obs['product_name'].astype(str)
                        + "_" + obs['dose'].astype(str))
    conditions = condition_labels.unique()

    results = list()

    for feature_set in feature_sets:
        feature_select_method = feature_set["feature_selection_method"]
        feature_number = feature_set["n_features"]
        genes_adata = feature_set["genes"]
        # Built once per feature set rather than once per condition.
        selected_genes = set(genes_adata)

        for cond in tqdm(conditions):
            if cond not in all_degs:
                print(cond, "not in DEGs, skipping...")
                continue

            cond_degs = all_degs[cond]
            common_genes = list(set(cond_degs) & selected_genes)

            if len(cond_degs) == 0:
                pct_included = None
            else:
                pct_included = len(common_genes)/len(cond_degs)

            # Cell type is the first field and dose the last; splitting from both ends
            # keeps product names that themselves contain "_" intact.
            cell_type, drug_and_dose = cond.split("_", 1)
            drug, dosage = drug_and_dose.rsplit("_", 1)

            results.append({
                "cell_type": cell_type,
                "drug": drug,
                "dosage": dosage,
                "feature_selection_method": feature_select_method,
                "n_features": feature_number,
                "all_degs": cond_degs,
                "all_genes": genes_adata,
                "common_genes": common_genes,
                "pct_included": pct_included,
                "number_of_degs": len(cond_degs)
            })

    return results
                    

In [45]:
# Per (feature selection setting, condition) overlap between selected genes and that condition's DEGs.
# Vehicle conditions have no entry in all_degs and are reported as skipped in the output.
degs_overlap = inspect_degs_overlap(adata_raw, all_degs)

Runnign HVG Selection for: cell_ranger 500


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 56387.33it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: cell_ranger 1000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 67493.93it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: cell_ranger 2000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 30668.27it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: cell_ranger 3000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 18351.03it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: cell_ranger 4000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 18601.57it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: cell_ranger 5000


 78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▎                             | 1770/2259 [00:00<00:00, 8652.82it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 8729.26it/s]


Runnign HVG Selection for: cell_ranger 6000


 36%|█████████████████████████████████████████████████▋                                                                                        | 813/2259 [00:00<00:00, 8127.08it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 7739.36it/s]


Runnign HVG Selection for: cell_ranger 7000


 66%|██████████████████████████████████████████████████████████████████████████████████████████▍                                              | 1492/2259 [00:00<00:00, 7230.04it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 7316.00it/s]


Runnign HVG Selection for: cell_ranger 8000


 54%|█████████████████████████████████████████████████████████████████████████▉                                                               | 1220/2259 [00:00<00:00, 6156.70it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 6307.63it/s]


K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat_v3 500


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 83246.35it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat_v3 1000


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 103098.22it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat_v3 2000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 23849.57it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat_v3 3000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 23269.93it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat_v3 4000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 18059.60it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat_v3 5000


 39%|██████████████████████████████████████████████████████▏                                                                                   | 887/2259 [00:00<00:00, 8861.98it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 8332.28it/s]


Runnign HVG Selection for: seurat_v3 6000


 35%|████████████████████████████████████████████████▍                                                                                         | 793/2259 [00:00<00:00, 7927.70it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 7688.26it/s]


Runnign HVG Selection for: seurat_v3 7000


 55%|███████████████████████████████████████████████████████████████████████████▋                                                             | 1249/2259 [00:00<00:00, 6369.69it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 6593.11it/s]


K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat_v3 8000


 59%|█████████████████████████████████████████████████████████████████████████████████▏                                                       | 1338/2259 [00:00<00:00, 6505.40it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 6352.88it/s]


K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat 500


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 151412.38it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat 1000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 53472.39it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat 2000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 32259.15it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat 3000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 18311.60it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat 4000


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 17085.43it/s]


A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat 5000


 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▊                              | 1762/2259 [00:00<00:00, 8607.55it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...
K562_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 8685.98it/s]


Runnign HVG Selection for: seurat 6000


 63%|██████████████████████████████████████████████████████████████████████████████████████▊                                                  | 1432/2259 [00:00<00:00, 6941.53it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 7326.77it/s]


K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat 7000


 64%|███████████████████████████████████████████████████████████████████████████████████████▋                                                 | 1446/2259 [00:00<00:00, 7062.94it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 7157.55it/s]


K562_Vehicle_0.0 not in DEGs, skipping...
Runnign HVG Selection for: seurat 8000


 60%|█████████████████████████████████████████████████████████████████████████████████▊                                                       | 1348/2259 [00:00<00:00, 6539.68it/s]

A549_Vehicle_0.0 not in DEGs, skipping...
MCF7_Vehicle_0.0 not in DEGs, skipping...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2259/2259 [00:00<00:00, 6621.29it/s]

K562_Vehicle_0.0 not in DEGs, skipping...





In [46]:
# Persist the overlap records next to the notebook.
with open("./degs_overlap_stats_raw.pkl", 'wb') as f:
    pkl.dump(degs_overlap, f)

## DEGs enrichment

In [None]:
import gseapy as gp
import pandas as pd

# Gene-set library used for every enrichment call below (returned as a dict).
genesets = gp.get_library(name="MSigDB_Hallmark_2020", organism="Human")

# condition -> list of enriched terms; conditions without any DEGs get no entry at all.
results_summary = dict()

for cond, degs in all_degs.items():
    if degs:
        enr = gp.enrich(
            gene_list=list(set(degs)),
            gene_sets=genesets,
            outdir=None,
            cutoff=0.05,
        )
        res = enr.results

        if res is None or len(res) == 0:
            # Enrichment produced no rows for this condition.
            results_summary[cond] = []
        else:
            df_enr = res.copy() if isinstance(res, pd.DataFrame) else pd.DataFrame(res)
            # Keep terms with an adjusted p-value of at most 0.05; an empty selection
            # simply yields an empty list.
            df_enr = df_enr.loc[df_enr["Adjusted P-value"] <= 0.05]
            results_summary[cond] = list(df_enr['Term'])



In [None]:
# Show the condition -> enriched-terms mapping built in the previous cell.
results_summary

In [66]:
# Persist the condition -> enriched-terms mapping next to the notebook.
# NOTE(review): in this export the cells that build and display results_summary carry no
# execution count (In [None]) while this cell shows In [66]; re-run the notebook top to
# bottom before relying on the pickle.
with open("./drug_pathway_enrichment.pkl", 'wb') as f:
    pkl.dump(results_summary, f)