# Pathway Enrichment Analysis File #

File to see which pathways seem to be significantly enriched with high synergy and different toxicity categories

Simple pathway enrichment analysis: Which pathways are uniquely associated with each toxicity level?

In [1]:
# Import everything needed
from scipy import stats
from toxicity_ranking import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [8]:
# Read in the relevant pre-processed datasets, assumes pre-filtered datasets include only drug combinations
# that have known toxicity, known targets, and known pathways
filtered_combos_df = pd.read_csv('data_processed/filtered_combos_syntox_known_targallpw.csv') 
print(filtered_combos_df.head())
syntoxtarg_allpw_df = pd.read_csv("data_processed/syntoxtarg_allpw.csv") # drugs with their targets and all levels of pathways
syntoxtarg_allpw_df.head()

           drug_row   drug_col cell_line_name  synergy_zip  synergy_loewe  \
0        mefloquine  tamoxifen          TC-32    19.667559       2.263512   
1       chloroquine  tamoxifen          TC-32     8.836970      -0.671750   
2  prochlorperazine  tamoxifen          TC-32    12.411371       1.358891   
3        fingolimod  tamoxifen          TC-32     8.007900      -0.764699   
4         dasatinib  tamoxifen          TC-32    -9.601561       0.314544   

   synergy_bliss  synergy_hsa toxicity_category  
0      25.542924     2.896978          Moderate  
1      11.944238    -0.517536             Major  
2      14.291624     0.880952          Moderate  
3       5.141360    -0.060902             Major  
4      -5.351887     0.710809          Moderate  


Unnamed: 0,drug_name,target_name,target_DrugBank_ID,UniProtKB_ID,Reactome_ID,Pathway_Name
0,meclofenamic acid,Prostaglandin G/H synthase 1,BE0000017,P23219,R-HSA-140180,COX reactions
1,meclofenamic acid,Prostaglandin G/H synthase 1,BE0000017,P23219,R-HSA-1430728,Metabolism
2,meclofenamic acid,Prostaglandin G/H synthase 1,BE0000017,P23219,R-HSA-211859,Biological oxidations
3,meclofenamic acid,Prostaglandin G/H synthase 1,BE0000017,P23219,R-HSA-211945,Phase I - Functionalization of compounds
4,meclofenamic acid,Prostaglandin G/H synthase 1,BE0000017,P23219,R-HSA-2142753,Arachidonate metabolism


In [10]:
# Get the master list of all unique pathways targeted by any drug in the dataset
all_pw = syntoxtarg_allpw_df['Reactome_ID'].unique()
print("Number of unique pathways targeted: ", len(all_pw))

# Create pathway sets for corresponding toxicity groups
major_pws = set()
moderate_pws = set()
minor_pws = set()

for index, row in filtered_combos_df.iterrows():
    drug1 = row['drug_row']
    drug2 = row['drug_col']
    if row['toxicity_category'] == 'Major':
        drug1_pws = syntoxtarg_allpw_df[syntoxtarg_allpw_df['drug_name'] == drug1]['Reactome_ID'].unique()
        drug2_pws = syntoxtarg_allpw_df[syntoxtarg_allpw_df['drug_name'] == drug2]['Reactome_ID'].unique()
        major_pws.update(drug1_pws)
        major_pws.update(drug2_pws)
    elif row['toxicity_category'] == 'Moderate':
        drug1_pws = syntoxtarg_allpw_df[syntoxtarg_allpw_df['drug_name'] == drug1]['Reactome_ID'].unique()
        drug2_pws = syntoxtarg_allpw_df[syntoxtarg_allpw_df['drug_name'] == drug2]['Reactome_ID'].unique()
        moderate_pws.update(drug1_pws)
        moderate_pws.update(drug2_pws)
    else: # Minor
        drug1_pws = syntoxtarg_allpw_df[syntoxtarg_allpw_df['drug_name'] == drug1]['Reactome_ID'].unique()
        drug2_pws = syntoxtarg_allpw_df[syntoxtarg_allpw_df['drug_name'] == drug2]['Reactome_ID'].unique()
        minor_pws.update(drug1_pws)
        minor_pws.update(drug2_pws)

# Use a hypergeometric test to determine if the pathways targeted by drugs in the major toxicity group are enriched
# for any pathways in the dataset
# Major toxicity group
major_pws = list(major_pws)
major_pws = [x for x in major_pws if str(x) != 'nan']
major_pws = list(set(major_pws)) # remove duplicates
print("Number of unique pathways targeted by drugs in the major toxicity group: ", len(major_pws))
major_pws = [x for x in major_pws if x in all_pw] # only keep pathways that are in the master list of pathways
print("Number of unique pathways targeted by drugs in the major toxicity group that are in the master list of pathways: ", len(major_pws))
major_pws = list(set(major_pws)) # remove duplicates
print("Number of unique pathways targeted by drugs in the major toxicity group that are in the master list of pathways after removing duplicates: ", len(major_pws))
# Hypergeometric test
major_pws_enrichment = []
for pw in major_pws:
    M = len(all_pw)
    n = len(major_pws)
    N = len(all_pw)
    x = len(syntoxtarg_allpw_df[syntoxtarg_allpw_df['Reactome_ID'] == pw]['drug_name'].unique())
    pval = stats.hypergeom.sf(x-1, M, n, N)
    major_pws_enrichment.append([pw, x, pval])
major_pws_enrichment = pd.DataFrame(major_pws_enrichment, columns=['Pathway', 'Number of drugs targeting pathway', 'P-value'])
major_pws_enrichment = major_pws_enrichment.sort_values(by='P-value')
print(major_pws_enrichment.head())


Number of unique pathways targeted:  1749
Number of unique pathways targeted by drugs in the major toxicity group:  1484
Number of unique pathways targeted by drugs in the major toxicity group that are in the master list of pathways:  1484
Number of unique pathways targeted by drugs in the major toxicity group that are in the master list of pathways after removing duplicates:  1484
           Pathway  Number of drugs targeting pathway  P-value
0    R-HSA-6804758                                  8      1.0
994    R-HSA-83936                                  3      1.0
993   R-HSA-380612                                  5      1.0
992   R-HSA-192456                                  1      1.0
991   R-HSA-110056                                 10      1.0


In [11]:
# sort major_pws_enrichment by p-value (ascending)
major_pws_enrichment = major_pws_enrichment.sort_values(by='P-value')
major_pws_enrichment.head()

Unnamed: 0,Pathway,Number of drugs targeting pathway,P-value
0,R-HSA-6804758,8,1.0
21,R-HSA-111446,2,1.0
20,R-HSA-180024,8,1.0
1,R-HSA-2173793,11,1.0
2,R-HSA-9028731,3,1.0


Steps:
- Retrieve the drug combinations along with their synergy scores and toxicity categories
- Retrieve the pathways that are targeted for each of these drug combinations and the pathway sets of interest to test (perhaps the higher level pathways that consist of the lowest level pathways)
Break up the drug combination datasets into each toxicity category (drugcombo_major, drugcombo_moderate, drugcombo_minor)
- For each of these drug combination toxicity datasets
    - Rank them by synergy score
    - Compute a hypergeometric test (is this Fisher's exact test?) on the pathway sets to get an enrichment p-value for each pathway set
    - For each pathway set, you create a 2x2 contingency table:
        - Rows: Drug combinations in high synergy vs not high synergy (you'd need to define a threshold)
        - Columns: Hits pathway set vs doesn't hit pathway set
        - Then run Fisher's exact test on this table


Then create a heatmap visualization:
- Rows are enriched pathways
- Columns are toxicity categories
- Color intensity represents -log10(p-value)
- Size of squares could represent odds ratio