In [None]:
"""
This notebook takes the output of Gestalt and subsets it to KEGG IDs, human ortholog IDs, and pathway descriptions.
It then merges that subset with the alpha and weight values from DynGENIE3.
The resulting CSV file will be merged with WGCNA modules to create a network in Cytoscape.

"""

In [1]:
import pandas as pd
import numpy as np

In [90]:
# Load the three input tables used by the rest of the notebook.

# KEGG ID mapping file (tab-separated, stored one directory up)
kegg_ids = pd.read_csv('../user_ko.tsv', delimiter='\t')

# DynGENIE3 output
dosed_genes = pd.read_csv('genes_in_low_or_high_with_source.csv')

# Gestalt output
gestalt_results = pd.read_csv('gestalt_results.csv')

In [93]:
# Rows whose absolute normalized enrichment score is below this cut-off are dropped.
MIN_ABS_NES = 2

# --- Gestalt results -------------------------------------------------------
# Keep five columns: gene-set ID, description, user ID and both enrichment scores.
gestalt_subset = gestalt_results[['geneSet','description','userId','enrichmentScore','normalizedEnrichmentScore']]

# --- KEGG ID mapping -------------------------------------------------------
# Drop rows without a KO, then strip a trailing 't1' from every value in the
# 'target' column. .assign() builds a new frame, so the cleaned column is never
# written onto the row-filtered result of dropna() (which is what raises
# SettingWithCopyWarning with a plain `kegg_ids['target'] = ...`).
kegg_ids = (
    kegg_ids
    .dropna(subset=['KO'])
    .assign(target=lambda frame: frame['target'].str.replace('t1$', '', regex=True))
)

# --- Attach KEGG IDs to the DynGENIE3 rows ---------------------------------
# Both merges use pandas' default inner join, so a DynGENIE3 row is kept only
# when its 'target' AND its 'regulatory' gene appear in the KEGG mapping.
dyngenie_kegg = pd.merge(kegg_ids, dosed_genes, on='target')
# Same mapping, renamed so it can be joined on the 'regulatory' gene instead.
kegg_ids_renamed = kegg_ids.rename(columns={"target":"regulatory","KO":"KO_regulatory"})
dyngenie_kegg_merged = pd.merge(kegg_ids_renamed, dyngenie_kegg, on='regulatory')
# Defensive filter: rows without a target KO should already be gone because
# kegg_ids had its missing KOs dropped before the inner merges above.
dyngenie_kegg_filtered = dyngenie_kegg_merged.dropna(subset=['KO'])
# Keep only the columns needed downstream.
dyngenie_subset = dyngenie_kegg_filtered[['target','KO','regulatory','alpha','weight','out.degree','KO_regulatory']]

# --- Combine Gestalt with DynGENIE3 ----------------------------------------
# Gestalt's 'userId' is renamed to 'KO' so it can serve as the join key.
gestalt_renamed = gestalt_subset.rename(columns={"userId":"KO"})
# 'abs_es' is the absolute value of the *normalized* enrichment score; it is
# added for visualization and is also the sort / filter key below.
gestalt_dyngenie = (
    pd.merge(gestalt_renamed, dyngenie_subset, on="KO")
    .assign(abs_es=lambda frame: frame['normalizedEnrichmentScore'].abs())
)

# Sort by enrichment score, strongest first, then drop the weak ones.
gestalt_sorted = gestalt_dyngenie.sort_values(by=['abs_es'],ascending=False)
gestalt_filtered = gestalt_sorted.loc[gestalt_sorted['abs_es']>=MIN_ABS_NES]

# Save to CSV (the DataFrame index is written as the first, unnamed column).
gestalt_filtered.to_csv('gestalt_dyngenie.csv')

In [94]:
# Preview the first five rows of the filtered table.
gestalt_filtered.head(n=5)

Unnamed: 0,geneSet,description,KO,enrichmentScore,normalizedEnrichmentScore,target,regulatory,alpha,weight,out.degree,KO_regulatory,abs_es
14135,hsa00140,Steroid hormone biosynthesis,K00699,-0.602438,-3.081076,Dapma7bEVm010722,Dapma7bEVm010294,0.016457,0.006988,78.680527,K21772,3.081076
14134,hsa00140,Steroid hormone biosynthesis,K00699,-0.602438,-3.081076,Dapma7bEVm010722,Dapma7bEVm010294,0.014473,0.000304,50.039725,K21772,3.081076
14133,hsa00140,Steroid hormone biosynthesis,K00699,-0.602438,-3.081076,Dapma7bEVm010722,Dapma7bEVm010947,0.016457,0.016226,41.597406,K09031,3.081076
14132,hsa00140,Steroid hormone biosynthesis,K00699,-0.602438,-3.081076,Dapma7bEVm010722,Dapma7bEVm010947,0.014473,0.000388,40.040096,K09031,3.081076
14131,hsa00140,Steroid hormone biosynthesis,K00699,-0.602438,-3.081076,Dapma7bEVm010722,Dapma7bEVm019356,0.016457,0.004449,103.858537,K09448,3.081076
