In [69]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
from cmapPy.pandasGEXpress.parse import parse
from scipy.stats import spearmanr as scor
from scipy.stats import mannwhitneyu as mwu

**Importing the NicheNet matrix**  \
I downloaded the ligand-target matrix from [here](https://zenodo.org/record/3260758/files/ligand_target_matrix.rds) and the ligand-receptor network from [here](https://zenodo.org/record/3260758/files/lr_network.rds), and preprocessed both in R.

In [70]:
# NicheNet ligand-target matrix (preprocessed in R); the first CSV column
# becomes the row index. Comma separator and first-row header are the
# pandas defaults, so they are not spelled out.
nn_model = pd.read_csv(
    '../data/ligand_target_matrix.csv',
    index_col=0,
)

In [71]:
# Reorder the columns so that their labels are in sorted order.
sorted_columns = sorted(nn_model.columns)
nn_model = nn_model.reindex(columns=sorted_columns)

In [72]:
# Preview the first rows of the ligand-target matrix.
nn_model.head()

Unnamed: 0,A2M,AANAT,ADAM12,ADAM15,ADAM17,ADAM2,ADAM23,ADAM9,ADCYAP1,ADIPOQ,...,WNT7A,WNT7B,WNT8A,WNT8B,WNT9A,WNT9B,XCL1,XCL2,YARS,ZP3
A1BG,0.000386,0.000503,0.000363,0.000407,0.000462,0.0003,0.000279,0.000319,0.000412,0.000638,...,0.000484,0.000463,0.000297,0.000298,0.000298,0.000312,0.00025,0.000237,0.000388,0.000421
A1BG-AS1,0.000196,0.000393,0.000192,0.000343,0.000236,0.000201,0.000186,0.000162,0.000207,0.000346,...,0.000214,0.000221,0.000129,0.00013,0.000124,0.000141,0.000115,0.000113,0.000131,0.000319
A1CF,0.000459,0.000401,0.000523,0.000369,0.000576,0.000355,0.000324,0.000365,0.000469,0.000762,...,0.000652,0.000695,0.000595,0.000616,0.000476,0.000466,0.000329,0.000312,0.000385,0.000466
A2M,0.00077,0.000642,0.000671,0.000646,0.000916,0.00042,0.000427,0.000582,0.000588,0.000833,...,0.000791,0.000875,0.000512,0.000514,0.000509,0.000527,0.000435,0.000396,0.00062,0.00058
A2M-AS1,9.3e-05,8.7e-05,8.7e-05,0.000115,0.000117,0.000137,6.2e-05,3.7e-05,0.000171,0.000264,...,3.6e-05,3.5e-05,3.6e-05,6e-06,3.7e-05,6e-06,8.2e-05,8.3e-05,5e-06,0.000138


In [73]:
# Ligand-receptor network (preprocessed in R).
# Column 'from' holds the ligands, column 'to' holds the receptors.
ligand_receptor = pd.read_csv('../data/lr_network.csv', index_col=0)
ligand_receptor

Unnamed: 0,from,to,source,database
1,CXCL1,CXCR2,kegg_cytokines,kegg
2,CXCL2,CXCR2,kegg_cytokines,kegg
3,CXCL3,CXCR2,kegg_cytokines,kegg
4,CXCL5,CXCR2,kegg_cytokines,kegg
5,PPBP,CXCR2,kegg_cytokines,kegg
...,...,...,...,...
12647,CXCL6,PIGR,ppi_lr_go,ppi_prediction_go
12648,RLN1,GPR25,ppi_lr_go,ppi_prediction_go
12649,RLN1,RAMP3,ppi_lr_go,ppi_prediction_go
12650,PIK3CB,TNFRSF10A,ppi_bidir_r_go,ppi_prediction_go


In [74]:
# Keep only the ligand-receptor interactions reported by the selected sources.
good_sources = ['kegg_cytokines', 'kegg_neuroactive', 'pharmacology', 'ramilowski_known']

# Series.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
# .to_numpy() keeps `fil` a plain boolean array, as it was before.
fil = ligand_receptor['source'].isin(good_sources).to_numpy()
ligand_receptor = ligand_receptor[fil]

In [75]:
# Collect the distinct receptor ('to') and ligand ('from') gene symbols;
# `rl` is the combined list with the receptors first.
receptors = ligand_receptor['to'].unique().tolist()
ligands = ligand_receptor['from'].unique().tolist()
rl = [*receptors, *ligands]

**Getting the single drugs used in CREEDS** \
I downloaded the data from the [CREEDS downloads page](http://amp.pharm.mssm.edu/CREEDS/#downloads), using the file named 'Manual single drug perturbations'.

In [76]:
# CREEDS 'Manual single drug perturbations' table; the first CSV column
# becomes the row index.
drug_perturb = pd.read_csv(
    '../data/single_drug_perturbations-v1.0.csv',
    index_col=0,
)

In [77]:
# Distinct drug names that occur in the CREEDS drug-perturbation table.
drugs = drug_perturb['drug_name'].unique().tolist()

In [78]:
# Write the list of drug names to a text file, one name per line.
# The context manager guarantees the file is closed even if a write fails.
with open('../data/drugs_perturb.txt', 'w') as fout:
    for line in drugs:
        fout.write(line + '\n')

**Importing the single drugs used in CREEDS with their receptor targets** \
From the list of drugs, I selected those that target receptor genes and looked up their targets in [DrugBank](https://go.drugbank.com/).

In [79]:
# Drug-to-target table; the file is semicolon-separated and its first
# column becomes the row index.
drug_targets = pd.read_csv(
    '../data/single_drugs_.csv',
    sep=';',
    index_col=0,
)

In [80]:
# Display the drug-target table (targets taken from DrugBank).
drug_targets

Unnamed: 0_level_0,Target,Target_type
Drugs,Unnamed: 1_level_1,Unnamed: 2_level_1
1.25 dihydroxyvitamin d,VDR,antagonist
1-[3-(trifluoromethyl)phenyl]piperazine,5HT1A,agonist
1-[3-(trifluoromethyl)phenyl]piperazine,5HT1B,agonist
1-[3-(trifluoromethyl)phenyl]piperazine,5HT1D,agonist
1-[3-(trifluoromethyl)phenyl]piperazine,5HT2A,agonist
...,...,...
Vitamin e,NR1I2,
Zinc acetate,BDKRB1,
Zinc acetate,ESR1,
Zinc acetate,GLRA1,


In [81]:
# From the CREEDS data, keep only the drugs that appear in the index of the
# drug-target table, i.e. the receptor-targeting drugs.
# Series.isin replaces np.in1d (deprecated as of NumPy 2.0) and avoids the
# intermediate list copies; .to_numpy() keeps `fil` a plain boolean array.
fil = drug_perturb['drug_name'].isin(drug_targets.index).to_numpy()
drug_perturb = drug_perturb[fil]

In [82]:
# Display the drug perturbations that remain after the filter.
drug_perturb

Unnamed: 0_level_0,cell_type,ctrl_ids,curator,drug_name,drugbank_id,geo_id,organism,pert_ids,platform,pubchem_cid,smiles,version
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
drug:3499,vastus lateralis muscle,GSM801199|GSM801201|GSM801202|GSM801204|GSM801...,MicrotaskManic,Resveratrol,DB02709,GSE32357,human,GSM801198|GSM801200|GSM801203|GSM801205|GSM801...,GPL11532,,Oc1ccc(cc1)/C=C/c1cc(O)cc(c1)O,1.0
drug:3292,lymphoblastoid cells,GSM162898|GSM162900|GSM162903,garibr01,Citalopram,DB00215,GSE7036,human,GSM162897|GSM162899|GSM162901,GPL570,2771.0,CN(C)CCCC1(C2=C(CO1)C=C(C=C2)C#N)C3=CC=C(C=C3)F,1.0
drug:3475,liver,GSM1273512|GSM1273513|GSM1273514|GSM1273515,MicrotaskManic,Ethanol,DB00898,GSE52644,mouse,GSM1273500|GSM1273501|GSM1273502|GSM1273503,GPL1261,702.0,CCO,1.0
drug:3498,PBMC (peripheral blood mononuclear cells),GSM906497|GSM906498|GSM906499|GSM906500|GSM906...,MicrotaskManic,Resveratrol,DB02709,GSE36930,human,GSM906515|GSM906516|GSM906517|GSM906518|GSM906...,GPL6244,,Oc1ccc(cc1)/C=C/c1cc(O)cc(c1)O,1.0
drug:3497,peripheral blood mononuclear cells (PBMCs),GSM906491|GSM906492|GSM906493|GSM906494|GSM906...,MicrotaskManic,Resveratrol,DB02709,GSE36930,human,GSM906509|GSM906510|GSM906511|GSM906512|GSM906...,GPL6244,,Oc1ccc(cc1)/C=C/c1cc(O)cc(c1)O,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
drug:3442,BV-2 microglial cells,GSM579953|GSM579957|GSM579961,MicrotaskManic,Curcumin,,GSE23639,mouse,GSM579954|GSM579958|GSM579962,GPL4134,969516.0,COC1=C(C=CC(=C1)C=CC(=O)CC(=O)C=CC2=CC(=C(C=C2...,1.0
drug:2771,fear,GSM30804|GSM30805|GSM30806|GSM30807|GSM30808,Axel,Isoflurane,DB00753,GSE1779,rat,GSM30809|GSM30810|GSM30811|GSM30812|GSM30813,GPL341,3763.0,C(C(F)(F)F)(OC(F)F)Cl,1.0
drug:2770,control,GSM30790|GSM30791|GSM30792|GSM30793|GSM30794|G...,Axel,Isoflurane,DB00753,GSE1779,rat,GSM99800|GSM99801|GSM99802|GSM99803|GSM99804,GPL341,3763.0,C(C(F)(F)F)(OC(F)F)Cl,1.0
drug:2772,,GSM263915|GSM263916|GSM263917,Axel,Isotretinoin,DB00982,GSE10432,human,GSM263918|GSM263919|GSM263920,GPL8300,,C/C(=C\C=C\C(=C/C(=O)O)\C)/C=C/C1=C(C)CCCC1(C)C,1.0


In [None]:
#drug_perturb.to_csv('../results/drug_perturb.csv', sep=',')

**Importing single perturbed genes from CREEDS** \
I downloaded the data from the [CREEDS downloads page](http://amp.pharm.mssm.edu/CREEDS/#downloads), using the file named 'Manual single gene perturbations'.


In [88]:
# CREEDS 'Manual single gene perturbations' table; the first CSV column
# becomes the row index.
gene_perturb = pd.read_csv(
    '../data/single_gene_perturbations-v1.0.csv',
    index_col=0,
)

In [89]:
# Preview the first rows of the gene-perturbation table.
gene_perturb.head()

Unnamed: 0_level_0,cell_type,chdir_norm,ctrl_ids,curator,geo_id,hs_gene_symbol,mm_gene_symbol,organism,pert_ids,pert_type,platform,version
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
gene:508,T reg,,GSM998922|GSM998923,dsurujon,GSE40655,FOXO1,Foxo1,mouse,GSM998920|GSM998921,KO,GPL1261,1.0
gene:509,heart,,GSM1080530|GSM1080531|GSM1080532|GSM1080533,dsurujon,GSE44192,PLIN5,Plin5,mouse,GSM1080534|GSM1080535|GSM1080536|GSM1080537,OE,GPL6246,1.0
gene:2349,Inducible stable HEK 293 Flp-In T-REx cells ov...,,GSM997725|GSM997726|GSM997727,gszeto,GSE40601,ERO1L,Ero1l,human,GSM997728|GSM997729|GSM997730,overexpressing a hyperactive mutant,GPL6244,1.0
gene:2348,Inducible stable HEK 293 Flp-In T-REx Ero1alph...,,GSM997719|GSM997720|GSM997721,gszeto,GSE40601,ERO1L,Ero1l,human,GSM997722|GSM997723|GSM997724,OE,GPL6244,1.0
gene:2343,OT-I T cells from spleen,,GSM1290674|GSM1290675|GSM1290676,AliDot,GSE53388,PPP2R2D,Ppp2r2d,mouse,GSM1290677|GSM1290678|GSM1290679,KD,GPL1261,1.0


In [90]:
# (number of rows, number of columns) of the gene-perturbation table.
gene_perturb.shape

(2176, 12)

In [109]:
# Add a 'sign' column for the direction of each perturbation
# (e.g. knock-out: -1, overexpression: +1). Every row starts at 0.
gene_perturb['sign']=0

In [108]:
# Count how often each perturbation-type label occurs.
gene_perturb['pert_type'].value_counts()

KO                                              71
OE                                              32
KD                                              27
knockout                                        14
drugactivation                                   9
druginhibition                                   9
activemutant                                     9
hypomorphic prosaposin deficiency                7
Stimulation of gene product                      7
knockdown                                        5
mutant                                           4
Deficiency                                       3
anti-IL6 monoclonal antibody, neutralization     3
Reactivation  (WASHOUT - 2 Hours)                3
Reactivation  (WASHOUT - 4 Hours)                2
INHIBITION                                       2
siTNF                                            1
R899X mutation                                   1
knock-down (shRNA 6-16, doxycycline-induced)     1
expression of human gene       

In [120]:
# Direction of each recognised perturbation-type label:
#   +1 for overexpression / stimulation,
#   -1 for knock-out / knock-down / inhibition.
# Labels are matched exactly (case- and whitespace-sensitive).
PERT_TYPE_SIGN = {
    'OE': 1,
    'Stimulation of gene product': 1,
    'stimulation': 1,
    'KO': -1,
    'KD': -1,
    'knockout': -1,
    'knockdown': -1,
    'INHIBITION': -1,
    'siRNA': -1,
    'knock-down (shRNA 2-4)': -1,
    'knock-down (shRNA 6-16, doxycycline-induced)': -1,
    'knock-down (shRNA 4-1, doxycycline-induced)': -1,
}

# Vectorised lookup instead of a row-by-row .loc loop: faster, safe with
# duplicate index labels, and independent of whether the 'sign' column
# already exists. Labels not in the table (or missing) get sign 0.
gene_perturb['sign'] = (
    gene_perturb['pert_type']
    .map(PERT_TYPE_SIGN)
    .fillna(0)
    .astype(int)
)

In [121]:
# Display the gene-perturbation table including the 'sign' column.
gene_perturb

Unnamed: 0_level_0,cell_type,chdir_norm,ctrl_ids,curator,geo_id,hs_gene_symbol,mm_gene_symbol,organism,pert_ids,pert_type,platform,version,sign
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
gene:1928,brown adipose tissue of ob/ob mice,,GSM800590|GSM800591|GSM800592|GSM800593|GSM800...,Andrew,GSE32316,FGFR1,Fgfr1,mouse,GSM800596|GSM800597|GSM800598|GSM800599|GSM800...,drugactivation,GPL1261,1.0,0
gene:2347,heart,,GSM360098|GSM360099|GSM360100,nbongio,GSE14411,ITGB1,Itgb1,mouse,GSM360101|GSM360102|GSM360103,KO,GPL6246,1.0,-1
gene:2584,Osteoclasts,,GSM1487524|GSM1487525|GSM1487526,maroulisv,GSE60761,CALCR,Calcr,mouse,GSM1487530|GSM1487531|GSM1487532,KO,GPL1261,1.0,-1
gene:2589,glioma cells,,GSM1498939|GSM1498940|GSM1498941,,GSE61178,KDR,Kdr,human,GSM1498942|GSM1498943|GSM1498944,KD,GPL10558,1.0,-1
gene:1852,mammary gland,,GSM72723|GSM72730|GSM72795|GSM72877,Andrew,GSE3260,FGFR1,Fgfr1,mouse,GSM72726|GSM72727|GSM72789|GSM72793|GSM72869,activemutant,GPL81,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
gene:1555,Xenograft tumors from DU145,,GSM1536778|GSM1536779|GSM1536780,gszeto,GSE62941,IL6,Il6,human,GSM1536775|GSM1536776|GSM1536777,"anti-IL6 monoclonal antibody, neutralization",GPL570,1.0,0
gene:1118,"Colon cancer tumor, CBA/C57BL/6",,GSM959587|GSM959588|GSM959589,OliFucMuc,GSE39273,CXCL8,,mouse,GSM959590|GSM959591|GSM959592,human IL8 transgenic,GPL1261,1.0,0
gene:1689,CD4+ T cells from spleen at 1 month,,GSM186552|GSM186553|GSM186554,gszeto,GSE7705,IL1RN,Il1rn,mouse,GSM186523|GSM186524,KO,GPL1261,1.0,-1
gene:1683,strain: 129/Sv tissue: madibular condyle carti...,,GSM865303|GSM865304|GSM865305,gszeto,GSE35297,DDR1,Ddr1,mouse,GSM865301|GSM865302,KO,GPL6246,1.0,-1


Keeping only the perturbations whose gene is a NicheNet receptor or ligand

In [91]:
# Keep only the perturbations whose human gene symbol is in `rl`
# (the combined receptor + ligand list).
# Series.isin replaces np.in1d (deprecated as of NumPy 2.0) and avoids the
# intermediate list copy; .to_numpy() keeps `fil` a plain boolean array.
fil = gene_perturb['hs_gene_symbol'].isin(rl).to_numpy()
gene_perturb = gene_perturb[fil]

In [92]:
# Display the gene perturbations that remain after the filter.
gene_perturb

Unnamed: 0_level_0,cell_type,chdir_norm,ctrl_ids,curator,geo_id,hs_gene_symbol,mm_gene_symbol,organism,pert_ids,pert_type,platform,version
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
gene:1928,brown adipose tissue of ob/ob mice,,GSM800590|GSM800591|GSM800592|GSM800593|GSM800...,Andrew,GSE32316,FGFR1,Fgfr1,mouse,GSM800596|GSM800597|GSM800598|GSM800599|GSM800...,drugactivation,GPL1261,1.0
gene:2347,heart,,GSM360098|GSM360099|GSM360100,nbongio,GSE14411,ITGB1,Itgb1,mouse,GSM360101|GSM360102|GSM360103,KO,GPL6246,1.0
gene:2584,Osteoclasts,,GSM1487524|GSM1487525|GSM1487526,maroulisv,GSE60761,CALCR,Calcr,mouse,GSM1487530|GSM1487531|GSM1487532,KO,GPL1261,1.0
gene:2589,glioma cells,,GSM1498939|GSM1498940|GSM1498941,,GSE61178,KDR,Kdr,human,GSM1498942|GSM1498943|GSM1498944,KD,GPL10558,1.0
gene:1852,mammary gland,,GSM72723|GSM72730|GSM72795|GSM72877,Andrew,GSE3260,FGFR1,Fgfr1,mouse,GSM72726|GSM72727|GSM72789|GSM72793|GSM72869,activemutant,GPL81,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
gene:1555,Xenograft tumors from DU145,,GSM1536778|GSM1536779|GSM1536780,gszeto,GSE62941,IL6,Il6,human,GSM1536775|GSM1536776|GSM1536777,"anti-IL6 monoclonal antibody, neutralization",GPL570,1.0
gene:1118,"Colon cancer tumor, CBA/C57BL/6",,GSM959587|GSM959588|GSM959589,OliFucMuc,GSE39273,CXCL8,,mouse,GSM959590|GSM959591|GSM959592,human IL8 transgenic,GPL1261,1.0
gene:1689,CD4+ T cells from spleen at 1 month,,GSM186552|GSM186553|GSM186554,gszeto,GSE7705,IL1RN,Il1rn,mouse,GSM186523|GSM186524,KO,GPL1261,1.0
gene:1683,strain: 129/Sv tissue: madibular condyle carti...,,GSM865303|GSM865304|GSM865305,gszeto,GSE35297,DDR1,Ddr1,mouse,GSM865301|GSM865302,KO,GPL6246,1.0


In [None]:
#gene_perturb.to_csv('../results/gene_perturb.csv', sep=',')