In [36]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
from cmapPy.pandasGEXpress.parse import parse

from scipy.stats import spearmanr as scor
from scipy.stats import pearsonr
from scipy.stats import mannwhitneyu as mwu

from sklearn.metrics import roc_curve as ROC
from sklearn.metrics import auc as AUC
from sklearn.metrics import precision_recall_curve as PRC

# Importing data for comparison

In [41]:
#Importing CREEDs benchmark data files 
gene_perturb=pd.read_csv('../results/gene_perturb_results.csv', sep=',', index_col=0, header=0)
drug_perturb=pd.read_csv('../results/drug_perturb_results.csv', sep=',', index_col=0, header=0)

In [42]:
#Checking the absolut values of the benchmark data also
gene_perturb_A=gene_perturb.abs()
drug_perturb_A=drug_perturb.abs()

In [48]:
#Importing the NicheNet model
nn_model=pd.read_csv('../data/ligand_target_matrix.csv', sep=',', header=0, index_col=0)
nn_model = nn_model.reindex(sorted(nn_model.columns), axis=1)

In [44]:
#Importing LINCS perturbation signatures 
consig_gse70138_trt_cp=pd.read_csv('../results/consensus_signature_gse70138_trt_cp.csv', header=0, sep=',', index_col=0)
consig_gse70138_trt_xpr=pd.read_csv('../results/consensus_signature_gse70138_trt_xpr.csv', header=0, sep=',', index_col=0)
consig_gse92742_trt_cp=pd.read_csv('../results/consensus_signature_gse92742_trt_cp.csv', header=0, sep=',', index_col=0)
consig_gse92742_trt_lig=pd.read_csv('../results/consensus_signature_gse92742_trt_lig.csv', header=0, sep=',', index_col=0)
consig_gse92742_trt_oe=pd.read_csv('../results/consensus_signature_gse92742_trt_oe.csv', header=0, sep=',', index_col=0)
consig_gse92742_trt_sh=pd.read_csv('../results/consensus_signature_gse92742_trt_sh.csv', header=0, sep=',', index_col=0)
consig_gse92742_trt_sh_fil=pd.read_csv('../results/consensus_signature_gse92742_trt_sh_fil.csv', header=0, sep=',', index_col=0)

In [45]:
#Checking the absolut values of LINCS also
consig_gse70138_trt_cp_A=consig_gse70138_trt_cp.abs()
consig_gse70138_trt_xpr_A=consig_gse70138_trt_xpr.abs()
consig_gse92742_trt_cp_A=consig_gse92742_trt_cp.abs()
consig_gse92742_trt_lig_A=consig_gse92742_trt_lig.abs()
consig_gse92742_trt_oe_A=consig_gse92742_trt_oe.abs()
consig_gse92742_trt_sh_A=consig_gse92742_trt_sh.abs()
consig_gse92742_trt_sh_fil_A=consig_gse92742_trt_sh_fil.abs()

# NicheNet vs. CREEDs

### NN vs. drug_perturb

In [49]:
drug_perturb.shape

(7781, 387)

In [50]:
nn_model.shape

(25345, 688)

In [51]:
common_genes=list(set(nn_model.index)& set(drug_perturb.index))

In [52]:
len(common_genes)

7727

In [53]:
nn_model=nn_model.loc[common_genes]
drug_perturb=drug_perturb.loc[common_genes]
drug_perturb_A=drug_perturb_A.loc[common_genes]

In [62]:
drug_perturb.shape

(7727, 387)

In [55]:
nn_model.shape

(7727, 688)

In [56]:
drug_perturb.head()

Unnamed: 0,1416,1074,925,345,551,750,885,1584,1585,636,...,1912,552,1362,668,697,1369,550,1579,1219,1514
FAM3A,1.528467,-0.150743,0.222181,-0.470496,0.73188,-0.181326,3.436021,-1.500258,-1.500258,0.068688,...,-0.054181,-0.471603,1.638888,0.813216,1.22066,7.95542,2.458027,-1.388246,0.192524,0.089808
MPI,-0.349662,-2.366964,-0.065323,0.101187,-0.379393,0.432761,3.318031,1.594171,1.594171,-0.759975,...,1.402696,-0.200662,-1.609156,2.248813,0.45314,2.587244,2.0989,1.27587,-0.174179,-0.738923
MIER2,-1.125718,0.779621,-0.603046,-0.580376,0.83618,1.459272,1.060877,0.616406,0.616406,-0.509168,...,1.23706,-3.720114,-0.269386,-0.343174,0.976845,-0.602606,-1.084516,1.839062,-1.892136,0.419547
ARHGEF10,-0.178789,0.251414,-1.485443,0.183717,0.564224,1.06256,-1.319166,-0.834716,-0.834716,-0.852956,...,-1.157459,-1.534592,-1.136904,-1.125984,-0.257826,8.268007,1.298609,0.539243,-1.166287,0.863441
SPATA24,1.00551,-2.834021,3.13343,-1.404423,0.773887,-0.14043,-0.627203,1.616636,1.616636,-0.366637,...,2.014616,1.198471,-0.936847,-0.579943,-4.438929,-10.927265,-0.068208,-0.686556,-0.744657,0.740323


In [67]:
drug_NN=pd.DataFrame(index=nn_model.columns, columns=drug_perturb.columns)
drug_A_NN=pd.DataFrame(index=nn_model.columns, columns=drug_perturb_A.columns)

In [68]:
drug_NN.head()

Unnamed: 0,1416,1074,925,345,551,750,885,1584,1585,636,...,1912,552,1362,668,697,1369,550,1579,1219,1514
A2M,,,,,,,,,,,...,,,,,,,,,,
AANAT,,,,,,,,,,,...,,,,,,,,,,
ADAM12,,,,,,,,,,,...,,,,,,,,,,
ADAM15,,,,,,,,,,,...,,,,,,,,,,
ADAM17,,,,,,,,,,,...,,,,,,,,,,


In [69]:
for lig in drug_NN.index:
    for exp in drug_NN.columns:
        r=pearsonr(nn_model[lig],drug_perturb[exp])
        r=r[0]
        drug_NN.loc[lig,exp]=r



In [None]:
for lig in drug_A_NN.index:
    for exp in drug_A_NN.columns:
        r=pearsonr(nn_model[lig],drug_perturb_A[exp])
        r=r[0]
        drug_A_NN.loc[lig,exp]=r

In [66]:
drug_NN.to_csv('../results/drug_NN_sim.csv', sep=',')
drug_A_NN.to_csv('../results/drug_A_NN_sim.csv', sep=',')

### NN vs. gene_perturb

In [None]:
common_genes=list(set(nn_model.index)& set(gene_perturb.index))
len(common_genes)

In [None]:
nn_model=nn_model.loc[common_genes]
gene_perturb=gene_perturb.loc[common_genes]
gene_perturb_A=gene_perturb_A.loc[common_genes]

In [None]:
gene_NN=pd.DataFrame(index=nn_model.columns, columns=gene_perturb.columns)
gene_A_NN=pd.DataFrame(index=nn_model.columns, columns=gene_perturb_A.columns)

In [None]:
gene_NN.head()

In [None]:
for lig in gene_NN.index:
    for exp in gene_NN.columns:
        r=pearsonr(nn_model[lig],gene_perturb[exp])
        r=r[0]
        gene_NN.loc[lig,exp]=r

In [None]:
for lig in drug_A_NN.index:
    for exp in drug_A_NN.columns:
        r=pearsonr(nn_model[lig],drug_perturb_A[exp])
        r=r[0]
        drug_A_NN.loc[lig,exp]=r