In [1]:
import omnipath as op 
import pandas as pd
import numpy as np
import os
import requests
import time
import csv
import pandas as pd
import torch 

from tkgdti.data.utils import uniprot_ids_to_gene_symbols

# Seed the torch and NumPy global RNGs with one shared, fixed value.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

  from .autonotebook import tqdm as notebook_tqdm


# OmniPath: protein-interacts-protein relations

Future work:
- Break the interactions out by relation type (the split below uses only the consensus stimulation/inhibition flags).
- Potentially use high-confidence causal relationships (e.g., from SIGNOR).

In [2]:
# Load the OmniPath interaction table through the `omnipath` client.
# NOTE(review): presumably this queries the OmniPath web service, so the
# result may change between runs -- TODO confirm and consider caching/pinning.
ppi = op.interactions.OmniPath().get()
# Preview the first rows (source/target accessions plus direction/effect flags).
ppi.head()

Unnamed: 0,source,target,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,curation_effort,references,sources,n_sources,n_primary_sources,n_references,references_stripped
0,P0DP25,P48995,True,False,True,True,False,True,3,TRIP:11290752;TRIP:11983166;TRIP:12601176,TRIP,1,1,3,11290752;11983166;12601176
1,P0DP24,P48995,True,False,True,True,False,True,3,TRIP:11290752;TRIP:11983166;TRIP:12601176,TRIP,1,1,3,11290752;11983166;12601176
2,P0DP23,P48995,True,False,True,True,False,True,3,TRIP:11290752;TRIP:11983166;TRIP:12601176,TRIP,1,1,3,11290752;11983166;12601176
3,Q03135,P48995,True,True,False,True,True,False,13,DIP:19897728;HPRD:12732636;IntAct:19897728;Lit...,DIP;HPRD;IntAct;Lit-BM-17;TRIP,5,5,8,10980191;12732636;14551243;16822931;18430726;1...
4,P14416,P48995,True,True,False,True,True,False,1,TRIP:18261457,TRIP,1,1,1,18261457


In [3]:
# Sorted, de-duplicated identifiers that occur as either endpoint of an interaction.
protspace = np.unique(pd.concat([ppi['source'], ppi['target']]).tolist())
len(protspace)

9316

In [4]:
# On-disk cache of the UniProt-accession -> gene-name lookup, so that
# `uniprot_ids_to_gene_symbols` only runs when the file is missing.
# NOTE(review): `protspace` is reassigned to gene symbols in a later cell;
# re-running this cell afterwards without the cache would query the wrong ids.
uniprot2symbol_path = '../../extdata/meta/omnipath_uniprot2symbol.csv'

if os.path.exists(uniprot2symbol_path):
    df = pd.read_csv(uniprot2symbol_path)
else:
    df = uniprot_ids_to_gene_symbols(protspace.tolist())
    # First whitespace-separated token of 'Gene Names'. The vectorised `.str`
    # accessor yields NaN for a missing or empty annotation, whereas
    # `x.split()[0]` raises on NaN (AttributeError) or '' (IndexError).
    df['FirstGene'] = df['Gene Names'].str.split().str[0]
    # Create the cache directory first so the lookup result is not lost to a
    # failed write.
    os.makedirs(os.path.dirname(uniprot2symbol_path), exist_ok=True)
    df.to_csv(uniprot2symbol_path, index=False)

In [5]:
# One row per (entry, gene-name token): split 'Gene Names' on whitespace and
# explode the resulting lists. Multi-word annotations such as
# 'Abl fusion Bcr' therefore become three separate rows.
df = df.assign(
    GeneName=lambda frame: frame['Gene Names'].str.split()
).explode(column='GeneName')

df.head()

Unnamed: 0,From,Entry,Gene Names,FirstGene,GeneName
0,A0A0B4J2F0,A0A0B4J2F0,PIGBOS1,PIGBOS1,PIGBOS1
1,A0A173G4P4,A0A173G4P4,Abl fusion Bcr,Abl,Abl
1,A0A173G4P4,A0A173G4P4,Abl fusion Bcr,Abl,fusion
1,A0A173G4P4,A0A173G4P4,Abl fusion Bcr,Abl,Bcr
2,A0A1B0GUS4,A0A1B0GUS4,UBE2L5,UBE2L5,UBE2L5


In [6]:
# Keep only the accession -> gene-name pairs, under shorter column names.
df = df.loc[:, ['From', 'GeneName']].rename(columns={'From': 'uniprot', 'GeneName': 'gene'})

In [7]:
# Attach gene names to both endpoints. Both merges use pandas' default inner
# join, so interactions without a mapped accession are dropped, and every
# interaction is repeated for each (source name, target name) combination.
# The second merge collides on 'uniprot'/'gene', producing the `_x`/`_y`
# suffixes that are renamed below.
ppi = (
    ppi[['source', 'target', 'consensus_inhibition', 'consensus_stimulation']]
    .merge(df, left_on='source', right_on='uniprot')
    .merge(df, left_on='target', right_on='uniprot')
    .rename(columns={'gene_x': 'source_gene', 'gene_y': 'target_gene'})
)
ppi.to_csv('../../extdata/meta/omnipath__protein_interacts_protein.csv', index=False)
ppi.head()

Unnamed: 0,source,target,consensus_inhibition,consensus_stimulation,uniprot_x,source_gene,uniprot_y,target_gene
0,P0DP25,P48995,True,False,P0DP25,CALM3,P48995,TRPC1
1,P0DP25,P48995,True,False,P0DP25,CALM3,P48995,TRP1
2,P0DP25,P48995,True,False,P0DP25,CALML2,P48995,TRPC1
3,P0DP25,P48995,True,False,P0DP25,CALML2,P48995,TRP1
4,P0DP25,P48995,True,False,P0DP25,CAM3,P48995,TRPC1


In [8]:
# Consensus-effect flags: used to choose the relation label, then dropped
# from every relation table.
effect_cols = ['consensus_inhibition', 'consensus_stimulation']


def _label_edges(edges, keep, relation):
    """Return the rows of `edges` selected by boolean mask `keep`, tagged with
    a constant `relation` column and stripped of the consensus-effect flags."""
    return edges.loc[keep].assign(relation=relation).drop(columns=effect_cols)


# Forward edges: source gene -> target gene.
ppi_fwd = (
    ppi[['source_gene', 'target_gene'] + effect_cols]
    .rename(columns={'source_gene': 'src', 'target_gene': 'dst'})
    .assign(src_type='gene', dst_type='gene')
)
ppi_stim_fwd = _label_edges(ppi_fwd, ppi_fwd['consensus_stimulation'], 'stimulates_fwd')
ppi_inhib_fwd = _label_edges(ppi_fwd, ppi_fwd['consensus_inhibition'], 'inhibits_fwd')
# "Other" = neither flag set.
ppi_other_fwd = _label_edges(
    ppi_fwd,
    ~(ppi_fwd['consensus_inhibition'] | ppi_fwd['consensus_stimulation']),
    'interacts_fwd',
)

# Reverse edges: same rows with the src/dst column labels swapped.
ppi_rev = ppi_fwd.rename(columns={'src': 'dst', 'dst': 'src'})
ppi_stim_rev = _label_edges(ppi_rev, ppi_rev['consensus_stimulation'], 'stimulates_rev')
ppi_inhib_rev = _label_edges(ppi_rev, ppi_rev['consensus_inhibition'], 'inhibits_rev')
ppi_other_rev = _label_edges(
    ppi_rev,
    ~(ppi_rev['consensus_inhibition'] | ppi_rev['consensus_stimulation']),
    'interacts_rev',
)


In [9]:
# Write each relation table as CSV text, without the index.
# NOTE(review): the target paths carry no '.csv' extension -- confirm that
# downstream readers expect these exact names.
relation_exports = [
    (ppi_stim_fwd, '../../extdata/relations/omnipath_ppi_stim_fwd'),
    (ppi_inhib_fwd, '../../extdata/relations/omnipath_ppi_inhib_fwd'),
    (ppi_stim_rev, '../../extdata/relations/omnipath_ppi_stim_rev'),
    (ppi_inhib_rev, '../../extdata/relations/omnipath_ppi_inhib_rev'),
    (ppi_other_fwd, '../../extdata/relations/omnipath_ppi_other_fwd'),
    (ppi_other_rev, '../../extdata/relations/omnipath_ppi_other_rev'),
]
for edges, out_path in relation_exports:
    edges.to_csv(out_path, index=False)

In [12]:
# Coverage check: count the drug-target gene symbols that do not occur as an
# endpoint of any PPI edge.
# Note: `protspace` now holds gene names (from `ppi_fwd`) rather than the
# accessions it held earlier in the notebook.
protspace = np.unique(pd.concat([ppi_fwd['src'], ppi_fwd['dst']]).tolist())

druginfo = pd.read_csv('../../extdata/meta/targetome__drug_targets_gene.csv')
targspace = druginfo['Symbol'].unique()

targets_not_in_ppispace = set(targspace).difference(protspace)
print(f'{len(targets_not_in_ppispace)}/{len(targspace)} drug targets not in protein space')


13/529 drug targets not in protein space
