In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
import os, sys

In [3]:
import pylab as plt
import matplotlib

import numpy as np
import pandas as pd
from sklearn import preprocessing
from scipy import stats
from statsmodels.sandbox.stats.multicomp import multipletests

In [10]:
sys.path.append('..')
from pyWebOmics.reactome import uniprot_to_reaction, compound_to_reaction
from pyWebOmics.constants import HOMO_SAPIENS

# Demonstration of pyWebOmics

### Load test data

In [5]:
DATA_FOLDER = os.path.abspath(os.path.join('test_data', 'covid19_data'))
DATA_FOLDER

'C:\\Users\\joewa\\Work\\git\\pyWebOmics\\notebooks\\test_data\\covid19_data'

Read proteomics data

In [6]:
protein_data = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_data.csv'), index_col='Identifier')
protein_data.head()

Unnamed: 0_level_0,h_F1_131N,h_F1_131C,h_F1_132C,h_F2_131N,h_F2_131C,h_F2_132C,h_F3_131N,h_F3_131C,h_F3_132C,h_F4_131N,...,s_F3_128N,s_F3_128C,s_F3_129C,s_F4_128N,s_F4_128C,s_F5_128N,s_F5_128C,s_F6_128N,s_F6_128C,s_F6_133N
Identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P04114,0.75,0.853,0.822,1.191,1.175,1.078,0.693,0.947,0.931,1.057,...,1.044,1.305,1.657,1.323,1.624,1.17,0.981,0.791,1.029,1.195
P01024,0.782,1.057,0.994,0.864,0.917,0.79,0.823,1.152,0.816,0.92,...,1.1,0.986,1.114,1.21,1.289,1.104,1.111,1.007,1.159,0.979
P02768,1.183,1.101,1.045,1.086,1.041,1.187,1.234,1.079,1.011,1.099,...,0.786,0.706,0.947,0.831,0.717,0.795,0.776,0.938,0.903,0.743
P01023,1.066,1.278,0.959,0.811,0.789,0.931,0.971,0.769,1.011,0.866,...,0.817,0.728,0.861,0.798,0.751,0.917,0.809,0.78,1.195,0.706
P02751,1.085,0.947,0.993,1.343,1.13,0.778,0.731,1.084,1.107,0.909,...,0.566,0.854,1.109,0.63,0.85,0.661,0.848,0.829,0.76,0.811


In [7]:
protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_design.csv'), index_col='sample')
protein_design

Unnamed: 0_level_0,group
sample,Unnamed: 1_level_1
h_F1_131N,healthy
h_F1_131C,healthy
h_F1_132C,healthy
h_F2_131N,healthy
h_F2_131C,healthy
...,...
s_F5_128N,severe
s_F5_128C,severe
s_F6_128N,severe
s_F6_128C,severe


Read metabolomics data

In [8]:
compound_data = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_data.csv'), index_col='Identifier')
compound_data.head()

Unnamed: 0_level_0,h_jkdz1,h_jkdz2,h_jkdz3,h_jkdz4,h_jkdz5,h_jkdz6,h_jkdz7,h_jkdz8,h_jkdz9,h_jkdz10,...,s_ZX12,s_ZX13,s_ZX14,s_ZX15,s_ZX16,s_ZX17,s_ZX18,s_ZX19,s_ZX20,s_ZX21
Identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C21482,19413052.0,6381812.0,9748316.0,5326872.0,19980720.0,3580375.0,8256121.0,8079382.0,15596590.0,15203630.0,...,1904349.0,3226016.0,737814.7,2817698.0,3329101.0,3206752.75,1466174.0,2779301.0,2117668.0,2184310.0
C18218,2711915.25,2056393.0,1445594.0,2038765.0,2536996.0,2638198.0,2285757.0,1973140.0,2015425.0,2290842.0,...,1409720.0,1413307.0,3218834.0,1602131.0,1317878.0,2930312.75,1168094.0,2946776.0,1417311.0,1474166.0
C05127,87727.25,,92387.06,,159787.9,,,,90551.3,121411.4,...,,,,,,,,138278.8,,
C01152,58832828.0,58439340.0,55521330.0,45162140.0,54789520.0,39412590.0,29878760.0,67517260.0,46660310.0,91185240.0,...,28813140.0,31643580.0,25387670.0,33076040.0,39156980.0,24400592.0,25933750.0,64138680.0,40205880.0,49044880.0
C02918,,181554.9,224039.2,160939.7,320619.4,717655.7,326818.2,513581.0,273458.2,,...,333724.5,,434715.2,35321.18,,655827.25,835970.6,4034381.0,283935.8,80621.6


In [9]:
protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_design.csv'), index_col='sample')
compound_data

Unnamed: 0_level_0,h_jkdz1,h_jkdz2,h_jkdz3,h_jkdz4,h_jkdz5,h_jkdz6,h_jkdz7,h_jkdz8,h_jkdz9,h_jkdz10,...,s_ZX12,s_ZX13,s_ZX14,s_ZX15,s_ZX16,s_ZX17,s_ZX18,s_ZX19,s_ZX20,s_ZX21
Identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C21482,1.941305e+07,6.381812e+06,9.748316e+06,5.326872e+06,1.998072e+07,3.580375e+06,8.256121e+06,8.079382e+06,1.559659e+07,1.520363e+07,...,1.904349e+06,3.226016e+06,7.378147e+05,2.817698e+06,3.329101e+06,3.206753e+06,1.466174e+06,2.779301e+06,2.117668e+06,2.184310e+06
C18218,2.711915e+06,2.056393e+06,1.445594e+06,2.038765e+06,2.536996e+06,2.638198e+06,2.285757e+06,1.973140e+06,2.015425e+06,2.290842e+06,...,1.409720e+06,1.413307e+06,3.218834e+06,1.602131e+06,1.317878e+06,2.930313e+06,1.168094e+06,2.946776e+06,1.417311e+06,1.474166e+06
C05127,8.772725e+04,,9.238706e+04,,1.597879e+05,,,,9.055130e+04,1.214114e+05,...,,,,,,,,1.382788e+05,,
C01152,5.883283e+07,5.843934e+07,5.552133e+07,4.516214e+07,5.478952e+07,3.941259e+07,2.987876e+07,6.751726e+07,4.666031e+07,9.118524e+07,...,2.881314e+07,3.164358e+07,2.538767e+07,3.307604e+07,3.915698e+07,2.440059e+07,2.593375e+07,6.413868e+07,4.020588e+07,4.904488e+07
C02918,,1.815549e+05,2.240392e+05,1.609397e+05,3.206194e+05,7.176557e+05,3.268182e+05,5.135810e+05,2.734582e+05,,...,3.337245e+05,,4.347152e+05,3.532118e+04,,6.558272e+05,8.359706e+05,4.034381e+06,2.839358e+05,8.062160e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C07880,1.765935e+06,1.893382e+05,1.650919e+05,1.256764e+06,6.897398e+05,5.363281e+05,1.080889e+06,1.129245e+05,6.332996e+05,4.778965e+05,...,4.301203e+06,4.358307e+05,,2.365828e+05,8.357362e+06,2.412475e+06,1.220492e+06,4.609959e+05,3.668384e+06,2.477980e+05
C00141,7.867564e+08,6.722941e+08,8.034464e+08,6.285106e+08,8.601340e+08,7.206609e+08,8.570945e+08,8.315696e+08,9.372724e+08,9.169534e+08,...,3.432344e+08,6.596301e+08,5.647217e+08,4.216608e+08,4.916179e+08,5.216197e+08,2.344082e+08,4.774541e+08,5.302854e+08,3.390910e+08
C00262,1.163331e+07,1.222588e+07,9.910138e+06,1.325615e+07,1.496530e+07,6.117347e+06,1.823549e+07,1.034256e+07,1.856934e+07,1.313032e+07,...,1.867050e+07,1.795596e+07,1.170789e+07,1.163837e+07,1.493698e+07,1.428548e+07,9.866073e+06,3.116646e+07,1.724864e+07,1.170101e+07
C00655,7.657407e+04,2.845183e+04,,,,,,,3.171763e+04,3.771615e+04,...,6.375514e+04,5.797543e+04,1.740941e+04,,,5.416402e+04,,9.195635e+04,5.216439e+04,


### Which proteins are involved with which compounds in a reaction?

In [22]:
species_list = [HOMO_SAPIENS]

In [23]:
protein_ids = list(protein_data.index.values)
protein_results, _ = uniprot_to_reaction(protein_ids, species_list)
list(protein_results.items())[:3]

2020-10-26 12:49:27.982 | DEBUG    | pyWebOmics.reactome:uniprot_to_reaction:182 - 
        MATCH (rle:ReactionLikeEvent)-[:input|output|catalystActivity
              |physicalEntity|regulatedBy|regulator|hasComponent|hasMember
              |hasCandidate*]->
              (pe:PhysicalEntity)-[:referenceEntity]->
              (re:ReferenceEntity)-[:referenceDatabase]->
              (rd:ReferenceDatabase)
        WHERE
            re.identifier IN {uniprot_ids} AND
            rd.displayName = 'UniProt' AND
            rle.speciesName IN {species}
        RETURN DISTINCT
            re.identifier AS protein_id,
            re.description AS description,
            rd.displayName AS protein_db,
            rle.stId AS reaction_id,
            rle.displayName AS reaction_name
        


[('P41222',
  [{'reaction_id': 'R-HSA-2161620',
    'reaction_name': 'PGH2 is isomerised to PGD2 by PTGDS'}]),
 ('P01042',
  [{'reaction_id': 'R-HSA-481007',
    'reaction_name': 'Exocytosis of platelet alpha granule contents'},
   {'reaction_id': 'R-HSA-158354',
    'reaction_name': 'kininogen + C1q binding protein tetramer -> kininogen:C1q binding protein tetramer'},
   {'reaction_id': 'R-HSA-158311',
    'reaction_name': 'kallikrein:kininogen:C1q binding protein tetramer -> kallikrein + activated kininogen:C1q binding protein tetramer + bradykinin'},
   {'reaction_id': 'R-HSA-749456',
    'reaction_name': 'Liganded Gi-activating GPCRs bind inactive heterotrimeric G-protein Gi'},
   {'reaction_id': 'R-HSA-749454',
    'reaction_name': 'The Ligand:GPCR:Gi complex dissociates'},
   {'reaction_id': 'R-HSA-380073',
    'reaction_name': 'Liganded Gi-activating GPCR acts as a GEF for Gi'},
   {'reaction_id': 'R-HSA-374331',
    'reaction_name': 'Bradykinin receptors B1 and B2 bind to brady

In [30]:
compound_ids = list(compound_data.index.values)
compound_results, _ = compound_to_reaction(compound_ids, species_list)
list(compound_results.items())[10:12]

2020-10-26 12:54:03.277 | DEBUG    | pyWebOmics.reactome:compound_to_reaction:250 - 
        MATCH (rle:ReactionLikeEvent)-[:input|output|catalystActivity
              |physicalEntity|regulatedBy|regulator|hasComponent|hasMember
              |hasCandidate*]->
              (pe:PhysicalEntity)-[:crossReference|:referenceEntity]->
              (do:DatabaseObject)
        WHERE
            do.identifier IN {compound_ids} AND
            rle.speciesName IN {species}
        RETURN DISTINCT
            do.identifier AS compound_id,
            do.displayName as display_name,
            do.databaseName AS compound_db,
            rle.stId AS reaction_id,
        	rle.displayName AS reaction_name        
        


[('C00158',
  [{'reaction_id': 'R-HSA-70467',
    'reaction_name': 'D-fructose 6-phosphate + ATP => D-fructose 1,6-bisphosphate + ADP'},
   {'reaction_id': 'R-HSA-372449',
    'reaction_name': 'phosphoenolpyruvate [mitochondrial matrix] + citrate [cytosol] => phosphoenolpyruvate [cytosol] + citrate [mitochondrial matrix]'},
   {'reaction_id': 'R-HSA-70971', 'reaction_name': 'citrate <=> isocitrate'},
   {'reaction_id': 'R-HSA-70975',
    'reaction_name': 'Acetyl-CoA + H2O + Oxaloacetate => Citrate + CoA'},
   {'reaction_id': 'R-HSA-5690911',
    'reaction_name': 'ACO1:4Fe-4S isomerises CIT to ISCIT'},
   {'reaction_id': 'R-HSA-433104',
    'reaction_name': 'NACT co-transports trivalent citrate and a sodium ion'},
   {'reaction_id': 'R-HSA-433131',
    'reaction_name': 'NaDC1 co-transports dicarboxylic acids and a sodium ion'},
   {'reaction_id': 'R-HSA-200555',
    'reaction_name': 'acetyl-CoA + bicarbonate + ATP => malonyl-CoA + H2O + ADP + orthophosphate'},
   {'reaction_id': 'R-HSA-