#### Create metabolic network as `networkx` graph

In [2]:
import pandas as pd
import numpy as np
import json
import networkx as nx
from tqdm import tqdm
from compound import Compound
from reaction import Reaction
from graph import Graph
from data import Data

# suppress rdkit warnings
import rdkit
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')


# Read the input tables from CSV (paths are relative to the notebook's working directory).
cpds = pd.read_csv('../GNN_toxic/data/raw/compounds_final.csv', index_col=0) # containing toxicity
rxns = pd.read_csv('data/reactions_final.csv', index_col=0)
pairs = pd.read_csv('data/pairs_final_RPAIRS.csv', index_col=0)
cofactors = pd.read_csv('data/original/cofactors_KEGG.csv')

# Create the container for compounds/reactions and the graph wrapper built from the pairs table.
data = Data()
graph = Graph(pairs=pairs)

# Build the cofactor lookup once: testing membership against the raw column
# array inside the loop rescans every cofactor entry for every compound row.
cofactor_entries = set(cofactors['Entry'])

# Create a Compound object for each row in the DataFrame and add it to the data
for index, row in cpds.iterrows():
    entry = row['Entry']
    name = row['Names']
    formula = row['Formula']
    mw = row['mol_weight']
    smiles = row['SMILES']
    is_cofactor = entry in cofactor_entries
    is_toxic = row['toxic']

    compound = Compound(entry, name, formula, mw, smiles, is_cofactor, is_toxic)
    data.add_element('compound', compound)

# Create a Reaction object for each row in the DataFrame and add it to the data
for index, row in rxns.iterrows():
    entry = row['Entry']
    name = row['Names']
    compounds = row['Compound']
    enzyme = row['EC Number']

    reaction = Reaction(entry, name, compounds, enzyme)
    data.add_element('reaction', reaction)

# CREATE GRAPH
graph.create_graph(data=data, pairs=pairs)

# Annotate the graph using the loaded compound data; the 'mol_weight' weight
# used by the path searches below presumably comes from the first call
# -- TODO confirm against Graph.calculate_edge_mol_weight.
graph.calculate_edge_mol_weight(data)
graph.calculate_smiles_similarity(data)

# nodes: 8481 
# edges: 25809


100%|██████████| 8481/8481 [00:09<00:00, 927.12it/s] 
100%|██████████| 25809/25809 [00:00<00:00, 375287.03it/s]
100%|██████████| 25809/25809 [00:39<00:00, 657.56it/s] 


#### Get KEGG and BIGG names of source and target

In [3]:
# Target compound: KEGG id and its BiGG counterpart.
butanol_KEGG = 'C06142'
butanol_BIGG = graph.kegg_to_bigg_compound(butanol_KEGG)

# KEGG ids of the candidate source compounds.
src_list = ['C00084', 'C00024', 'C00074', 'C00022', 'C00497', 'C00149', 'C00122', 'C00042', 'C00036', 'C00091', 'C00111', 'C00158']

# One row per source compound: the KEGG id plus the BiGG id derived from it.
src = pd.DataFrame({'source': src_list})
src['bigg'] = src['source'].apply(graph.kegg_to_bigg_compound)

#### Pathway example for investigation

In [8]:
# Example query from acetyl-CoA ('C00024') to the butanol target.
# The call returns a collection of candidate paths plus two indices into it;
# idx_smi / idx_com presumably mark the candidates chosen by two different
# criteria -- TODO confirm against Graph.constrained_shortest_path.
pred_path, idx_smi, idx_com = graph.constrained_shortest_path(
    'C00024',
    butanol_KEGG,
    weight='mol_weight',
)
# Show only the candidate selected by idx_smi.
pred_path[idx_smi]

['C00024', 'C05231', 'C00877', 'C00136', 'C01412', 'C06142']

In [11]:
# Same query as the previous example, but display every candidate path
# returned by the search instead of a single selected one.
pred_path, idx_smi, idx_com = graph.constrained_shortest_path(
    'C00024',
    butanol_KEGG,
    weight='mol_weight',
)
pred_path

[['C00024', 'C00136', 'C01412', 'C06142'],
 ['C00024', 'C00332', 'C00136', 'C01412', 'C06142'],
 ['C00024', 'C00083', 'C00332', 'C00136', 'C01412', 'C06142'],
 ['C00024', 'C05231', 'C00877', 'C00136', 'C01412', 'C06142'],
 ['C00024',
  'C00894',
  'C05668',
  'C05989',
  'C00083',
  'C00332',
  'C00136',
  'C01412',
  'C06142'],
 ['C00024', 'C11062', 'C00877', 'C00136', 'C01412', 'C06142'],
 ['C00024', 'C11062', 'C02331', 'C00877', 'C00136', 'C01412', 'C06142'],
 ['C00024', 'C00332', 'C01144', 'C00877', 'C00136', 'C01412', 'C06142'],
 ['C00024', 'C00332', 'C03561', 'C00877', 'C00136', 'C01412', 'C06142'],
 ['C00024',
  'C00332',
  'C01144',
  'C03561',
  'C00877',
  'C00136',
  'C01412',
  'C06142']]

#### Get pathway from every source to target

In [3]:
# For every source compound, run the constrained path search towards the
# butanol target and keep the candidate selected by idx_smi.
pathways = []
for index, source_id in src['source'].items():
    pred_path, idx_smi, idx_com = graph.constrained_shortest_path(
        source_id,
        butanol_KEGG,
        weight='mol_weight',
    )
    chosen_path = pred_path[idx_smi]
    print(index, 'Starting from:', source_id, chosen_path)
    pathways.append(chosen_path)
    print('------------------')

0 Starting from: C00084 ['C00084', 'C00024', 'C00136', 'C01412', 'C06142']
------------------
1 Starting from: C00024 ['C00024', 'C00136', 'C01412', 'C06142']
------------------
2 Starting from: C00074 ['C00074', 'C00111', 'C20960', 'C00024', 'C00136', 'C01412', 'C06142']
------------------
3 Starting from: C00022 ['C00022', 'C00024', 'C00136', 'C01412', 'C06142']
------------------
4 Starting from: C00497 ['C00497', 'C20747', 'C00091', 'C00332', 'C00136', 'C01412', 'C06142']
------------------
5 Starting from: C00149 ['C00149', 'C00024', 'C00136', 'C01412', 'C06142']
------------------
6 Starting from: C00122 ['C00122', 'C00149', 'C00024', 'C00136', 'C01412', 'C06142']
------------------
7 Starting from: C00042 ['C00042', 'C00149', 'C00024', 'C00136', 'C01412', 'C06142']
------------------
8 Starting from: C00036 ['C00036', 'C00083', 'C00332', 'C00136', 'C01412', 'C06142']
------------------
9 Starting from: C00091 ['C00091', 'C00332', 'C00136', 'C01412', 'C06142']
------------------


#### Add the pathways to the `src` dataframe

In [4]:
# Attach the selected pathway (list of KEGG compound ids) to each source row.
src = src.assign(pathway=pathways)
src.head(3)

Unnamed: 0,source,bigg,pathway
0,C00084,acald,"[C00084, C00024, C00136, C01412, C06142]"
1,C00024,accoa,"[C00024, C00136, C01412, C06142]"
2,C00074,pep,"[C00074, C00111, C20960, C00024, C00136, C0141..."


In [5]:
# For each pathway, look up the reaction(s) linking every pair of consecutive
# compounds; the result holds one entry per step, in pathway order.
reactions = [
    [
        graph.get_reaction_by_compounds(cpd_a, cpd_b)
        for cpd_a, cpd_b in zip(pathway, pathway[1:])
    ]
    for pathway in src['pathway']
]

In [6]:
def list_of_arrays_to_list_of_lists(list_of_arrays):
    """Convert each array-like element of an iterable into a plain list.

    Args:
        list_of_arrays: iterable whose elements are themselves iterable
            (e.g. arrays, tuples or lists).

    Returns:
        A new list with ``list(element)`` for every element, in the same
        order. Only the outer level is converted; the elements' own
        items are left as they are.
    """
    return [list(array) for array in list_of_arrays]

# Convert the per-step reaction collections of every pathway into plain lists.
reactions = [list_of_arrays_to_list_of_lists(path_rxns) for path_rxns in reactions]

#### Add the reactions to the `src` dataframe

In [7]:
# Attach the per-step KEGG reaction ids to each source row.
src = src.assign(reactions=reactions)
src.head(3)

Unnamed: 0,source,bigg,pathway,reactions
0,C00084,acald,"[C00084, C00024, C00136, C01412, C06142]","[[R00228], [R01179], [R01172, R01173], [R03544..."
1,C00024,accoa,"[C00024, C00136, C01412, C06142]","[[R01179], [R01172, R01173], [R03544, R03545]]"
2,C00074,pep,"[C00074, C00111, C20960, C00024, C00136, C0141...","[[R01012], [R11182], [R11182], [R01179], [R011..."


#### From KEGG to BIGG 

In [28]:
# Translate every KEGG reaction id to its BiGG id, keeping the nesting
# pathway -> step -> alternative reactions.
rxn_bigg_list = [
    [
        [graph.kegg_to_bigg_reaction(kegg_rxn) for kegg_rxn in step_rxns]
        for step_rxns in path_rxns
    ]
    for path_rxns in src['reactions']
]

In [31]:
# Attach the BiGG reaction ids, nested the same way as the 'reactions' column.
src = src.assign(reactions_bigg=rxn_bigg_list)
src.head(3)

Unnamed: 0,source,bigg,pathway,reactions,reactions_bigg
0,C00084,acald,"[C00084, C00024, C00136, C01412, C06142]","[[R00228], [R01179], [R01172, R01173], [R03544...","[[ACALD], [Not found], [BNORh, Not found], [AL..."
1,C00024,accoa,"[C00024, C00136, C01412, C06142]","[[R01179], [R01172, R01173], [R03544, R03545]]","[[Not found], [BTCOARx, Not found], [BTS, ALCD..."
2,C00074,pep,"[C00074, C00111, C20960, C00024, C00136, C0141...","[[R01012], [R11182], [R11182], [R01179], [R011...","[[DHAPT], [Not found], [Not found], [Not found..."


In [33]:
# Print each pathway in KEGG and BiGG ids, followed by its reactions in both
# namespaces, with a separator line between pathways.
for path, rxn, rxn_bigg in zip(
    src['pathway'],
    src['reactions'],
    src['reactions_bigg'],
):
    print(path)
    # Translate the compounds only after the KEGG ids have been printed.
    path_bigg = list(map(graph.kegg_to_bigg_compound, path))
    print(path_bigg)

    print(rxn)
    print(rxn_bigg)
    print('------------------')

['C00084', 'C00024', 'C00136', 'C01412', 'C06142']
['acald', 'accoa', 'btcoa', 'btal', '1btol']
[['R00228'], ['R01179'], ['R01172', 'R01173'], ['R03544', 'R03545']]
[['ACALD'], ['Not found'], ['BNORh', 'Not found'], ['ALCD4', 'ALCD4y']]
------------------
['C00024', 'C00136', 'C01412', 'C06142']
['accoa', 'btcoa', 'btal', '1btol']
[['R01179'], ['R01172', 'R01173'], ['R03544', 'R03545']]
[['Not found'], ['BTCOARx', 'Not found'], ['BTS', 'ALCD4y']]
------------------
['C00074', 'C00111', 'C20960', 'C00024', 'C00136', 'C01412', 'C06142']
['pep', 'dhap', 'Not found', 'accoa', 'btcoa', 'btal', 'btoh']
[['R01012'], ['R11182'], ['R11182'], ['R01179'], ['R01172', 'R01173'], ['R03544', 'R03545']]
[['DHAPT'], ['Not found'], ['Not found'], ['Not found'], ['BNORh', 'Not found'], ['BTS', 'ALCD4y']]
------------------
['C00022', 'C00024', 'C00136', 'C01412', 'C06142']
['pyr', 'accoa', 'btcoa', 'btal', '1btol']
[['R00209', 'R00210', 'R00211', 'R00353', 'R10866'], ['R01179'], ['R01172', 'R01173'], ['R