#### Create metabolic network as `networkx` graph

In [1]:
import pandas as pd
import numpy as np
import json
import networkx as nx
from tqdm import tqdm
from compound import Compound
from reaction import Reaction
from graph import Graph
from data import Data
from pathway import Pathway

# suppress rdkit warnings
import rdkit
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')


# Read the input tables from CSV.
cpds = pd.read_csv('../GNN_toxic/data/raw/compounds_final.csv', index_col=0) # containing toxicity
rxns = pd.read_csv('data/reactions_final.csv', index_col=0)
pairs = pd.read_csv('data/pairs_final_RPAIRS.csv', index_col=0)
cofactors = pd.read_csv('data/original/cofactors_KEGG.csv')

# Build the cofactor lookup once. A set gives constant-time membership tests,
# instead of re-scanning the whole `Entry` column for every compound row below.
cofactor_entries = set(cofactors['Entry'])

# Create the container, graph and pathway instances used by the rest of the notebook.
data = Data()
graph = Graph(pairs=pairs)
pathway = Pathway()

# Create a Compound object for each row in the DataFrame and add it to the data
for index, row in cpds.iterrows():
    entry = row['Entry']
    name = row['Names']
    formula = row['Formula']
    mw = row['mol_weight']
    smiles = row['SMILES']
    is_cofactor = entry in cofactor_entries
    is_toxic = row['toxic']
    is_polymer = row['polymer']

    # NOTE: Compound is called with is_polymer before is_toxic.
    compound = Compound(entry, name, formula, mw, smiles, is_cofactor, is_polymer, is_toxic)
    data.add_element('compound', compound)

# Create a Reaction object for each row in the DataFrame and add it to the data
for index, row in rxns.iterrows():
    entry = row['Entry']
    name = row['Names']
    compounds = row['Compound']
    enzyme = row['EC Number']

    reaction = Reaction(entry, name, compounds, enzyme)
    data.add_element('reaction', reaction)

# CREATE GRAPH
graph.create_graph(data=data, pairs=pairs)

# Annotate the graph using the compound data collected above.
graph.calculate_edge_mol_weight(data)
graph.calculate_smiles_similarity(data)

# nodes: 8481 
# edges: 25809


100%|██████████| 8481/8481 [00:06<00:00, 1300.47it/s]
100%|██████████| 25809/25809 [00:00<00:00, 564414.72it/s]
100%|██████████| 25809/25809 [00:54<00:00, 474.04it/s] 


#### Get KEGG and BIGG names of source and target

In [2]:
# define target compound
butanol_KEGG = 'C06142'
butanol_BIGG = pathway.kegg_to_bigg_compound(butanol_KEGG)

# define source list
src_list = ['C00084', 'C00024', 'C00074', 'C00022', 'C00497', 'C00149', 'C00122', 'C00042', 'C00036', 'C00091', 'C00111', 'C00158']

# Create a dataframe with one row per source compound: the ID from `src_list`
# in `source`, and the result of `pathway.kegg_to_bigg_compound` for it in `bigg`.
src = pd.DataFrame({'source': src_list})
src['bigg'] = src['source'].apply(pathway.kegg_to_bigg_compound)

#### Simple Pathway example for investigation

In [3]:
# Configure the existing `pathway` instance with the first source compound,
# the butanol target and the metabolic graph.
pathway.initialize(src_list[0], butanol_KEGG, graph)
print(pathway)

# get all possible paths
pathway.get_pathway()
pathway.print_pathway(with_reactions=True, to_bigg=True)

Pathway from C00084 to C06142
Pathway from C00084 to C06142:
['C00084', 'C00024', 'C05231', 'C00877', 'C00136', 'C01412', 'C06142']
[['R00228'], ['R10564'], ['R03030'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['acald', 'accoa', 'Not found', 'b2coa', 'btcoa', 'btal', '1btol']
[['ACALD'], ['Not found'], ['Not found'], ['ACOAD1', 'ACOA40OR', 'Not found', 'Not found'], ['BTCOARx', 'Not found'], ['BTS', 'BTS_nadph']]


['C00084', 'C00024', 'C00332', 'C01144', 'C00877', 'C00136', 'C01412', 'C06142']
[['R00228'], ['R00238', 'R09527'], ['R01975', 'R01976'], ['R03026'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['acald', 'accoa', 'aacoa', '3hbcoa', 'b2coa', 'btcoa', 'btal', '1btol']
[['ACALD'], ['ACACT1m', 'Not found'], ['HACD1', 'HBCO_nadp'], ['ECOAH1m'], ['ACOAD1', 'ACOA40OR', 'Not found', 'Not found'], ['BTCOARx', 'Not found'], ['ALCD4', 'ALCD4y']]


['C00084', 'C00024', 'C00332', 'C03561', 'C00877', 'C