In [4]:
import cobra

# Load the E. coli model "iJO1366" through cobrapy's model loader
ecoli = cobra.io.load_model("iJO1366")

# Collect the `name` attribute of every metabolite and every reaction in the model
metabolites = [metabolite.name for metabolite in ecoli.metabolites]
reactions = [reaction.name for reaction in ecoli.reactions]

#### Create metabolic network as `networkx` graph

In [11]:
# add folder for imports
import sys
sys.path.append('../')

import pandas as pd
import numpy as np
import json
import networkx as nx
from tqdm import tqdm
from compound import Compound
from reaction import Reaction
from graph import Graph
from data import Data
from pathway import Pathway

# suppress rdkit warnings
import rdkit
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')


# Read the input tables from csv
cpds = pd.read_csv('../data/compounds_final.csv', index_col=0) # containing toxicity
rxns = pd.read_csv('../data/reactions_final.csv', index_col=0)
pairs = pd.read_csv('../data/pairs_final_RPAIRS_pred.csv', index_col=0)
cofactors = pd.read_csv('../data/original/cofactors_KEGG.csv')

# Create class instances
data = Data()
graph = Graph(pairs=pairs)
pathway = Pathway()

# Build the cofactor lookup once: the set of cofactor entries does not change
# while iterating over the compounds, so there is no need to re-extract and
# linearly scan the column for every row.
cofactor_entries = set(cofactors['Entry'])

# Create a Compound object for each row in the DataFrame and add it to the data
for _, row in cpds.iterrows():
    entry = row['Entry']
    name = row['Names']
    formula = row['Formula']
    mw = row['mol_weight']
    smiles = row['SMILES']
    is_cofactor = entry in cofactor_entries
    # is_toxic = row['toxic']
    is_polymer = row['polymer']

    compound = Compound(entry, name, formula, mw, smiles, is_cofactor, is_polymer)
    data.add_element('compound', compound)

# Create a Reaction object for each row in the DataFrame and add it to the data
for _, row in rxns.iterrows():
    entry = row['Entry']
    name = row['Names']
    compounds = row['Compound']
    enzyme = row['EC Number']

    reaction = Reaction(entry, name, compounds, enzyme)
    data.add_element('reaction', reaction)

# CREATE GRAPH
graph.create_graph(data=data, pairs=pairs)

# Annotate the graph using the compound data collected above
graph.calculate_edge_mol_weight(data)
graph.calculate_smiles_similarity(data)

# nodes: 7997 
# edges: 11783


100%|██████████| 7997/7997 [00:08<00:00, 889.58it/s]
100%|██████████| 11783/11783 [00:00<00:00, 385010.47it/s]
100%|██████████| 11783/11783 [00:25<00:00, 455.85it/s]


#### Get KEGG source and target

In [12]:
# KEGG compound IDs used as pathway end points in the cells below
butanol_KEGG = 'C06142'
oxoglatarate_KEGG = 'C00026'

# KEGG compound IDs of the candidate source compounds
src_list = [
    'C00084', 'C00024', 'C00074', 'C00022', 'C00497', 'C00149',
    'C00122', 'C00042', 'C00036', 'C00091', 'C00111', 'C00158',
]

#### Find metabolic pathways from src to trg

Find metabolic pathways from `2-oxoglutarate` to `n-Butanol`

In [13]:
# Set up the search with C00026 as source and C06142 as target on the graph built above
pathway.initialize(oxoglatarate_KEGG, butanol_KEGG, graph)
# Run the pathway search; must come after initialize() and before printing
pathway.get_pathway()
# Print each pathway found (list of compound IDs) together with the list of
# reaction IDs for each of its steps
pathway.print_pathway(with_reactions=True)

Pathway from C00026 to C06142:
['C00026', 'C00024', 'C00332', 'C01144', 'C00877', 'C00136', 'C01412', 'C06142']
[['R02772'], ['R00238', 'R09527'], ['R01975', 'R01976'], ['R03026'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00026', 'C00024', 'C00332', 'C03561', 'C00877', 'C00136', 'C01412', 'C06142']
[['R02772'], ['R00238', 'R09527'], ['R01977'], ['R03027'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00026', 'C00024', 'C00332', 'C01144', 'C03561', 'C00877', 'C00136', 'C01412', 'C06142']
[['R02772'], ['R00238', 'R09527'], ['R01975', 'R01976'], ['R03276'], ['R03027'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00026', 'C00024', 'C00332', 'C05116', 'C00877', 'C00136', 'C01412', 'C06142']
[['R02772'], ['R00238', 'R09527'], ['R05576'], ['R05595'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00026', 'C00024', 'C

In [33]:
# Look up the reactions for each step of the first predicted pathway
first_pred_path = pathway.pred_paths[0]
pathway.get_pathway_reactions(first_pred_path)

[['R02772'],
 ['R00238', 'R09527'],
 ['R01975', 'R01976'],
 ['R03026'],
 ['R01171', 'R01175', 'R09738', 'R11675'],
 ['R01172', 'R01173'],
 ['R03544', 'R03545']]

In [73]:
# Compounds of reaction R02772 (the first step of the first predicted pathway)
comps = data.reactions['R02772'].compounds

# Flatten comps from a list of lists into a flat list of single elements
comps = [item for sublist in comps for item in sublist]

# Names of compound C00036, stored as the string form of a list of strings
c_names = data.compounds['C00036'].name

# Transform string of list of strings to list of strings
# NOTE(review): splitting on ', ' would also split a name that itself contains
# a comma followed by a space - confirm no compound name does.
c_names = c_names.strip('][').split(', ')
# Remove the surrounding double quotes from each name
c_names = [quoted.strip('"') for quoted in c_names]

# Check if any of the c_names is in metabolites: print the first match, or
# report once (via the loop's `else`) when none of the names match.
for c in c_names:
    if c in metabolites:
        print(c)
        break
else:
    print('not in metabolites')

Oxaloacetate


### Find pathways for all source compounds

In [None]:
# Search and print pathways to the butanol target from every source compound.
# The full src_list is iterated so that its last entry is not silently skipped.
for src in src_list:
    pathway.initialize(src, butanol_KEGG, graph)
    pathway.get_pathway()
    pathway.print_pathway(with_reactions=True, to_bigg=True)
    # Visual separator between the results of consecutive sources
    print('--------------------------------------')