In [49]:
import cobra

# Load the E. coli "textbook" model through cobrapy.
ecoli = cobra.io.load_model("textbook")

# Human-readable names (not identifiers) of every metabolite and reaction in the model.
metabolites = [metabolite.name for metabolite in ecoli.metabolites]
reactions = [reaction.name for reaction in ecoli.reactions]

# Report the model size. The two spaces after the colon reproduce what
# print() emits when the label and the count are passed as separate arguments.
print(f"Number of metabolites:  {len(metabolites)}")
print(f"Number of reactions:  {len(reactions)}")

Number of metabolites:  72
Number of reactions:  95


In [12]:
# All cofactor names from the KEGG cofactor table.
# NOTE: this cell needs `cofactors` (read from cofactors_KEGG.csv in the data-setup
# cell) and `metabolites` (built in the cobra cell) to exist in the kernel already.
cofacs = cofactors['Name'].tolist()

# Find elements of cofacs in metabolites. The metabolite names are put into a set
# once so that each membership check does not rescan the whole list.
metabolite_names = set(metabolites)
cofacs_in_model = [name for name in cofacs if name in metabolite_names]

# Number of cofactor names found among the model metabolites vs. total cofactors.
print(len(cofacs_in_model))
print(len(cofacs))

41
209


In [1]:
# add folder for imports
import sys
sys.path.append('../')

import pandas as pd
import numpy as np
import json
import networkx as nx
from tqdm import tqdm
from compound import Compound
from reaction import Reaction
from graph import Graph
from data import Data
from pathway import Pathway
from utils import create_compound, create_reaction
# suppress RDKit warnings
import rdkit
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')


# Read the input tables from CSV (relative paths under ../data).
# The three "*_final" tables use their first column as the index.
cpds = pd.read_csv('../data/compounds_final.csv', index_col=0) # containing toxicity
rxns = pd.read_csv('../data/reactions_final.csv', index_col=0)
pairs = pd.read_csv('../data/pairs_final_RPAIRS_pred.csv', index_col=0)
# Cofactor table; its 'Name' column is also read by another cell of this notebook.
cofactors = pd.read_csv('../data/original/cofactors_KEGG.csv')

# Create class instances (Data, Graph and Pathway are project classes imported above).
data = Data()
graph = Graph(pairs=pairs)
pathway = Pathway()

# Fill the Data container; the return value of each helper replaces `data`.
# Presumably these attach the compounds (with cofactor info) and the reactions —
# TODO confirm against utils.create_compound / utils.create_reaction.
data = create_compound(data, cpds, cofactors)
data = create_reaction(data, rxns)

# CREATE GRAPH from the populated data and the pairs table
graph.create_graph(data=data, pairs=pairs)

# Extra graph annotations; judging by the method names these compute molecular-weight
# and SMILES-similarity values — TODO confirm in the Graph class.
graph.calculate_edge_mol_weight(data)
graph.calculate_smiles_similarity(data)

# Graph size in the recorded run (consistent with the progress-bar totals in the output):
# nodes: 7997
# edges: 11783


100%|██████████| 7997/7997 [00:09<00:00, 873.69it/s]
100%|██████████| 11783/11783 [00:00<00:00, 414112.97it/s]
100%|██████████| 11783/11783 [00:25<00:00, 459.72it/s]


#### Create metabolic network as `networkx` graph

#### Get KEGG source and target

In [2]:
# define target compound
butanol_KEGG = 'C06142'
oxoglatarate_KEGG = 'C00026'

# define source list
src_list = ['C00084', 'C00024', 'C00074', 'C00022', 'C00497', 'C00149', \
            'C00122', 'C00042', 'C00036', 'C00091', 'C00111', 'C00158']

#### Find metabolic pathways from src to trg

Find metabolic pathways from `2-oxoglutarate` to `n-Butanol`.

In [7]:
# Set up the search with 2-oxoglutarate (C00026) as source and n-butanol (C06142)
# as target on the graph built earlier, then run it.
pathway.initialize(oxoglatarate_KEGG, butanol_KEGG, graph)
pathway.get_pathway()
# Print the pathways found: each one is shown as a list of compound IDs followed by
# one list of candidate reaction IDs per step (see the output below).
pathway.print_pathway(with_reactions=True)

Pathway from C00026 to C06142:
['C00026', 'C00024', 'C00332', 'C01144', 'C00877', 'C00136', 'C01412', 'C06142']
[['R02772'], ['R00238', 'R09527'], ['R01975', 'R01976'], ['R03026'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00026', 'C00024', 'C00332', 'C03561', 'C00877', 'C00136', 'C01412', 'C06142']
[['R02772'], ['R00238', 'R09527'], ['R01977'], ['R03027'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00026', 'C00024', 'C00332', 'C01144', 'C03561', 'C00877', 'C00136', 'C01412', 'C06142']
[['R02772'], ['R00238', 'R09527'], ['R01975', 'R01976'], ['R03276'], ['R03027'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00026', 'C00024', 'C00332', 'C05116', 'C00877', 'C00136', 'C01412', 'C06142']
[['R02772'], ['R00238', 'R09527'], ['R05576'], ['R05595'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00026', 'C00024', 'C

In [47]:
# Candidate reactions (KEGG IDs) for each step of the first pathway printed
# for C00026 -> C06142: one inner list per step.
reactions = [
    ['R02772'],
    ['R00238', 'R09527'],
    ['R01975', 'R01976'],
    ['R03026'],
    ['R01171', 'R01175', 'R09738', 'R11675'],
    ['R01172', 'R01173'],
    ['R03544', 'R03545'],
]

# Flatten the per-step lists into one list of reaction IDs, keeping their order.
RXN_NAMES = [rxn_id for step in reactions for rxn_id in step]

# Equation of each reaction, with the '<=>' arrow replaced by '-->'.
RXN_EQS = [
    data.reactions[rxn_id].equation.replace('<=>', '-->')
    for rxn_id in RXN_NAMES
]

# Create a dataframe with the reactions
df = pd.DataFrame({'Reaction': RXN_NAMES, 'Equation': RXN_EQS})
df

Unnamed: 0,Reaction,Equation
0,R02772,C06018 + C00026 + C00010 --> C11907 + C00025 +...
1,R00238,2 C00024 --> C00010 + C00332
2,R09527,C00024 + C00083 --> C00332 + C00010 + C00011
3,R01975,C01144 + C00003 --> C00332 + C00004 + C00080
4,R01976,C01144 + C00006 --> C00332 + C00005 + C00080
5,R03026,C01144 --> C00877 + C00001
6,R01171,C00136 + C00003 --> C00877 + C00004 + C00080
7,R01175,C00136 + C00016 --> C01352 + C00877
8,R09738,C00136 + C00006 --> C00877 + C00005 + C00080
9,R11675,C00136 + 2 C00003 + 2 C00138 --> C00877 + 2 C0...


In [66]:
# Look up the stored name of KEGG compound C00010. As the output shows, the value is a
# single string that looks like a list of synonyms, not a Python list.
data.compounds['C00010'].name

'["CoA", "Coenzyme A", "CoA-SH"]'

In [69]:
# Inspect the model metabolite `coa_c` (shown in the output as "Coenzyme A", compartment c).
ecoli.metabolites.coa_c

0,1
Metabolite identifier,coa_c
Name,Coenzyme A
Memory address,0x7fbcab9fe260
Formula,C21H32N7O16P3S
Compartment,c
In 9 reaction(s),"PFL, MALS, ACALD, SUCOAS, Biomass_Ecoli_core, AKGDH, CS, PTAr, PDH"


### Find pathways for all source compounds

In [None]:
# Search and print the pathways to n-butanol from every source compound.
# The whole list is iterated: a `[:11]` slice would silently skip the last of
# the 12 entries in `src_list`.
for src in src_list:
    # `pathway` is re-initialized for each source before the search is run.
    pathway.initialize(src, butanol_KEGG, graph)
    pathway.get_pathway()
    pathway.print_pathway(with_reactions=True, to_bigg=True)
    # Separator between the results of consecutive sources.
    print('--------------------------------------')