# Load microorganism

In [1]:
# Add path to the model
import sys
sys.path.append("../")
import pandas as pd
import cobra
from microorganism import Microorganism

# Read E. coli model from cobrapy
ecoli = cobra.io.load_json_model("../data/original/e_coli_core.json")

# Create a microorganism object
cobra_model = Microorganism(cobra_model=ecoli)

### Create Metabolic Network

In [2]:
# add folder for imports
import sys
sys.path.append('../')

import pandas as pd
import numpy as np
import json
import networkx as nx
from tqdm import tqdm
from compound import Compound
from reaction import Reaction
from graph import Graph
from data import Data
from pathway import Pathway
from utils import create_compound, create_reaction
# suppres rdkit warnings
import rdkit
from rdkit import RDLogger
from utils import check_list

RDLogger.DisableLog('rdApp.*')


# read data from csv
cpds = pd.read_csv('../data/compounds_final.csv', index_col=0) # containing toxicity
rxns = pd.read_csv('../data/reactions_final.csv', index_col=0)
pairs = pd.read_csv('../data/pairs_final_RPAIRS_pred.csv', index_col=0)
cofactors = pd.read_csv('../data/original/cofactors_KEGG.csv')

# create class instances
data = Data()
graph = Graph(pairs=pairs)
pathway = Pathway()

data = create_compound(data, cpds, cofactors)
data = create_reaction(data, rxns)

# CREATE GRAPH
graph.create_graph(data=data, pairs=pairs)

graph.calculate_edge_mol_weight(data)
graph.calculate_smiles_similarity(data)

# nodes: 7997 
# edges: 11783


100%|██████████| 7997/7997 [00:08<00:00, 892.33it/s]
100%|██████████| 11783/11783 [00:00<00:00, 407176.74it/s]
100%|██████████| 11783/11783 [00:25<00:00, 462.00it/s]


#### Get KEGG source and target

In [3]:
# define target compound
butanol_KEGG = 'C06142'
oxoglatarate_KEGG = 'C00026'

# define source list
src_list = ['C00084', 'C00024', 'C00074', 'C00022', 'C00497', 'C00149', \
            'C00122', 'C00042', 'C00036', 'C00091', 'C00111', 'C00158']

#### Find metabolic pathways from src to trg

Find metabolic pathways from `Acetyl-coA` to `n-Butanol`

In [4]:
pathway.initialize(src_list[1], butanol_KEGG, graph)
pathway.get_pathway()
pathway.print_pathway(with_reactions=True)

Pathway from C00024 to C06142:
['C00024', 'C00332', 'C01144', 'C00877', 'C00136', 'C01412', 'C06142']
[['R00238', 'R09527'], ['R01975', 'R01976'], ['R03026'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00024', 'C00332', 'C03561', 'C00877', 'C00136', 'C01412', 'C06142']
[['R00238', 'R09527'], ['R01977'], ['R03027'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00024', 'C00332', 'C01144', 'C03561', 'C00877', 'C00136', 'C01412', 'C06142']
[['R00238', 'R09527'], ['R01975', 'R01976'], ['R03276'], ['R03027'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00024', 'C00332', 'C05116', 'C00877', 'C00136', 'C01412', 'C06142']
[['R00238', 'R09527'], ['R05576'], ['R05595'], ['R01171', 'R01175', 'R09738', 'R11675'], ['R01172', 'R01173'], ['R03544', 'R03545']]
['C00024', 'C00332', 'C03561', 'C01144', 'C00877', 'C00136', 'C01412', 'C06142']
[['R00238', 'R09527'], ['R0197

#### Get a single pathway for further research

In [85]:
pathway.single_pathway(pathway.pred_paths[pathway.idx_smi[0]])

(['C00024', 'C00332', 'C01144', 'C00877', 'C00136', 'C01412', 'C06142'],
 [['R00238', 'R09527'],
  ['R01975', 'R01976'],
  ['R03026'],
  ['R01171', 'R01175', 'R09738', 'R11675'],
  ['R01172', 'R01173'],
  ['R03544', 'R03545']])

In [92]:
cobra_kegg = cobra_model.metabolites_df['kegg'].tolist()
product_list = []

j = 0
for i, r in enumerate(pathway.path_reactions):
    for r_i in r:

        # Check the part of reaction that exist the src metabolite
        if pathway.path_compound[j] in data.reactions[r_i].compounds[0]:
            products = data.reactions[r_i].compounds[1]
            reactants = data.reactions[r_i].compounds[0]
        elif pathway.path_compound[j] in data.reactions[r_i].compounds[1]:
            products = data.reactions[r_i].compounds[0]
            reactants = data.reactions[r_i].compounds[1]
        else:
            print('Error')
            break
        
        print(f'{i}: Reaction {r_i}, {data.reactions[r_i].equation}')
        print(pathway.path_compound[j])
        print(f'Products: {products}')
        print(f'Reactants: {reactants}')

        b1 = check_list(reactants, cobra_kegg)
        b2 = check_list(reactants, product_list)
        if b1:
            print(f'{r_i} is valid')
        elif b2:
            print(f'{r_i} is valid')
        # else:
        #     print(f'Missing reactants in {r_i}')
        #     print(diff(reactants, cobra_kegg)) 
        product_list.append(products)
        print()
    j+=1

0: Reaction R00238, 2 C00024 <=> C00010 + C00332
C00024
Products: ['C00010', 'C00332']
Reactants: ['C00024']
R00238 is valid

0: Reaction R09527, C00024 + C00083 <=> C00332 + C00010 + C00011
C00024
Products: ['C00332', 'C00010', 'C00011']
Reactants: ['C00024', 'C00083']

1: Reaction R01975, C01144 + C00003 <=> C00332 + C00004 + C00080
C00332
Products: ['C01144', 'C00003']
Reactants: ['C00332', 'C00004', 'C00080']

1: Reaction R01976, C01144 + C00006 <=> C00332 + C00005 + C00080
C00332
Products: ['C01144', 'C00006']
Reactants: ['C00332', 'C00005', 'C00080']

2: Reaction R03026, C01144 <=> C00877 + C00001
C01144
Products: ['C00877', 'C00001']
Reactants: ['C01144']

3: Reaction R01171, C00136 + C00003 <=> C00877 + C00004 + C00080
C00877
Products: ['C00136', 'C00003']
Reactants: ['C00877', 'C00004', 'C00080']

3: Reaction R01175, C00136 + C00016 <=> C01352 + C00877
C00877
Products: ['C00136', 'C00016']
Reactants: ['C01352', 'C00877']

3: Reaction R09738, C00136 + C00006 <=> C00877 + C00005

In [None]:
# In a string map some words to others using a df
def str_map(string, df):
    for i in range(len(df)):
        string = string.replace(df.iloc[i, 1], df.iloc[i, 0])
    return string

s = str_map(eq, cobra_model.metabolites_df)
s

'C06018 + akg_c + coa_c <=> C11907 + glu__L_c + accoa_c'

In [None]:
cobra_model.metabolites_df[cobra_model.metabolites_df['kegg'] == 'C06018']

Unnamed: 0,metabolites,kegg,compartment


### Find pathways for all source compounds

In [None]:
for src in src_list[:11]:
    pathway.initialize(src, butanol_KEGG, graph)
    pathway.get_pathway()
    pathway.print_pathway(with_reactions=True, to_bigg=True)    
    print('--------------------------------------')    