In [1]:
import cobra
import csv
from json2xml import json2xml
from json2xml.utils import readfromjson
import csv
import os
import pandas as pd
from pandas import DataFrame

We will use MetaNetX xref tables for converting ModelSEED compounds/reactions into BiGG nomenclature. The tables have been downloaded from:
https://www.metanetx.org/mnxdoc/mnxref.html

In [2]:
# Translator for compounds: keep only the chem_xref lines that mention BiGG
# ("bigg") or ModelSEED ("seedM") and write them to metanetx/grep_chemm.tsv.
# The command is a raw string so that "\|" (grep alternation) reaches the shell
# unchanged instead of being parsed as an invalid Python escape sequence.
# NOTE(review): `tail -n +4` drops the first three lines that survive grep,
# presumably leftover header lines - confirm against the downloaded file.
os.system(r'cat metanetx/chem_xref.tsv | grep "bigg\|seedM" | tail -n +4 > metanetx/grep_chemm.tsv ')

0

In [3]:
# Read the grep-filtered compound cross-references. The file has no header
# row, so pandas labels the columns 0, 1 and 2.
compounds_df = pd.read_csv(
    "metanetx/grep_chemm.tsv",
    sep="\t",
    header=None,
)

In [4]:
# Inspect the raw compound cross-reference table.
compounds_df

Unnamed: 0,0,1,2
0,seedM:M_cpd11416,BIOMASS,secondary/obsolete/fantasy identifier
1,seedM:cpd11416,BIOMASS,Biomass
2,seedM:M_cpd28297,MNXM0,secondary/obsolete/fantasy identifier
3,seedM:cpd28297,MNXM0,UNKNOWN
4,biggM:M_oh1,MNXM02,secondary/obsolete/fantasy identifier
...,...,...,...
86202,seedM:cpd33563,MNXM99992,"(9Z,11E)-tetradeca-9,11-dien-1-yl acetate"
86203,biggM:M_h2o,WATER,secondary/obsolete/fantasy identifier
86204,biggM:h2o,WATER,H2O
86205,seedM:M_cpd00001,WATER,secondary/obsolete/fantasy identifier


In [5]:
# Keep only the rows whose third column is not the obsolete-identifier marker
notobsolete = compounds_df[2].ne('secondary/obsolete/fantasy identifier')
filtered_compounds_df = compounds_df.loc[notobsolete]
filtered_compounds_df.head()

Unnamed: 0,0,1,2
1,seedM:cpd11416,BIOMASS,Biomass
3,seedM:cpd28297,MNXM0,UNKNOWN
5,biggM:oh1,MNXM02,Hydroxide ion
7,seedM:cpd15275,MNXM02,hydroxide ion||oh1
9,biggM:h,MNXM1,H+


In [6]:
# Remove the third column (label 2), which is not needed from here on.
# The frame is rebound through drop() instead of deleted in place so that the
# cell can be re-run: `del frame[2]` raises KeyError once the column is gone.
filtered_compounds_df = filtered_compounds_df.drop(columns=[2], errors="ignore")

In [7]:
# Preview the table after the third column has been removed.
filtered_compounds_df.head()

Unnamed: 0,0,1
1,seedM:cpd11416,BIOMASS
3,seedM:cpd28297,MNXM0
5,biggM:oh1,MNXM02
7,seedM:cpd15275,MNXM02
9,biggM:h,MNXM1


In [8]:
# Prefixed identifiers ("<namespace>:<id>") taken from the first column
w = filtered_compounds_df[0].tolist()
# how many metabolite identifiers there are (bigg + seed)
len(w)

43082

In [9]:
# Split every "<namespace>:<id>" string into namespace and identifier.
# maxsplit=1 guarantees exactly two parts per entry: a ':' inside an identifier
# would otherwise create extra columns, and column label 2 is assigned the
# MetaNetX IDs in the next step.
l = [entry.split(":", 1) for entry in w]
compounds = DataFrame(l)
compounds.head()

Unnamed: 0,0,1
0,seedM,cpd11416
1,seedM,cpd28297
2,biggM,oh1
3,seedM,cpd15275
4,biggM,h


In [10]:
# Append the MetaNetX identifiers as a third column (label 2)
m = filtered_compounds_df[1].tolist()
compounds[2] = m
compounds.head()

Unnamed: 0,0,1,2
0,seedM,cpd11416,BIOMASS
1,seedM,cpd28297,MNXM0
2,biggM,oh1,MNXM02
3,seedM,cpd15275,MNXM02
4,biggM,h,MNXM1


Now that we have a purged table we can use it for translating metabolites.

In [22]:
# Rows whose namespace column is "seedM"
seed = compounds.loc[compounds[0].eq("seedM")]
seed

Unnamed: 0,0,1,2
0,seedM,cpd11416,BIOMASS
1,seedM,cpd28297,MNXM0
3,seedM,cpd15275,MNXM02
5,seedM,cpd00067,MNXM1
7,seedM,cpd00004,MNXM10
...,...,...,...
43076,seedM,cpd28907,MNXM99969
43077,seedM,cpd23431,MNXM9997
43078,seedM,cpd25878,MNXM9999
43079,seedM,cpd33563,MNXM99992


In [23]:
# Rows whose namespace column is "biggM"
bigg = compounds.loc[compounds[0].eq("biggM")]
bigg

Unnamed: 0,0,1,2
2,biggM,oh1,MNXM02
4,biggM,h,MNXM1
6,biggM,nadh,MNXM10
8,biggM,grdp,MNXM100
17,biggM,prostgf2,MNXM1001
...,...,...,...
42981,biggM,3htmelys,MNXM990
43001,biggM,4mptnl,MNXM992
43011,biggM,3AStrmyn,MNXM9930
43013,biggM,35cdamp,MNXM9931


In [24]:
# Drop the namespace column (label 0) from both subsets; every row in each
# subset carries the same namespace value after the split above.
# drop() with errors="ignore" keeps the cell re-runnable, whereas `del frame[0]`
# raises KeyError when the column has already been removed.
bigg = bigg.drop(columns=[0], errors="ignore")
seed = seed.drop(columns=[0], errors="ignore")

In [34]:
# Renumber the BiGG rows from 0 and discard the old index labels
bigg = bigg.reset_index(drop=True)

In [35]:
# Renumber the SEED rows from 0 and discard the old index labels
seed = seed.reset_index(drop=True)

In [61]:
# Compound lookup table: BiGG IDs are the keys, SEED IDs the values
C = dict()

In [62]:
# Fill C with one SEED ID per BiGG ID, matched through the shared MetaNetX ID
# (column 2). When several SEED rows share a MetaNetX ID the first one is used;
# BiGG rows without any SEED match are skipped.

# First SEED ID seen for each MetaNetX ID. Building this once replaces a full
# scan of `seed` for every BiGG row with a dictionary lookup.
seed_by_mnx = {}
for m_id, s_id in zip(seed[2], seed[1]):
    # Missing MetaNetX IDs never compare equal to anything, so they are not keys.
    if not pd.isna(m_id):
        seed_by_mnx.setdefault(m_id, s_id)

# Iterating over the column values (rather than positions) also removes the
# dependence on `bigg` having a 0..n-1 index.
for b_id, m_id in zip(bigg[1], bigg[2]):
    if m_id not in seed_by_mnx:
        # no SEED identifier shares this MetaNetX ID
        continue
    s_id = seed_by_mnx[m_id]
    C[b_id] = s_id

In [67]:
# Two-column table of the BiGG -> SEED compound pairs collected in C
C_df = DataFrame({'BiGG': list(C.keys()), 'SEED': list(C.values())})

In [68]:
# Inspect the BiGG -> SEED compound table.
C_df

Unnamed: 0,BiGG,SEED
0,oh1,cpd15275
1,h,cpd00067
2,nadh,cpd00004
3,grdp,cpd00283
4,prostgf2,cpd00488
...,...,...
2630,3htmelys,cpd00923
2631,4mptnl,cpd01585
2632,3AStrmyn,cpd02250
2633,35cdamp,cpd00713


In [69]:
# Write the compound translation table as a tab-separated file with a header
# row and without the row index
C_df.to_csv(r'compounds.tsv', sep="\t", index=False, header=True)

In [81]:
# Translator for reactions: keep only the reac_xref lines that mention BiGG
# ("bigg") or ModelSEED ("seedM") and write them to metanetx/grep_reac.tsv.
# The command is a raw string so that "\|" (grep alternation) reaches the shell
# unchanged instead of being parsed as an invalid Python escape sequence.
# NOTE(review): `tail -n +4` drops the first three lines that survive grep,
# presumably leftover header lines - confirm against the downloaded file.
os.system(r'cat metanetx/reac_xref.tsv | grep "bigg\|seedM" | tail -n +4 > metanetx/grep_reac.tsv ')

0

In [82]:
# Read the grep-filtered reaction cross-references. The file has no header
# row, so pandas labels the columns 0, 1 and 2.
reactions_df = pd.read_csv(
    "metanetx/grep_reac.tsv",
    sep="\t",
    header=None,
)

In [83]:
# Preview the first 20 rows of the raw reaction table
reactions_df.head(20)

Unnamed: 0,0,1,2
0,biggR:CRBNTD,EMPTY,H2CO3 dissociation||1 biggM:h2co3@biggC:x = 1 ...
1,biggR:H2CO3D2,EMPTY,Carboxylic acid dissociation||1 biggM:h2co3@bi...
2,biggR:H2CO3D2m,EMPTY,"Carboxylic acid dissociation, mitochondrial||1..."
3,biggR:HMR_5409,EMPTY,HMR 5409||1 biggM:h@biggC:e + 1 biggM:hco3@big...
4,biggR:RE2594C,EMPTY,RE2594||1 biggM:h@biggC:c + 1 biggM:CE2949@big...
5,biggR:R_CRBNTD,EMPTY,secondary/obsolete/fantasy identifier
6,biggR:R_H2CO3D2,EMPTY,secondary/obsolete/fantasy identifier
7,biggR:R_H2CO3D2m,EMPTY,secondary/obsolete/fantasy identifier
8,biggR:R_HMR_5409,EMPTY,secondary/obsolete/fantasy identifier
9,biggR:R_RE2594C,EMPTY,secondary/obsolete/fantasy identifier


In [84]:
# Keep only the rows whose third column is not the obsolete-identifier marker
notobsolete = reactions_df[2].ne('secondary/obsolete/fantasy identifier')
notob_reactions_df = reactions_df.loc[notobsolete]

In [85]:
# Preview the first 10 rows that survived the obsolete-ID filter
notob_reactions_df.head(10)

Unnamed: 0,0,1,2
0,biggR:CRBNTD,EMPTY,H2CO3 dissociation||1 biggM:h2co3@biggC:x = 1 ...
1,biggR:H2CO3D2,EMPTY,Carboxylic acid dissociation||1 biggM:h2co3@bi...
2,biggR:H2CO3D2m,EMPTY,"Carboxylic acid dissociation, mitochondrial||1..."
3,biggR:HMR_5409,EMPTY,HMR 5409||1 biggM:h@biggC:e + 1 biggM:hco3@big...
4,biggR:RE2594C,EMPTY,RE2594||1 biggM:h@biggC:c + 1 biggM:CE2949@big...
10,seedR:rxn22163,EMPTY,RXN0-5219.c||1 seedM:cpd00013@seedC:0 <=> 1 se...
11,seedR:rxn40958,EMPTY,rxn40958||1 seedM:cpd05262@seedC:0 <=> 1 seedM...
12,seedR:rxn42499,EMPTY,rxn42499||1 seedM:cpd00067@seedC:0 + 1 seedM:c...
13,seedR:rxn48508,EMPTY,rxn48508||1 seedM:cpd19028@seedC:0 <=> 1 seedM...
14,biggR:EX_h_e,MNXR02,H+ exchange||1 biggM:h@BOUNDARY = 1 biggM:h@bi...


In [86]:
# Now we will remove rows with "EMPTY" in the second column.
# The mask is computed from notob_reactions_df itself so that its index matches
# the frame it filters. A mask built from the unfiltered reactions_df has extra
# index labels, which makes pandas reindex the boolean Series and emit a
# "Boolean Series key will be reindexed" UserWarning.
notempty = notob_reactions_df[1] != 'EMPTY'
filtered_reactions_df = notob_reactions_df[notempty]
filtered_reactions_df[:10]

  filtered_reactions_df = notob_reactions_df[notempty]


Unnamed: 0,0,1,2
14,biggR:EX_h_e,MNXR02,H+ exchange||1 biggM:h@BOUNDARY = 1 biggM:h@bi...
16,biggR:HMR_1095,MNXR03,HMR 1095||1 biggM:h@biggC:n = 1 biggM:h@biggC:c
17,biggR:Ht,MNXR03,Proton diffusion||1 biggM:h@biggC:c = 1 biggM:...
18,biggR:Htcx,MNXR03,Proton transport to carboxysome via diffusion|...
19,biggR:Htex,MNXR03,Proton transport via diffusion (extracellular ...
20,biggR:Htg,MNXR03,"Proton transport, Golgi||1 biggM:h@biggC:c = 1..."
21,biggR:Hth,MNXR03,"H transporter,by diffusiontoapicoplast||1 bigg..."
22,biggR:Htl,MNXR03,Htl||1 biggM:h@biggC:c = 1 biggM:h@biggC:l
23,biggR:Htm,MNXR03,Uncoupling protein||1 biggM:h@biggC:m = 1 bigg...
24,biggR:Htm_cho,MNXR03,Uncoupling protein||1 biggM:h@biggC:m = 1 bigg...


In [87]:
# Same as for compounds: remove the third column (label 2), which is not needed
# from here on.
# The frame is rebound through drop() instead of deleted in place so that the
# cell can be re-run: `del frame[2]` raises KeyError once the column is gone.
filtered_reactions_df = filtered_reactions_df.drop(columns=[2], errors="ignore")

In [88]:
# Prefixed identifiers ("<namespace>:<id>") taken from the first column
w = filtered_reactions_df[0].tolist()
# how many reaction identifiers there are (bigg + seed)
len(w)

72016

In [89]:
# Split every "<namespace>:<id>" string into namespace and identifier.
# maxsplit=1 guarantees exactly two parts per entry: a ':' inside an identifier
# would otherwise create extra columns, and column label 2 is assigned the
# MetaNetX IDs in the next step.
l = [entry.split(":", 1) for entry in w]
reactions = DataFrame(l)
reactions.head()

Unnamed: 0,0,1
0,biggR,EX_h_e
1,biggR,HMR_1095
2,biggR,Ht
3,biggR,Htcx
4,biggR,Htex


In [90]:
# Append the MetaNetX identifiers as a third column (label 2)
m = filtered_reactions_df[1].tolist()
reactions[2] = m
reactions.head()

Unnamed: 0,0,1,2
0,biggR,EX_h_e,MNXR02
1,biggR,HMR_1095,MNXR03
2,biggR,Ht,MNXR03
3,biggR,Htcx,MNXR03
4,biggR,Htex,MNXR03


In [91]:
# Rows whose namespace column is "biggR"
bigg = reactions.loc[reactions[0].eq("biggR")]
bigg

Unnamed: 0,0,1,2
0,biggR,EX_h_e,MNXR02
1,biggR,HMR_1095,MNXR03
2,biggR,Ht,MNXR03
3,biggR,Htcx,MNXR03
4,biggR,Htex,MNXR03
...,...,...,...
72011,biggR,GALNACT1g,MNXR99995
72012,biggR,GALNACT1g_cho,MNXR99996
72013,biggR,GALNACT2g,MNXR99997
72014,biggR,GALNACT3g,MNXR99998


In [92]:
# Rows whose namespace column is "seedR"
seed = reactions.loc[reactions[0].eq("seedR")]
seed

Unnamed: 0,0,1,2
16,seedR,rxn08730,MNXR03
17,seedR,rxn11009,MNXR03
18,seedR,rxn13646,MNXR03
19,seedR,rxn22797,MNXR03
20,seedR,rxn26298,MNXR03
...,...,...,...
71949,seedR,rxn02173,MNXR99957
71951,seedR,rxn08576,MNXR99958
71954,seedR,rxn08577,MNXR99959
71957,seedR,rxn08579,MNXR99961


In [93]:
# Drop the namespace column (label 0) from both subsets; every row in each
# subset carries the same namespace value after the split above.
# drop() with errors="ignore" keeps the cell re-runnable, whereas `del frame[0]`
# raises KeyError when the column has already been removed.
bigg = bigg.drop(columns=[0], errors="ignore")
seed = seed.drop(columns=[0], errors="ignore")

In [94]:
# Renumber both subsets from 0 and discard the old index labels
bigg = bigg.reset_index(drop=True)
seed = seed.reset_index(drop=True)

In [95]:
# Reaction lookup table, filled in by the loop in the next cell
R = dict()

In [96]:
# Fill R with one SEED reaction ID per BiGG reaction ID, matched through the
# shared MetaNetX ID (column 2). When several SEED rows share a MetaNetX ID the
# first one is used; BiGG rows without any SEED match are skipped.

# First SEED ID seen for each MetaNetX ID. Building this once replaces a full
# scan of `seed` for every BiGG row with a dictionary lookup.
seed_by_mnx = {}
for m_id, s_id in zip(seed[2], seed[1]):
    # Missing MetaNetX IDs never compare equal to anything, so they are not keys.
    if not pd.isna(m_id):
        seed_by_mnx.setdefault(m_id, s_id)

# Iterating over the column values (rather than positions) also removes the
# dependence on `bigg` having a 0..n-1 index.
for b_id, m_id in zip(bigg[1], bigg[2]):
    if m_id not in seed_by_mnx:
        # no SEED identifier shares this MetaNetX ID
        continue
    s_id = seed_by_mnx[m_id]
    R[b_id] = s_id

In [97]:
# Display the full BiGG -> SEED reaction dictionary.
# NOTE(review): this prints every entry; a count or a small sample may be
# enough here - confirm whether the full dump is needed.
R

{'HMR_1095': 'rxn08730',
 'Ht': 'rxn08730',
 'Htcx': 'rxn08730',
 'Htex': 'rxn08730',
 'Htg': 'rxn08730',
 'Hth': 'rxn08730',
 'Htl': 'rxn08730',
 'Htm': 'rxn08730',
 'Htm_cho': 'rxn08730',
 'Htmi': 'rxn08730',
 'Htr': 'rxn08730',
 'Hts': 'rxn08730',
 'Htu': 'rxn08730',
 'Htx': 'rxn08730',
 'PCXHtpp': 'rxn08730',
 'GALTpts': 'rxn05567',
 'GALTptspp': 'rxn05567',
 'EX_galt_e': 'rxn08587',
 'GALTt': 'rxn08587',
 'GALTtex': 'rxn08587',
 'GALh': 'rxn00187',
 'GALm': 'rxn00187',
 'GLNS': 'rxn00187',
 'GAM': 'rxn00189',
 'GLUN': 'rxn00189',
 'GLUNm': 'rxn00189',
 'GLUNpp': 'rxn00189',
 'EX_gam6p_e': 'rxn05568',
 'GAM6Pt': 'rxn05568',
 'GAMAN6Ptex': 'rxn05568',
 'GAM6Pt6_2pp': 'rxn08590',
 'GAMpts': 'rxn05569',
 'GAMptspp': 'rxn05569',
 'EX_gam_e': 'rxn08593',
 'GAMt1r': 'rxn08593',
 'GAMtex': 'rxn08593',
 'GART': 'rxn04783',
 'EX_ga_e': 'rxn12797',
 'GAt1': 'rxn12797',
 'GAt2pp': 'rxn12798',
 'GCALDD': 'rxn00979',
 'GCALDDm': 'rxn00979',
 'EX_gcald_e': 'rxn09680',
 'GCALDt': 'rxn09680',
 'GC

In [98]:
# Two-column table of the BiGG -> SEED reaction pairs collected in R
R_df = DataFrame({'BiGG': list(R.keys()), 'SEED': list(R.values())})
R_df

Unnamed: 0,BiGG,SEED
0,HMR_1095,rxn08730
1,Ht,rxn08730
2,Htcx,rxn08730
3,Htex,rxn08730
4,Htg,rxn08730
...,...,...
7154,EX_galctn__L_e,rxn08577
7155,GALCTNLtex,rxn08577
7156,EX_galctn__D_e,rxn08579
7157,GALCTNtex,rxn08579


In [99]:
# Write the reaction translation table as a tab-separated file with a header
# row and without the row index
R_df.to_csv(r'reactions.tsv', sep="\t", index=False, header=True)