In [5]:
import mackinac
from Gapfilling import *
import pandas as pd
import re
import gurobipy
from contextlib import contextmanager

In [3]:
mackinac.modelseed.ms_client.url = 'https://p3.theseed.org/services/ProbModelSEED/'
mackinac.workspace.ws_client.url = 'https://p3.theseed.org/services/Workspace'
mackinac.genome.patric_url = 'https://www.patricbrc.org/api/'

In [4]:
# password: ASafSLUqQc@7zyP
mackinac.get_token("fcomnozz")

patric password: ········


'fcomnozz@patricbrc.org'

In [6]:
# Function for avoiding stdout
@contextmanager
def suppress_stdout():
    with open(os.devnull, "w") as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:  
            yield
        finally:
            sys.stdout = old_stdout

In [7]:
def get_models(ID_list):
    """
    This function receives a list with PATRIC IDs, reconstructs those models and converts them into COBRA models.
    It returns a dictionary where keys are model IDs and values are the models.
    """
    # Reconstructing models; this step will likely take a long time
    for ID in ID_list:
        # RAST can be used too (source = "rast)
        mackinac.reconstruct_modelseed_model(ID)
    # Creating COBRA models
    d = {}
    for ID in ID_list:
        d[ID] = mackinac.create_cobra_model_from_modelseed_model(model_id=ID)
    return d

In [8]:
IDs = ['1428.847', '451708.12']

In [9]:
D = get_models(IDs)

  warn('Model for genome ID {0} has no genes, verify genome ID is valid'.format(genome_id))


Using license file /home/fco/gurobi.lic
Academic license - for non-commercial use only - expires 2021-06-01


In [10]:
D

{'1428.847': <Model 1428.847 at 0x7fbb2d527ca0>,
 '451708.12': <Model 451708.12 at 0x7fba83dc4730>}

In [11]:
def modelseed_gapfilling(model_dictionary):
    """
    Performs ModelSEED gapfilling against SEED reaction database.
    """
    for x in model_dictionary.values():
        with suppress_stdout():
            mackinac.gapfill_modelseed_model(x)

In [12]:
modelseed_gapfilling(model_dictionary=D)

In [14]:
def seed_to_bigg(model, m_table, r_table):
    """
    Translates metabolites and reactions of the model to BiGG nomenclature.
    """
    new_model = cobra.Model(str(model) + " (translated)")
    # metabolites
    for metabolite in model.metabolites:
        m = metabolite.copy()
        sid = m.id
        # assumption -> nx_c; maybe will need to change it
        c = sid[-2:]
        sid = sid[:-2]
        try:
            bid = m_table[m_table["SEED"] == sid]["BiGG"].values[0]
        except IndexError:
            new_model.add_metabolites(m)
            continue
        ID = str(bid)+str(c)
        m.id = ID
        new_model.add_metabolites(m)
    # reactions
    for reaction in model.reactions:
        r = reaction.copy()
        sid = r.id
        # We will use metabolites table for exchange reactions
        # ID
        if sid.startswith('EX_'):
            c = sid[-2:]
            sid = sid[3:-2]
            try:
                bid = m_table[m_table["SEED"] == sid]["BiGG"].values[0]
                bid = "EX_" + str(bid) + str(c)
                r.id = bid
            except IndexError:
                pass
        else:
            c = sid[-2:]
            sid = sid[:-2]
            try:
                bid = r_table[r_table["SEED"] == sid]["BiGG"].values[0]
                # BiGG reactions' IDs don't have compartment termination
                r.id = bid
            except IndexError:
                pass
        # METABOLITES WITHIN REACTION
        # metabolite objects
        metab = [x.copy() for x in r.metabolites]
        # SEED IDs
        met_id = [str(x) for x in r.metabolites.keys()]
        # stoichiometric coefficients
        coef = [float(x) for x in r.metabolites.values()]
        # managing compartment terminations
        met_c = [x[-2:] for x in met_id]
        met_id = [x[:-2] for x in met_id]
        # getting BiGG ids
        new_ids = []
        for x in met_id:
            try:
                new_id = m_table[m_table["SEED"] == x]["BiGG"].values[0]
                new_ids.append(new_id)
            except IndexError:
                new_ids.append(x)
        # dict
        m_dict = {}
        # adding compartment terminations
        for i in range(len(new_ids)):
            new_ids[i] = str(new_ids[i]) + str(met_c[i])
            # changing metabolites' id
            metab[i].id = new_ids[i]
            # making the dict
            m_dict[metab[i]] = coef[i]
        # removing old metabolites
        r.subtract_metabolites(r.metabolites)
        # adding translated metabolites
        r.add_metabolites(m_dict)
        # ADDING REACTION
        new_model.add_reaction(r)
    return new_model


In [16]:
n = {}
for i in D:
    n[i] = seed_to_bigg(D[i], m_table = Metabolites, r_table = Reactions)
n

Ignoring reaction 'EX_h2o_e' since it already exists.
Ignoring reaction 'EX_o2_e' since it already exists.
Ignoring reaction 'EX_h2o_e' since it already exists.
Ignoring reaction 'EX_o2_e' since it already exists.


{'1428.847': <Model 1428.847 (translated) at 0x7fba838fab80>,
 '451708.12': <Model 451708.12 (translated) at 0x7fba83950be0>}

In [4]:
seed_model = mackinac.create_cobra_model_from_modelseed_model(model_id="226186.12")

Using license file /home/fco/gurobi.lic
Academic license - for non-commercial use only - expires 2021-06-01


In [7]:
seed_model.optimize().objective_value

185.15224240824574

In [10]:
mackinac.gapfill_modelseed_model(seed_model)

{'fba_count': 0,
 'gapfilled_reactions': 0,
 'gene_associated_reactions': 967,
 'genome_ref': '/fcomnozz@patricbrc.org/modelseed/226186.12/genome',
 'id': '226186.12',
 'integrated_gapfills': 3,
 'name': 'Bacteroides thetaiotaomicron VPI-5482',
 'num_biomass_compounds': 85,
 'num_biomasses': 1,
 'num_compartments': 2,
 'num_compounds': 1213,
 'num_genes': 704,
 'num_reactions': 1086,
 'ref': '/fcomnozz@patricbrc.org/modelseed/226186.12',
 'rundate': '2021-04-12T12:12:47Z',
 'source': 'ModelSEED',
 'source_id': '226186.12',
 'template_ref': '/chenry/public/modelsupport/templates/GramNegModelTemplate',
 'type': 'GenomeScale',
 'unintegrated_gapfills': 0}

In [14]:
d = {1: 'a', 2: 'b'}
for i in d.keys():
    print(d[i])
    

a
b


In [9]:
seed_model.optimize().objective_value

185.15224240824574

In [166]:
d = {}
name  = "model_" + "21232.34".replace(".","_")
d[name] = 35
d

{'model_21232_34': 35}

In [5]:
seed_model.reactions.rxn00011_c

0,1
Reaction identifier,rxn00011_c
Name,pyruvate:thiamin diphosphate acetaldehydetransferase (decarboxylating)
Memory address,0x07f5e59aa2fd0
Stoichiometry,cpd00020_c + cpd00056_c + cpd00067_c --> cpd00011_c + cpd03049_c  Pyruvate_c + TPP_c + H+_c --> CO2_c + 2-Hydroxyethyl-ThPP_c
GPR,( 226186.12.peg.2137 and 226186.12.peg.2138 )
Lower bound,0.0
Upper bound,1000.0


In [6]:
# this function can be useful for extracting the name of organism
mackinac.list_modelseed_models(print_output=True)

Model /fcomnozz@patricbrc.org/modelseed/226186.12 for organism Bacteroides thetaiotaomicron VPI-5482 with 967 reactions and 1142 metabolites


In [7]:
# MODEL TRANSLATION TESTING
# reference
bigg_model = read_sbml_model("BiGG_files/iJN746.xml")

In [8]:
bigg_model.reactions.ACS

0,1
Reaction identifier,ACS
Name,Acetyl-CoA synthetase
Memory address,0x07f5dd15701c0
Stoichiometry,ac_c + atp_c + coa_c <=> accoa_c + amp_c + ppi_c  Acetate + ATP C10H12N5O13P3 + Coenzyme A <=> Acetyl-CoA + AMP C10H12N5O7P + Diphosphate
GPR,PP_4487 or PP_4702
Lower bound,-999999.0
Upper bound,999999.0


In [15]:
# loading translation tables
Reactions = pd.read_csv("reactions.tsv", sep = "\t")
Metabolites = pd.read_csv("compounds.tsv", sep = "\t")

In [10]:
Metabolites[Metabolites["BiGG"]=='atp']

Unnamed: 0,BiGG,SEED
880,atp,cpd00002


In [11]:
# Metabolites' table does not consider compartments so we must add them
new_model = cobra.Model("test")

In [12]:
seed_model.metabolites.cpd00001_c

0,1
Metabolite identifier,cpd00001_c
Name,H2O_c
Memory address,0x07f5dd14ae6d0
Formula,H2O
Compartment,c
In 315 reaction(s),"rxn12639_c, rxn12634_c, rxn02418_c, rxn06823_c, rxn05319_c, rxn00902_c, rxn00503_c, rxn03002_c, rxn00816_c, rxn12643_c, rxn00109_c, rxn03842_c, rxn00927_c, rxn02090_c, rxn00278_c, rxn00743_c, rxn07..."


In [13]:
m = seed_model.metabolites.cpd00001_c.copy()

In [14]:
ID = m.id
ID

'cpd00001_c'

In [15]:
c = ID[-2:]
ID = ID[:-2]
print(c, ID)

_c cpd00001


In [16]:
ID2 = Metabolites[Metabolites['SEED'] == ID]["BiGG"]
ID2

2634    h2o
Name: BiGG, dtype: object

In [17]:
if 'cpd00001' in Metabolites['SEED']:
    print("yes")

In [18]:
ID2.values[0]

'h2o'

In [19]:
meta = str(ID2.values[0])+str(c)
meta

'h2o_c'

In [20]:
m.id = meta

In [21]:
new_model.add_metabolites(m)
new_model

0,1
Name,test
Memory address,0x07f5e51213d30
Number of metabolites,1
Number of reactions,0
Number of groups,0
Objective expression,0
Compartments,c


In [160]:
def seed_to_bigg(model, m_table, r_table):
    new_model = cobra.Model(str(model) + " (translated)")
    # metabolites
    for metabolite in model.metabolites:
        m = metabolite.copy()
        sid = m.id
        # assumption -> nx_c; maybe will need to change it
        c = sid[-2:]
        sid = sid[:-2]
        try:
            bid = m_table[m_table["SEED"] == sid]["BiGG"].values[0]
        except IndexError:
            new_model.add_metabolites(m)
            continue
        ID = str(bid)+str(c)
        m.id = ID
        new_model.add_metabolites(m)
    # reactions
    for reaction in model.reactions:
        r = reaction.copy()
        sid = r.id
        # We will use metabolites table for exchange reactions
        # ID
        if sid.startswith('EX_'):
            c = sid[-2:]
            sid = sid[3:-2]
            try:
                bid = m_table[m_table["SEED"] == sid]["BiGG"].values[0]
                bid = "EX_" + str(bid) + str(c)
                r.id = bid
            except IndexError:
                pass
        else:
            c = sid[-2:]
            sid = sid[:-2]
            try:
                bid = r_table[r_table["SEED"] == sid]["BiGG"].values[0]
                # BiGG reactions' IDs don't have compartment termination
                r.id = bid
            except IndexError:
                pass
        # METABOLITES WITHIN REACTION
        # metabolite objects
        metab = [x.copy() for x in r.metabolites]
        # SEED IDs
        met_id = [str(x) for x in r.metabolites.keys()]
        # stoichiometric coefficients
        coef = [float(x) for x in r.metabolites.values()]
        # managing compartment terminations
        met_c = [x[-2:] for x in met_id]
        met_id = [x[:-2] for x in met_id]
        # getting BiGG ids
        new_ids = []
        for x in met_id:
            try:
                new_id = m_table[m_table["SEED"] == x]["BiGG"].values[0]
                new_ids.append(new_id)
            except IndexError:
                new_ids.append(x)
        # dict
        m_dict = {}
        # adding compartment terminations
        for i in range(len(new_ids)):
            new_ids[i] = str(new_ids[i]) + str(met_c[i])
            # changing metabolites' id
            metab[i].id = new_ids[i]
            # making the dict
            m_dict[metab[i]] = coef[i]
        # removing old metabolites
        r.subtract_metabolites(r.metabolites)
        # adding translated metabolites
        r.add_metabolites(m_dict)
        # ADDING REACTION
        new_model.add_reaction(r)
    return new_model

In [158]:
model = seed_to_bigg(seed_model, m_table=Metabolites, r_table=Reactions)

Ignoring reaction 'EX_h2o_e' since it already exists.
Ignoring reaction 'EX_nh4_e' since it already exists.
Ignoring reaction 'EX_mg2_e' since it already exists.
Ignoring reaction 'EX_o2_e' since it already exists.


In [23]:
# REACTIONS
seed_model.reactions.rxn00011_c

0,1
Reaction identifier,rxn00011_c
Name,pyruvate:thiamin diphosphate acetaldehydetransferase (decarboxylating)
Memory address,0x07f5e59aa2fd0
Stoichiometry,cpd00020_c + cpd00056_c + cpd00067_c --> cpd00011_c + cpd03049_c  Pyruvate_c + TPP_c + H+_c --> CO2_c + 2-Hydroxyethyl-ThPP_c
GPR,( 226186.12.peg.2137 and 226186.12.peg.2138 )
Lower bound,0.0
Upper bound,1000.0


BiGG reactions IDs don't usually have a compartment termination, except for exchange reactions. We must take that into account.

In [26]:
new_model = cobra.Model('test')

In [27]:
r = seed_model.reactions.rxn00011_c

In [28]:
ID = r.id
c = ID[-2:]
ID = ID[:-2]
print(ID,c)

rxn00011 _c


In [29]:
ID2 = Reactions[Reactions['SEED'] == ID]['BiGG']

In [31]:
ID2

1833       ACLSa
1834    PDHam1hi
1835     PDHam1m
1836    PDHam1mi
Name: BiGG, dtype: object

In [30]:
ID2.values[0]

'ACLSa'

In [32]:
# if there are several names the first one will be chosen 

In [108]:
# there's no EX_ reactions in our translation table; we'll try with metabolites table
r = seed_model.reactions.EX_cpd00159_e
r

0,1
Reaction identifier,EX_cpd00159_e
Name,L-Lactate_e exchange
Memory address,0x07f5df7710c10
Stoichiometry,cpd00159_e <=> L-Lactate_e <=>
GPR,
Lower bound,-1000.0
Upper bound,1000.0


In [55]:
ID = r.id
c = ID[-2:]
ID = ID[3:-2]
ID

'cpd00159'

In [57]:
ID2 = Metabolites[Metabolites["SEED"] == ID]["BiGG"].values[0]
ID2

'lac__L'

In [58]:
ID2 = 'EX_' + str(ID2) + str(c)
ID2

'EX_lac__L_e'

In [133]:
r = seed_model.reactions.rxn00011_c.copy()
r

0,1
Reaction identifier,rxn00011_c
Name,pyruvate:thiamin diphosphate acetaldehydetransferase (decarboxylating)
Memory address,0x07f5df772eac0
Stoichiometry,cpd00020_c + cpd00056_c + cpd00067_c --> cpd00011_c + cpd03049_c  Pyruvate_c + TPP_c + H+_c --> CO2_c + 2-Hydroxyethyl-ThPP_c
GPR,( 226186.12.peg.2137 and 226186.12.peg.2138 )
Lower bound,0.0
Upper bound,1000.0


In [134]:
# metabolite objects
metab = [x.copy() for x in r.metabolites]
# SEED IDs
met_id = [str(x) for x in r.metabolites.keys()]
# stoichiometric coefficients
coef = [float(x) for x in r.metabolites.values()]
# managing compartment terminations
met_c = [x[-2:] for x in met_id]
met_id = [x[:-2] for x in met_id]
print(metab, met_id, met_c, coef)

[<Metabolite cpd00011_c at 0x7f5df6deafd0>, <Metabolite cpd03049_c at 0x7f5df7fe62b0>, <Metabolite cpd00020_c at 0x7f5df7fe6940>, <Metabolite cpd00056_c at 0x7f5df7fe66d0>, <Metabolite cpd00067_c at 0x7f5df812e040>] ['cpd00011', 'cpd03049', 'cpd00020', 'cpd00056', 'cpd00067'] ['_c', '_c', '_c', '_c', '_c'] [1.0, 1.0, -1.0, -1.0, -1.0]


In [139]:
# getting BiGG ids
new_ids = []
for x in met_id:
    try:
        new_id = Metabolites[Metabolites["SEED"] == x]["BiGG"].values[0]
        new_ids.append(new_id)
    except IndexError:
        new_ids.append(x)
new_ids

['co2', '2ahethmpp', 'pyr', 'thmpp', 'h']

In [140]:
# dict
m_dict = {}
# adding compartment terminations
for i in range(len(new_ids)):
    new_ids[i] = str(new_ids[i]) + str(met_c[i])
    # changing metabolites' id
    metab[i].id = new_ids[i]
    # making the dict
    m_dict[metab[i]] = coef[i]
m_dict

{<Metabolite co2_c at 0x7f5df6deafd0>: 1.0,
 <Metabolite 2ahethmpp_c at 0x7f5df7fe62b0>: 1.0,
 <Metabolite pyr_c at 0x7f5df7fe6940>: -1.0,
 <Metabolite thmpp_c at 0x7f5df7fe66d0>: -1.0,
 <Metabolite h_c at 0x7f5df812e040>: -1.0}

In [141]:
# removing old metabolites
r.subtract_metabolites(r.metabolites)
r

0,1
Reaction identifier,rxn00011_c
Name,pyruvate:thiamin diphosphate acetaldehydetransferase (decarboxylating)
Memory address,0x07f5df772eac0
Stoichiometry,--> -->
GPR,( 226186.12.peg.2137 and 226186.12.peg.2138 )
Lower bound,0.0
Upper bound,1000.0


In [142]:
# adding metabolites
r.add_metabolites(m_dict)

In [143]:
r

0,1
Reaction identifier,rxn00011_c
Name,pyruvate:thiamin diphosphate acetaldehydetransferase (decarboxylating)
Memory address,0x07f5df772eac0
Stoichiometry,h_c + pyr_c + thmpp_c --> 2ahethmpp_c + co2_c  H+_c + Pyruvate_c + TPP_c --> 2-Hydroxyethyl-ThPP_c + CO2_c
GPR,( 226186.12.peg.2137 and 226186.12.peg.2138 )
Lower bound,0.0
Upper bound,1000.0


In [38]:
# just one time
"""
mackinac.reconstruct_modelseed_model('86668.3')
"""

{'fba_count': 0,
 'gapfilled_reactions': 0,
 'gene_associated_reactions': 1283,
 'genome_ref': '/fcomnozz@patricbrc.org/modelseed/86668.3/genome',
 'id': '86668.3',
 'integrated_gapfills': 0,
 'name': 'Bacillus niacini F8',
 'num_biomass_compounds': 100,
 'num_biomasses': 1,
 'num_compartments': 2,
 'num_compounds': 1430,
 'num_genes': 1003,
 'num_reactions': 1283,
 'ref': '/fcomnozz@patricbrc.org/modelseed/86668.3',
 'rundate': '2021-05-03T10:32:14Z',
 'source': 'ModelSEED',
 'source_id': '86668.3',
 'template_ref': '/chenry/public/modelsupport/templates/GramPosModelTemplate',
 'type': 'GenomeScale',
 'unintegrated_gapfills': 0}