# Merge models

The model was improved in terms of stoichometric, mass and change balance, but it is lacking the previous annotations.

In [1]:
from pathlib import Path

import cobra

In [2]:
ROOT = Path.cwd().parent
model_file = str(ROOT / "iMN22HE.xml")
new_model_file = str(ROOT / "M.nitroreducensGEM.xml")

In [4]:
# model at HEAD
model = cobra.io.read_sbml_model(model_file)
# 100% consistency new model
new_model = cobra.io.read_sbml_model(new_model_file)

The `new_model` is at the starting point (first commit) annotation-wise.

In [5]:
{k for reac in new_model.reactions for k in reac.annotation}

set()

In [6]:
{k for gene in new_model.genes for k in gene.annotation}

set()

In [7]:
{k for metabolite in new_model.metabolites for k in metabolite.annotation}

{'kegg.compound'}

### Check the identifiers between the two models

In [9]:
len(model.reactions) == len(new_model.reactions)

True

Let's check if the identifiers are the same, with the symmetric difference.

In [10]:
{r.id for r in model.reactions} ^ {r.id for r in new_model.reactions}

{'ASPO2',
 'ASPO2x',
 'GHMT2',
 'GHMT2r',
 'GTPCI',
 'PEP_guanylytransferase',
 'R00424',
 'R12646'}

In [13]:
reac_ids_to_change = {r.id for r in model.reactions} - {r.id for r in new_model.reactions}

In [14]:
reac_ids_new = {r.id for r in new_model.reactions} - {r.id for r in model.reactions}

Four identifiers were changed in the reactions.

In [15]:
reac_renamed = {k: v for k, v in zip(reac_ids_to_change, reac_ids_new)}

In [16]:
reac_renamed

{'ASPO2x': 'ASPO2',
 'R00424': 'GTPCI',
 'PEP_guanylytransferase': 'GHMT2r',
 'GHMT2': 'R12646'}

First, change them in the previous model, to facilitate the annotation.

In [19]:
for new_id, reac_id in reac_renamed.items():
    model.reactions.get_by_id(new_id).id = reac_id

Same with genes and metabolites.

In [20]:
print(f"ΔGenes -> { {g.id for g in model.genes} ^ {g.id for g in new_model.genes} }")
print(f"ΔMets -> { {m.id for m in model.metabolites} ^ {m.id for m in new_model.metabolites} }")

ΔGenes -> set()
ΔMets -> set()


### Apply annotation from master to new model

Both are fine.

In [21]:
for reac in model.reactions:
    new_reac = new_model.reactions.get_by_id(reac.id)
    new_reac.annotation = reac.annotation
for gene in model.genes:
    new_gene = new_model.genes.get_by_id(gene.id)
    new_gene.annotation = gene.annotation
for metabolite in model.metabolites:
    new_metabolite = new_model.metabolites.get_by_id(metabolite.id)
    new_metabolite.annotation = metabolite.annotation

In [30]:
print(f"Reac annotations -> { {k for reac in new_model.reactions for k in reac.annotation} }")
print(f"Gene annotations -> { {k for gene in new_model.genes for k in gene.annotation} }")
print(f"Met annotations -> { {k for met in new_model.metabolites for k in met.annotation} }")

Reac annotations -> {'rhea', 'kegg.reaction', 'rheaR', 'keggR', 'ec-code', 'mnx', 'metacycR', 'sabiorkR', 'metanetx.reaction', 'bigg.reaction', 'seed.reaction', 'sbo', 'seedR', 'biggR', 'metacyc.reaction', 'sabiork.reaction'}
Gene annotations -> {'symbol', 'locus_tag', 'sbo', 'ncbiprotein', 'uniprot'}
Met annotations -> {'keggD', 'keggC', 'bigg.metabolite', 'lipidmapsM', 'sbo', 'metacycM', 'hmdb', 'reactome', 'lipidmaps', 'seed.compound', 'seedM', 'metanetx.chemical', 'slm', 'sabiork.compound', 'rheaG', 'reactomeM', 'keggG', 'kegg.compound', 'envipath', 'chebi', 'kegg.drug', 'kegg.glycan', 'metacyc.compound', 'sabiorkM', 'biggM', 'envipathM'}


### Annotate subsystems

While we're at it, we can give the reactions subsystems from the spreadsheet.

In [23]:
import pandas as pd

In [26]:
xls_model = pd.read_excel(ROOT / "M.nitroreducensGEM.xlsx")

In [28]:
xls_model.head()

Unnamed: 0,Abbreviation,Description,Reaction,EC-Number,GPR,Gene,Subsystem,Reversible,Lower bound,Upper bound,Objective
0,ALAD_L,L-Alanine:NAD+ oxidoreductase (deaminating)_c0,ala-L[c] + nad[c] + h2o[c] <=> pyr[c] + nh4[c]...,EC-1.4.1.1,ANME2D_RS14405,ANME2D_RS14405,Alanine and Aspartate Metabolism,1,-1000,1000,0
1,ASPTA,aspartate transaminase,glu-L[c] + oaa[c] <=> akg[c] + asp-L[c],EC-2.6.1.1,ANME2D_RS08380 or ANME2D_RS05600 or ANME2D_RS0...,ANME2D_RS08380 ANME2D_RS05600 ANME2D_RS078...,Alanine and Aspartate Metabolism,1,-1000,1000,0
2,ARGSS,argininosuccinate synthase,asp-L[c] + atp[c] + citr-L[c] <=> amp[c] + arg...,EC-6.3.4.5,ANME2D_RS05565,ANME2D_RS05565,Alanine and Aspartate Metabolism,1,-1000,1000,0
3,ARGSL,argininosuccinate lyase,argsuc[c] <=> arg-L[c] + fum[c],EC-4.3.2.1,ANME2D_RS01360,ANME2D_RS01360,Alanine and Aspartate Metabolism,1,-1000,1000,0
4,ADSS,adenylosuccinate synthase,asp-L[c] + gtp[c] + imp[c] <=> dcamp[c] + gdp[...,EC-6.3.4.4,ANME2D_RS03615,ANME2D_RS03615,Alanine and Aspartate Metabolism,1,-1000,1000,0


In [45]:
for reac in new_model.reactions:
    matched = xls_model.loc[xls_model.Abbreviation == reac.id, ["EC-Number", "Subsystem"]]
    if len(matched):
        reac.annotation["ec-code"] = matched.iloc[0, 0]
        reac.subsystem = matched.iloc[0, 1]

Write the merged model.

In [48]:
cobra.io.write_sbml_model(new_model, model_file)