# Gene annotation correction
Fix errors in GSM gene mapping to match transcript notation

In [1]:
import pandas as pd
import cobra


### Load genome scale model

In [2]:
model = cobra.io.json.load_json_model("../genome_scale_models/iYLI647_corr.json")
model

0,1
Name,model
Memory address,10f453700
Number of metabolites,1121
Number of reactions,1348
Number of genes,646
Number of groups,0
Objective expression,1.0*biomass_C - 1.0*biomass_C_reverse_c1d5c
Compartments,"c, e, m, n, x, r, g, v"


### Fix reaction that has a duplicate gene

In [5]:
# fix Diphosphoglyceromutase gene_reaction_rule
print(model.reactions.DPGM.gene_reaction_rule)

# remove duplicate genes
model.reactions.DPGM.gene_reaction_rule = 'YALI0B02728g or YALI0D09229g'

display(model.reactions.DPGM)

YALI0B02728g or (YALI0D09229g and YALI0D09229g)


0,1
Reaction identifier,DPGM
Name,Diphosphoglyceromutase
Memory address,0x13f686fb0
Stoichiometry,13dpg[c] <=> 23dpg[c] + h[c]  3_Phospho_D_glyceroyl_phosphate <=> 2_3_Disphospho_D_glycerate + H
GPR,YALI0B02728g or YALI0D09229g
Lower bound,-1000.0
Upper bound,1000.0


### Remove genes that are not in the transcriptomics data set

In [6]:
# fix cytochrome c oxidase mitochondrial gene_reaction_rule
print(model.reactions.CYOOm.gene_reaction_rule)

# remove misanotated gene (YALI0F04)
model.reactions.CYOOm.gene_reaction_rule = 'YALI0E10144g and YALI0E12628g and YALI0E16709g and YALI0E19723g and YALI0F03201g and YALI0F03567g and YALI0F04103g'

display(model.reactions.CYOOm)

YALI0E10144g and YALI0E12628g and YALI0E16709g and YALI0E19723g and YALI0F03201g and YALI0F03567g and YALI0F04103g and YALI0F04


0,1
Reaction identifier,CYOOm
Name,cytochrome c oxidase mitochondrial
Memory address,0x13f7e9780
Stoichiometry,4.0 focytc[m] + 6.0 h[m] + o2[m] --> 4.0 ficytc[m] + 2.0 h2o[m] + 6.0 h[c]  4.0 Ferrocytochrome_c + 6.0 H + O2 --> 4.0 Ferricytochrome_c + 2.0 H2O + 6.0 H
GPR,YALI0E10144g and YALI0E12628g and YALI0E16709g and YALI0E19723g and YALI0F03201g and YALI0F03567g...
Lower bound,0.0
Upper bound,1000.0


In [7]:
# ubiquinol 6 cytochrome c reductase gene_reaction_rule
print(model.reactions.CYOR_u6m.gene_reaction_rule)

# remove misanotated gene (YALI0F24)
model.reactions.CYOR_u6m.gene_reaction_rule = 'YALI0A02915g and YALI0A17468g and YALI0B01540g and YALI0C12210g and YALI0E34037g and YALI0F01771g and YALI0F08613g'

display(model.reactions.CYOR_u6m)

YALI0A02915g and YALI0A17468g and YALI0B01540g and YALI0C12210g and YALI0E34037g and YALI0F01771g and YALI0F08613g and YALI0F24


0,1
Reaction identifier,CYOR_u6m
Name,ubiquinol 6 cytochrome c reductase
Memory address,0x13f7eb0d0
Stoichiometry,2.0 ficytc[m] + 1.5 h[m] + q6h2[m] --> 2.0 focytc[m] + 1.5 h[c] + q6[m]  2.0 Ferricytochrome_c + 1.5 H + Ubiquinol_6 --> 2.0 Ferrocytochrome_c + 1.5 H + Ubiquinone_6
GPR,YALI0A02915g and YALI0A17468g and YALI0B01540g and YALI0C12210g and YALI0E34037g and YALI0F01771g...
Lower bound,0.0
Upper bound,1000.0


### Fix gene ids that have typos

In [8]:
# fix typo in hexadecanoate transport gene_reaction_rule
print(model.reactions.HCAt.gene_reaction_rule)

# fix typo (YALIB02544g)
model.reactions.HCAt.gene_reaction_rule = 'YALI0C20265g or YALI0B02544g'

display(model.reactions.HCAt)

YALI0C20265g or YALIB02544g


0,1
Reaction identifier,HCAt
Name,Hexanoate n C60 transport in via uniport YL
Memory address,0x13fbae470
Stoichiometry,hdca[e] --> hdca[c]  Hexadecanoate__n_C160_ --> Hexadecanoate__n_C160_
GPR,YALI0C20265g or YALI0B02544g
Lower bound,0.0
Upper bound,1000.0


In [9]:
# fix typo in decanoate transport gene_reaction_rule
print(model.reactions.dca_t.gene_reaction_rule)

# fix typo (YALIB02544g)
model.reactions.dca_t.gene_reaction_rule = 'YALI0C20265g or YALI0B02544g'

display(model.reactions.dca_t)

YALI0C20265g or YALIB02544g


0,1
Reaction identifier,dca_t
Name,decanoate transport in via uniport
Memory address,0x13fbaed40
Stoichiometry,dca[e] --> dca[c]  Decanoate__n_C100_ --> Decanoate__n_C100_
GPR,YALI0C20265g or YALI0B02544g
Lower bound,0.0
Upper bound,1000.0


In [10]:
# fix typo in Acetyl-CoA:acetoacetyl-CoA transferase gene_reaction_rule
print(model.reactions.ACACCT.gene_reaction_rule)

# fix typo (YALI00F26587g)
model.reactions.ACACCT.gene_reaction_rule = 'YALI0F26587g'

display(model.reactions.ACACCT)

YALI00F26587g


0,1
Reaction identifier,ACACCT
Name,Acetyl-CoA:acetoacetyl-CoA transferase
Memory address,0x13fc0e2f0
Stoichiometry,acac[c] + accoa[c] --> aacoa[c] + ac[c]  Acetoacetate + Acetyl_CoA --> Acetoacetyl_CoA + Acetate
GPR,YALI0F26587g
Lower bound,0.0
Upper bound,1000.0


### Save the updated model

In [11]:
# save the updated model
cobra.io.save_json_model(model, "../genome_scale_models/iYLI647_corr_2.json")
model

0,1
Name,model
Memory address,10f453700
Number of metabolites,1121
Number of reactions,1348
Number of genes,648
Number of groups,0
Objective expression,1.0*biomass_C - 1.0*biomass_C_reverse_c1d5c
Compartments,"c, e, m, n, x, r, g, v"
