# Introduction
After the previous commits in notebook 33, Ben noticed that there are some issues that arise that prevent the model from working in MatLab. Here I will fix those issues.

In [1]:
import cameo
import pandas as pd
import cobra.io

In [2]:
model = cobra.io.read_sbml_model('../model/g-thermo.xml')

## Modify EC code in annotations
Here, the multiple EC codes are stored in the annotations as strings instead of as lists. This is a problem and will not be tolerated by CobraToolbox in Matlab and so needs to be fixed.

I also observed one metabolite still has 'EC ' in the annotation, I will write a script to remove it from whereever it is still present.

In [4]:
for rct in model.reactions:
    try:
        anno= rct.annotation['ec-code']
        if type(anno) == str:
            if len(anno) > 7:
                anno = anno.split(sep = ' ')
                rct.annotation['ec-code'] = anno
            else:
                continue
        else: 
            continue
    except KeyError: #rcts without ec-code
        continue

In [6]:
#remove any 'EC' in the annotation field
for rct in model.reactions:
    try:
        anno = rct.annotation['ec-code']
        for a in anno:
            if a in 'EC':
                anno.remove(a)
            else:
                continue
    except KeyError: #where there is no ec-code
        continue

Also, I see that there are some semicolons in the notes that were lifted to the annotation. I will remove them from the annotation field.

In [60]:
for rct in model.reactions:
    try:
        anno = rct.annotation['ec-code']
        if type(anno) == list:
            for ec in anno:
                if ec.find(';') < 0:
                    continue
                elif ec.find(';') > 0:
                    anno.remove(ec)
                    ec = ec.replace(';','')
                    anno.append(ec)
        if type(anno) == str:
            anno.replace(';','')
        else:
            continue
    except KeyError:#for rcts without ec code
        continue

In [64]:
#Save&commit
cobra.io.write_sbml_model(model,'../model/g-thermo.xml')

# Remove 'other group'
Per Ben's recommendation, I will remove the 'other' group that was added to include all reactions into a group.


In [69]:
model.remove_groups(model.groups.Other)


need to pass in a list



In [72]:
#save&commit
cobra.io.write_sbml_model(model,'../model/g-thermo.xml')

## Fix Chebi IDs
The Chebi IDs should have the format e.g. CHEBI:30772, not 30772. Otherwise the resource link will be wrongly generated: https://identifiers.org/chebi/30772 leads to nowhere, but https://identifiers.org/chebi/CHEBI:30772 works well.

In [120]:
model = cobra.io.read_sbml_model('../model/g-thermo.xml')

In [5]:
for met in model.metabolites:
    try:
        chebi = met.annotation['chebi']
        if type(chebi) == list:
            continue
        elif type(chebi) == str: 
            if chebi.find(':') > 0:
                continue
            else: 
                chebi = 'CHEBI:' + chebi
                met.annotation['chebi'] = chebi
    except KeyError: 
        print(met.id)

octdecacp_c
3hpalmACP_c
3hmrsACP_c
dodecacp_c
3hdecACP_c
toct2eACP_c
3hoctACP_c
but2eACP_c
6dg_c
cellb_c
stys_c
cellulose_c
starch_c
focytB561_c
decdp_c
dextrin_c
5mtr_c
cellb6p_c
3hbutACP_c
Biomass_c
Biomass_e
cmtdepp_c
cthzp_c
enzcys_c
enzscys_c
aglyc3p_c
cellb_e
gtbi_e
gtbi_c
tura_c
tura_e
kdg2_e
kdg2_c
dglcn5_e
dglcn5_c
mdgp_e
mdgp_c
tag__D_e
tag1p__D_c
tagdp__D_c


In [6]:
#save& commit
cobra.io.write_sbml_model(model,'../model/g-thermo.xml')

## Fix Glycan IDs

As Ben corrected in PR #56, if a kegg id has a G at the beginning, it should be stored under the kegg.glycan namespace, not kegg.compound. So here I will move it from the kegg compounds to kegg glycan and remove it from the kegg compound where necessary.


- move all with a G to the kegg.glycan
- remove the kegg.compound for those mets

In [22]:
model = cobra.io.read_sbml_model('../model/g-thermo.xml')

In [24]:
#for metabolites where there are both kegg.glycan and kegg.compound
#remove the kegg.glycan
for met in model.metabolites:
    try:
        glycan = met.annotation['kegg.glycan']
        try: 
            com = met.annotation['kegg.compound']
            if glycan in com: #i.e. if the kegg ID from notes was added to the kegg.compound field in duplicate
                del met.annotation['kegg.compound']
            else: 
                continue #there are quite some compounds who have both kegg compound and kegg glycan IDs. Those I will leave
        except:
            continue
    except KeyError:
        continue

In [25]:
#save
cobra.io.write_sbml_model(model,'../model/g-thermo.xml')

In [23]:
model.metabolites.tre_e.annotation

{'sbo': 'SBO:0000247',
 'bigg.metabolite': 'tre',
 'biocyc': ['META:CPD-15990', 'META:TREHALOSE'],
 'chebi': ['CHEBI:46211',
  'CHEBI:12284',
  'CHEBI:12281',
  'CHEBI:15251',
  'CHEBI:22365',
  'CHEBI:10202',
  'CHEBI:12287',
  'CHEBI:16551',
  'CHEBI:27082'],
 'hmdb': 'HMDB00975',
 'inchi_key': 'HDTRYLNUVZCQOY-LIZSDCNHSA-N',
 'kegg.compound': 'C01083',
 'kegg.glycan': 'G00293',
 'metanetx.chemical': 'MNXM198',
 'reactome.compound': ['868614', '189078'],
 'sabiork': '91',
 'seed.compound': 'cpd00794'}

In [None]:
#for mets where there is only a kegg compound, move it to kegg glycan where needed

In [18]:
met = model.metabolites.starch_c

In [19]:
met.annotation

{'sbo': 'SBO:0000247',
 'kegg.compound': 'G10495',
 'kegg.glycan': 'G10495',
 'metanetx.chemical': 'MNXM60163'}

In [20]:
glycan = met.annotation['kegg.glycan']

In [21]:
glycan[0]

'G'

In [9]:
del met.annotation['kegg.compound']