In [10]:
# Import libraries - REQUIRES pip version 9.0.3
import pandas
import pandas as pd
import os
from os.path import join
import sys
import scipy.stats
import numpy
import math
import pickle
import copy
import time
import warnings
import gc

# Using Cobrapy 0.13.0
import cobra
import cobra.test
import cobra.flux_analysis.gapfilling
from cobra.io import write_sbml_model
from cobra.flux_analysis import  flux_variability_analysis
from cobra.flux_analysis.reaction import assess_component
from cobra.manipulation.delete import *
from cobra.flux_analysis.parsimonious import add_pfba
from cobra.medium import find_boundary_types
from cobra.util import solver as sutil


# Confidence levels in each annotation
#import probanno

# Estabish handler for logger
import logging
logging.basicConfig()
logger = logging.getLogger('logger')

# Verbose exception printing
%xmode


Exception reporting mode: Minimal


In [11]:
# Quicker way to read in models
import pickle
def read_model(fileName, obj='none'):
    """Load a COBRA model from disk, picking the reader from the file extension.

    Parameters
    ----------
    fileName : str
        Path to the model file. The text after the last '.' selects the
        reader (case-insensitive): 'sbml'/'xml', 'json', 'yaml', 'mat', 'pkl'.
    obj : optional
        Value assigned to ``model.objective``. The default sentinel string
        'none' leaves the objective stored in the file untouched.

    Returns
    -------
    The loaded model. Every reaction in ``model.boundary`` has its bounds
    reset to (-1000., 1000.) before the model is returned.

    Raises
    ------
    TypeError
        If the file extension is not one of the recognized types.
    """
    # Lower-case the extension so 'model.XML' and 'model.xml' behave the same.
    fileType = fileName.rsplit('.', 1)[-1].lower()

    if fileType in ('sbml', 'xml'):
        model = cobra.io.read_sbml_model(fileName)
    elif fileType == 'json':
        model = cobra.io.load_json_model(fileName)
    elif fileType == 'yaml':
        model = cobra.io.load_yaml_model(fileName)
    elif fileType == 'mat':
        model = cobra.io.load_matlab_model(fileName)
    elif fileType == 'pkl':
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        # The context manager guarantees the file handle is closed.
        with open(fileName, 'rb') as handle:
            model = pickle.load(handle)
    else:
        raise TypeError('Unrecognized file extension')

    if obj != 'none':
        model.objective = obj

    # Open every boundary reaction in both directions.
    for rxn in model.boundary:
        rxn.bounds = (-1000., 1000.)

    return model

In [12]:
# Round-trip the polished model: read the XML, export it as JSON and SBML,
# then reload the freshly written SBML so later cells work from that file.

# `cwd` is not assigned in any visible cell, so on a fresh kernel this cell
# would raise NameError. Fall back to the process working directory when it
# has not been defined already.
# NOTE(review): confirm os.getcwd() is the intended base directory.
if 'cwd' not in globals():
    cwd = os.getcwd()

polished_stem = cwd + '/Gc_GENRE_2022/Curate_Gc_Model/In_progress_curation/NGO_557_polished'

model = read_model(polished_stem + '.xml')
cobra.io.save_json_model(model, polished_stem + '.json')
cobra.io.write_sbml_model(model, polished_stem + '.sbml')
model = read_model(polished_stem + '.sbml')
model

In [13]:
# Add KEGG annotations: each gene gets a 'kegg.genes' entry made of the
# 'ngo:' prefix followed by the gene's string form.
kegg_prefix = 'ngo:'
for model_gene in model.genes:
    model_gene.annotation['kegg.genes'] = kegg_prefix + str(model_gene)

In [14]:
# Collect the ID of every gene in the model
genes = [model_gene.id for model_gene in model.genes]

# Load the gene annotation table, indexed by its first selected column.
# Sources: PATRIC/BVBRC database, UniProt database, and PubMLST.
annotation_csv = 'C://Users/Aimee/Documents/UVA/Metabolic_Modeling/organized/annotation_resources/refseq_gene_annotations.csv'
annotation_usecols = ['RefSeq_ID', 'Protein_ID', 'Gene_ID', 'Uniprot_ID', 'BVBRC_ID', 'PUBMLST_locus']

# Missing cells become empty strings rather than NaN
annotation_table = pd.read_csv(
    annotation_csv,
    usecols=annotation_usecols,
    index_col=0,
).fillna('')
annotation_table


Unnamed: 0_level_0,Protein_ID,Gene_ID,Uniprot_ID,BVBRC_ID,PUBMLST_locus
RefSeq_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NGO0001,YP_207186.1,3283049,Q5FAJ2,fig|242231.10.peg.1,NEIS0320
NGO0002,YP_207187.1,3283050,Q5FAJ1,fig|242231.10.peg.2,NEIS0321
NGO0003,YP_207188.1,3283051,Q5FAJ0,fig|242231.10.peg.3,NEIS0323
NGO0004,YP_207189.1,3283052,,fig|242231.10.peg.4,NEIS0324
NGO0006,YP_207190.1,3283053,,fig|242231.10.peg.7,NEIS0326
...,...,...,...,...,...
NGO2178,YP_209183.1,3282747,Q5F4W6,fig|242231.10.peg.2631,NEIS0316
NGO2179,YP_209184.1,3282746,Q5F4W5,fig|242231.10.peg.2632,
NGO2180,YP_209185.1,3282745,Q5F4W4,fig|242231.10.peg.2633,NEIS0317
NGO2182,YP_209187.1,3282743,Q5F4W2,fig|242231.10.peg.2635,NEIS0319


In [15]:
# Add gene annotations to the model from the annotation table.

# Annotation-table column -> key written into gene.annotation
column_to_key = {
    'Protein_ID': 'ncbiprotein',
    'Gene_ID': 'ncbigene',
    'Uniprot_ID': 'UNIPROT',
    'BVBRC_ID': 'PATRIC',
    'PUBMLST_locus': 'PUBMLST_Locus',
}

for model_gene in model.genes:
    # Genes absent from the table keep whatever annotations they already have
    if model_gene.id not in annotation_table.index:
        continue

    for column, key in column_to_key.items():
        value = str(annotation_table.loc[model_gene.id, column])
        # The table was filled with '' for missing cells; skip those so no
        # empty identifier is written into the model.
        if value:
            model_gene.annotation[key] = value

In [16]:
# Add SBO terms to every model component

# Metabolites
for cpd in model.metabolites:
    cpd.annotation['sbo'] = 'SBO:0000247'

# Reactions: first pass assigns a generic term to every reaction.
for rxn in model.reactions:
    compartments = {met.compartment for met in rxn.metabolites}

    # Match the 'EX_' prefix only; a substring test would also tag any
    # reaction whose ID merely contains 'EX_' somewhere in the middle.
    if rxn.id.startswith('EX_'):
        rxn.annotation['sbo'] = 'SBO:0000627'  # exchange
    elif len(compartments) > 1:
        rxn.annotation['sbo'] = 'SBO:0000185'  # transport
    else:
        rxn.annotation['sbo'] = 'SBO:0000176'  # metabolic

# Biomass (overrides the generic term assigned above)
model.reactions.biomass.annotation['sbo'] = 'SBO:0000629'

# Demand reactions (override the generic term assigned above)
demand_reaction_ids = (
    'DM_BIOMASS',
    'DM_4HBA',
    'DM_5DRIB',
    'DM_AMOB',
    'DM_BV',
    'DM_HMFURN',
    'DM_PAP',
    'DM_UREA',
)
for demand_id in demand_reaction_ids:
    model.reactions.get_by_id(demand_id).annotation['sbo'] = 'SBO:0000628'

# Genes
for gene in model.genes:
    gene.annotation['sbo'] = 'SBO:0000243'

In [17]:
# Spot-check: display the annotations now attached to one gene
spot_check_gene = model.genes.NGO1881
spot_check_gene.annotation

{'kegg.genes': 'ngo:NGO1881',
 'ncbiprotein': 'YP_208914.1',
 'ncbigene': '3282312',
 'UNIPROT': 'Q5F5N5',
 'PATRIC': 'fig|242231.10.peg.2264',
 'PUBMLST_Locus': 'NEIS0074',
 'sbo': 'SBO:0000243'}

In [18]:
# Save the annotated model in both SBML and JSON formats
annotated_sbml_path = 'C:/Users/Aimee/Documents/UVA/Metabolic_Modeling/organized/models/annotatedGCmodel2.sbml'
annotated_json_path = 'C:/Users/Aimee/Documents/UVA/Metabolic_Modeling/organized/models/annotatedGCmodel2.json'

cobra.io.write_sbml_model(model, annotated_sbml_path)
cobra.io.save_json_model(model, annotated_json_path)