In [4]:
# Import the required packages
import cobra
import pandas as pd
from cobra.io import load_json_model
from glob import glob
from cobra.manipulation.delete import delete_model_genes, remove_genes
import seaborn as sns
from cobra.flux_analysis import single_gene_deletion
import os

#Simplify reading/writing files
cwd=os.path.realpath(os.path.join(os.path.dirname(os.getcwd()),"..",".."))

In [2]:
#From Jenior 2021 C.diff notebooks - benchmarking C. difficile GENRES. https://github.com/csbl/Jenior_CdifficileGENRE_2021
def basicCheck(model):
    
    # Determination
    determination = float(len(model.reactions)) / float(len(model.metabolites))
    determination = round(determination, 3)
    if len(model.reactions) < len(model.metabolites): 
        statement = ' (overdetermined)'
    elif len(model.reactions) > len(model.metabolites):
        statement = ' (underdetermined)'
    print('Reactions to metabolites ratio: ' + str(determination) + statement)
    
    # Compartments
    print('GENRE has ' + str(len(model.compartments.keys())) + ' compartment(s)')
    
    # Genes
    if len(model.genes) == 0: 
        print('GENRE has no gene data')
    else:
        print('GENRE has ' + str(len(model.genes)) + ' genes')
    no_rxns = []
    for gene in model.genes:
          if len(gene.reactions) == 0:
                no_rxns.append(gene.id)
    if len(no_rxns) > 0:
        print('\t' + str(len(no_rxns)) + ' are not associated with reactions')
        
    # Growth
    ov = model.slim_optimize(error_value=0.)
    if ov < 1e-6:
        for rxn in model.boundary: rxn.bounds = (-1000., 1000.)
        ov = model.slim_optimize(error_value=0.)
        if ov < 1e-6:
            print('GENRE cannot acheive objective flux')
        else:
            ov = round(ov, 3)
            print(str(ov) + ' objective flux, only in complete media')
    else:
        ov = round(ov, 3)
        print(str(ov) + ' objective flux in current media')

# Quicker way to read in models
import pickle
def read_model(fileName, obj='none'):
    
    fileType = fileName.split('.')[-1]
    
    if fileType == 'sbml' or fileType == 'xml':
        model = cobra.io.read_sbml_model(fileName)
    elif fileType == 'json':
        model = cobra.io.load_json_model(fileName)
    elif fileType == 'yaml':
        model = cobra.io.load_yaml_model(fileName)
    elif fileType == 'mat':
        model = cobra.io.load_matlab_model(fileName)
    elif fileType == 'pkl':
        model = pickle.load(open(fileName, 'rb'))
    else:
        raise TypeError('Unrecognized file extension')
    
    if obj != 'none': model.objective = obj
    for rxn in model.boundary: rxn.bounds = (-1000., 1000.)
        
    return model

In [28]:
#Define model medias
rpmi_req=set(['EX_apoACP_c_', 'EX_trdrd_c_','EX_cobalt2_e_','EX_cu2_e_','EX_fe3_e_','EX_mn2_e_','EX_mobd_e_','EX_zn2_e_','EX_ca2_e_','EX_no3_e_','EX_h2o_e_','EX_k_e_','EX_cl_e_','EX_mg2_e_','EX_so4_e_','EX_na1_e_','EX_pi_e_','EX_h_e_','EX_arg_L_e_','EX_asn_L_e_','EX_asp_L_e_','EX_cys_L_e_','EX_glu_L_e_','EX_gly_e_','EX_his_L_e_','EX_ile_L_e_','EX_leu_L_e_','EX_lys_L_e_','EX_met_L_e_','EX_phe_L_e_','EX_pro_L_e_','EX_ser_L_e_','EX_thr_L_e_','EX_trp_L_e_','EX_tyr_L_e_','EX_val_L_e_','EX_pnto_R_e_','EX_chol_e_','EX_inost_e_','EX_glc_D_e_','EX_gthrd_e_','EX_co2_e_'])
mdm_req=set(['EX_co2_e_', 'EX_cobalt2_e_', 'EX_cu2_e_' , 'EX_h_e_', 'EX_h2o_e_','EX_mn2_e_','EX_mobd_e_','EX_tungs_e_', 'EX_zn2_e_','EX_na1_e_','EX_cl_e_','EX_k_e_','EX_so4_e_','EX_nh4_e_','EX_mg2_e_','EX_pi_e_','EX_ca2_e_','EX_fe3_e_','EX_no3_e_','EX_asp_L_e_','EX_glu_L_e_','EX_arg_L_e_','EX_gly_e_','EX_ser_L_e_','EX_leu_L_e_','EX_ile_L_e_','EX_val_L_e_','EX_tyr_L_e_','EX_cys_L_e_','EX_pro_L_e_','EX_trp_L_e_','EX_thr_L_e_','EX_phe_L_e_','EX_asn_L_e_','EX_gln_L_e_','EX_his_L_e_','EX_met_L_e_','EX_ala_L_e_','EX_lys_L_e_','EX_gthrd_e_','EX_thm_e_','EX_pnto_R_e_','EX_glc_D_e_','EX_hxan_e_','EX_ura_e_'])


def rpmi(model):
    for reaction in model.reactions:
        if 'EX_' in  reaction.id:
            reaction.lower_bound=0
        if reaction.id in rpmi_req:
            reaction.lower_bound = -10.
        if reaction.id == 'EX_o2_e_':
            reaction.lower_bound = -20.
        if reaction.id == 'EX_glc_D_e_':
            reaction.lower_bound = -10.

def MDM(model):
    for reaction in model.reactions:
        if 'EX_' in  reaction.id:
            reaction.lower_bound=0
        if reaction.id in mdm_req:
            reaction.lower_bound = -10.
        if reaction.id == 'EX_o2_e_':
            reaction.lower_bound = -20.
        if reaction.id == 'EX_glc_D_e_':
            reaction.lower_bound = -10.
            
            
def complete(model):
    for reaction in model.reactions:
        if 'EX_' in  reaction.id:
            reaction.lower_bound=-10.
        if reaction.id == 'EX_o2_e_':
            reaction.lower_bound = -20.




In [5]:
base = read_model(cwd+'/Gc_GENRE_2022/Models/Nmb_iTM560.json')

In [6]:
## Load the curated homology matrix for N. gonorrhoeae 
hom_matrix=pd.read_csv(cwd+'/Gc_GENRE_2022/Generate_Gc_Model/Matrices/ortho_matrixcurated.csv')
hom_matrix=hom_matrix.set_index('Unnamed: 0')

In [7]:
#Because ZN import is required, but ZupT is not present in Gc, Gc must use another method of aquiring Zn.
#Gc uses the ZNU system to aquire zinc. This system is present in the model, however the reaction is incorrect. 
#Znu moves zinc from the periplasm to the cytosol, but the reaction is written so that Znu is exporting zinc in the NME model.
#The following reactions correct this problem. 
#PMID: 11506909

base.reactions.ZN2abcpp

0,1
Reaction identifier,ZN2abcpp
Name,R_ZN2abcpp
Memory address,0x0180433ada00
Stoichiometry,M_atp_c_c + M_h2o_c_c + M_zn2_c_c --> M_adp_c_c + M_h_c_c + M_pi_c_c + M_zn2_p_c  M_atp_c + M_h2o_c + M_zn2_c --> M_adp_c + M_h_c + M_pi_c + M_zn2_p
GPR,NMB0586 and NMB0587 and NMB0588
Lower bound,0.0
Upper bound,999999.0


In [8]:
ZN2abcpp = base.reactions.ZN2abcpp 
ZN2abcpp.metabolites

{<Metabolite M_atp_c_c at 0x18042dacca0>: -1.0,
 <Metabolite M_h2o_c_c at 0x18042d24e80>: -1.0,
 <Metabolite M_zn2_c_c at 0x18042db7370>: -1.0,
 <Metabolite M_adp_c_c at 0x18042942b50>: 1.0,
 <Metabolite M_h_c_c at 0x18042d24820>: 1.0,
 <Metabolite M_pi_c_c at 0x18042d24640>: 1.0,
 <Metabolite M_zn2_p_c at 0x18042db7280>: 1.0}

In [9]:
M_atp_c_c=base.metabolites.get_by_id('M_atp_c_c')
M_h2o_c_c=base.metabolites.get_by_id('M_h2o_c_c')
M_zn2_c_c=base.metabolites.get_by_id('M_zn2_c_c')
M_adp_c_c=base.metabolites.get_by_id('M_adp_c_c')
M_h_c_c=base.metabolites.get_by_id('M_h_c_c')
M_pi_c_c=base.metabolites.get_by_id('M_pi_c_c')
M_zn2_p_c=base.metabolites.get_by_id('M_zn2_p_c')

In [10]:
ZN2abcpp.subtract_metabolites({
    M_atp_c_c: -1,
    M_h2o_c_c: -1,
    M_zn2_c_c: -1,
    M_adp_c_c: 1,
    M_h_c_c: 1,
    M_pi_c_c: 1,
    M_zn2_p_c: 1,
})


ZN2abcpp.add_metabolites({
    M_atp_c_c: -1,
    M_h2o_c_c: -1,
    M_zn2_c_c: 1,
    M_adp_c_c: 1,
    M_h_c_c: 1,
    M_pi_c_c: 1,
    M_zn2_p_c: -1,
})

In [11]:
base.reactions.ZN2abcpp

0,1
Reaction identifier,ZN2abcpp
Name,R_ZN2abcpp
Memory address,0x0180433ada00
Stoichiometry,M_atp_c_c + M_h2o_c_c + M_zn2_p_c --> M_adp_c_c + M_h_c_c + M_pi_c_c + M_zn2_c_c  M_atp_c + M_h2o_c + M_zn2_p --> M_adp_c + M_h_c + M_pi_c + M_zn2_c
GPR,NMB0586 and NMB0587 and NMB0588
Lower bound,0.0
Upper bound,999999.0


In [12]:
#Gc uses NGO2110 and NGO2109 (HbuAB) for hemoglobin aquisition instead (see notes in NGO model excel file for more info)
base.reactions.HGFE
base.reactions.HGFE.gene_reaction_rule = "(NGO2110ortholog and NGO2109ortholog)"
base.reactions.HGFE.gene_reaction_rule

'(NGO2110ortholog and NGO2109ortholog)'

In [13]:
#Gc strain FA1090 does not have a functional cysU (NGO0881) but still functionally aquires sulfate (see notes in NGO model excel file for more info)
base.reactions.SULabcpp.gene_reaction_rule = "(NMB0879 and NMB0880 and NMB1017)"
base.reactions.SULabcpp.gene_reaction_rule

'(NMB0879 and NMB0880 and NMB1017)'

In [14]:
base.reactions.TSULabcpp.gene_reaction_rule

'NMB0879 and NMB0880 and NMB0881 and NMB1017'

In [15]:
#Gc strain FA1090 does not have a functional cysU (NGO0881) but still functionally aquires thiosulfate (see notes in NGO model excel file for more info)
base.reactions.TSULabcpp.gene_reaction_rule = "(NMB0879 and NMB0880 and NMB1017)"
base.reactions.TSULabcpp.gene_reaction_rule

'(NMB0879 and NMB0880 and NMB1017)'

In [16]:
print (base.id,'Number of Model Genes:',len(base.genes),'Number of Model Reactions:',len(base.reactions))

Nmb_iTM560 Number of Model Genes: 564 Number of Model Reactions: 1519


In [18]:
#create strain-specific draft models and save them
for strain in hom_matrix.columns:
    
    #Get the list of Gene IDs from the homology matrix dataframe for the current strain without a homolog
    currentStrain=hom_matrix[strain]
    nonHomologous=currentStrain[currentStrain==0.0]
    nonHomologous=nonHomologous.index.tolist()
    
    
    
    #Define a list of Gene objects from the base reconstruction to be deleted from the current strain
    toDelete=[]
    for gene in nonHomologous:
        toDelete.append(base.genes.get_by_id(gene))

    #Establish a model copy and use the COBRApy function to remove the appropriate content and save this model
    baseCopy=base.copy()
    remove_genes(baseCopy, toDelete, remove_reactions=True)
    baseCopy.id=str(strain)
    cobra.io.json.save_json_model(baseCopy, str(cwd+'/Gc_GENRE_2022/Generate_Gc_Model/FinalModels/AE004969.1.json'), pretty=False)

In [19]:
print (baseCopy.id,'Number of Model Genes:',len(baseCopy.genes),'Number of Model Reactions:',len(baseCopy.reactions))

AE004969.1 Number of Model Genes: 520 Number of Model Reactions: 1484


In [24]:
#load the geneID matrix from the notebook1 
models=glob(cwd+'/Gc_GENRE_2022/Generate_Gc_Model/FinalModels/AE004969.1.json')
geneIDs_matrix=pd.read_csv(cwd+'/Gc_GENRE_2022/Generate_Gc_Model/Matrices/geneIDs_matrixcurated.csv')
geneIDs_matrix=geneIDs_matrix.set_index('Unnamed: 0')
geneIDs_matrix

Unnamed: 0_level_0,AE004969.1
Unnamed: 0,Unnamed: 1_level_1
Blank,Blank
NGO2109ortholog,NGO2109
NGO2110ortholog,NGO2110
NMB0003,NGO1926
NMB0006,NGO1923
...,...
NMB2156,NGO1934
NMB2157,NGO1933
NMB2159,NGO1931
Orphan,Orphan


In [25]:
#Utilize the geneIDs matrix to update the GPRs in each of the strain-specific Final model with the proper gene ID

from cobra.manipulation.modify import rename_genes

for mod in models:
    model=cobra.io.load_json_model(mod)
    for column in geneIDs_matrix.columns:
        if column in mod:
            currentStrain=column
    
    IDMapping=geneIDs_matrix[currentStrain].to_dict()
    IDMappingParsed = {k:v for k,v in IDMapping.items() if v != 'None'}
    
    rename_genes(model,IDMappingParsed)
    cobra.io.json.save_json_model(model,str(cwd+'/Gc_GENRE_2022/Generate_Gc_Model/FinalModels/AE004969.1.json'), pretty=False)

In [26]:
# gather the general information on the Final Model
for strain in hom_matrix.columns:
    model=cobra.io.load_json_model(str(cwd+'/Gc_GENRE_2022/Generate_Gc_Model/FinalModels/AE004969.1.json'))
    print (model.id, 'Number of Model Genes:',len(model.genes),'Number of Model Reactions:',len(model.reactions))

AE004969.1 Number of Model Genes: 514 Number of Model Reactions: 1484


In [30]:
#Confirm growth of the Final model on desired in silico media
model=read_model(cwd+'/Gc_GENRE_2022/Generate_Gc_Model/FinalModels/AE004969.1.json')
base=read_model(cwd+'/Gc_GENRE_2022/Models/Nmb_iTM560.json')
model.objective ="Nm_Ess_biomass"
base.objective ="Nm_Ess_biomass"

print (model.id)
MDM(model)
print ('MDM:',model.slim_optimize())
rpmi(model)
print ('RPMI:', model.slim_optimize())
complete(model)
print ('Complete media:', model.slim_optimize())

print (base.id)
MDM(base)
print ('MDM:',base.slim_optimize())
rpmi(base)
print ('RPMI:', base.slim_optimize())
complete(base)
print ('Complete media:', base.slim_optimize())

AE004969.1
MDM: 0.9886693787246059
RPMI: 0.9378805966435457
Complete media: 2.3284316745885696
Nmb_iTM560
MDM: 1.0557843488117042
RPMI: 1.0113116219943517
Complete media: 3.72664027103791
