Author: Sanjeev Dahal <br>
Script for simulating model iSD1509 to predict gene essentiality

In [1]:
#cobra package
import cobra
from cobra import Model, Reaction, Metabolite


#other packages
import os
from os.path import join
import pandas as pd
import numpy as np

In [2]:
# Current working directory; the model and data files loaded below are joined onto this path.
mydir = os.getcwd()

In [3]:
# Read the iSD model from the SBML file located in the working directory.
sbml_path = join(mydir, 'iSDModel.xml')
model_iSD = cobra.io.read_sbml_model(sbml_path)

# Snapshot each reaction's default [lower_bound, upper_bound], keyed by
# reaction id, so the defaults can be restored before later simulations.
original_bounds_dict = {
    rxn.id: [rxn.lower_bound, rxn.upper_bound]
    for rxn in model_iSD.reactions
}

Set parameter Username
Academic license - for non-commercial use only - expires 2023-01-08


https://doi.org/10.1101/2021.03.10.434463 does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id


In [4]:
# Experimentally determined core essential genes (Poulsen et al., 2019).
# The file has no header row; the gene ids are read from its first column.
expData_core = pd.read_csv(
    join(mydir, 'core_essential_genes_Poulsen.txt'),
    header=None,
)
essentialgenes_core = list(expData_core[0])
essentialgenes_core = list(set(essentialgenes_core))  # drop duplicates, if any

# Gene ids of the model, leaving out PA_s0001.
iSDgenes = [gene.id for gene in model_iSD.genes]
iSDgenes.remove('PA_s0001')

# Restrict the comparison to genes that are present in the model:
#   essential     -> listed as core essential AND present in the model
#   non-essential -> every remaining model gene
essential_set = set(essentialgenes_core)
model_gene_set = set(iSDgenes)
exp_essential_iSD = list(essential_set & model_gene_set)
exp_nonessential_iSD = list(model_gene_set - set(exp_essential_iSD))

### gene essentiality in LB media

In [5]:
# Load the LB medium definition from the 'LB' sheet of the media workbook.
# The code below reads its 'metabolites' and 'lb' columns.
lb_media = pd.read_excel(join(mydir, 'media_LB_SCFM_MM.xlsx'), sheet_name='LB', header=0)


# Build the medium as {exchange reaction id: absolute value of the 'lb' entry}.
medium = {}
for met in lb_media.metabolites:
    metrxn = "EX_" + met + "_e"
    # First 'lb' value listed for this metabolite.
    first_lb = lb_media.loc[lb_media['metabolites'] == met, 'lb'].values[0]
    # Builtin float() replaces the np.float alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    medium[metrxn] = abs(float(first_lb))

geneKOdict = {}  # gene id -> (objective value, solver status) after knocking out that gene

# --- One-time model setup (identical for every knockout) ---------------------
# Restore the stored default bounds; a reaction without a stored entry is
# left untouched.
for r in model_iSD.reactions:
    if r.id in original_bounds_dict:
        r.bounds = original_bounds_dict[r.id][0], original_bounds_dict[r.id][1]

model_iSD.medium = medium  # set the media to LB

# Set the objective reaction and its bounds.
objective = 'BIOMASS_PA14_v27M'
model_iSD.reactions.get_by_id(objective).bounds = 0., 1000.
model_iSD.objective = objective

# --- Single-gene knockouts ----------------------------------------------------
for genes in iSDgenes:
    # Changes made inside the model context (the knockout and the reaction
    # bounds it zeroes) are reverted automatically on exit, so every gene is
    # simulated against the same baseline model. This replaces the deprecated
    # delete_model_genes / undelete_model_genes pair.
    with model_iSD:
        model_iSD.genes.get_by_id(genes).knock_out()
        solution_iSD = model_iSD.optimize()

    # Store the information in the geneKOdict dictionary.
    geneKOdict[genes] = solution_iSD.objective_value, solution_iSD.status



Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if __name__ == '__main__':


In [7]:
def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


# Objective values below this cutoff are treated as "no growth".
growth_cutoff = 0.00001

count_tp = 0  # essential in the experiment and predicted essential
count_tn = 0  # non-essential in the experiment and predicted non-essential
count_predictedessential = 0  # all genes predicted essential

# gene id -> (objective value, solver status, experimental label,
#             '1' if the prediction agrees with the experiment else '0')
dict_geneessentiality = {}

# Set lookup avoids scanning the list once per gene.
essential_lookup = set(exp_essential_iSD)

for k in geneKOdict:
    growth, status = geneKOdict[k]
    # A knockout is predicted essential when the solve is not optimal or the
    # objective value falls below the cutoff.
    predicted_essential = status != 'optimal' or growth < growth_cutoff
    observed_essential = k in essential_lookup

    if predicted_essential:
        count_predictedessential += 1
    if observed_essential and predicted_essential:
        count_tp += 1
    elif not observed_essential and not predicted_essential:
        count_tn += 1

    label = 'essential' if observed_essential else 'nonessential'
    agreement = '1' if predicted_essential == observed_essential else '0'
    dict_geneessentiality[k] = growth, status, label, agreement

count_negative = len(iSDgenes) - count_predictedessential
count_fn = count_negative - count_tn
count_fp = count_predictedessential - count_tp

# Metrics for comparing the gene essentiality predictions with experimental
# data. Each ratio is NaN (instead of raising ZeroDivisionError) when its
# denominator is zero, e.g. when no gene is predicted essential.
accuracy = _ratio(count_tn + count_tp, len(iSDgenes))  # overall accuracy
precision = _ratio(count_tp, count_predictedessential)  # precision
neg_pa = _ratio(count_tn, count_negative)  # negative predictive accuracy
recall = _ratio(count_tp, len(exp_essential_iSD))  # recall
specificity = _ratio(count_tn, len(exp_nonessential_iSD))  # specificity

# MCC: (tp*tn - fp*fn)/((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))**0.5
mcc_denominator = ((count_tp+count_fp)*(count_tp+count_fn)*(count_tn+count_fp)*(count_tn+count_fn))**0.5
mcc = _ratio(count_tp*count_tn - count_fp*count_fn, mcc_denominator)

print("Precision: %s\nNegative Predictive Accuracy: %s\nRecall: %s\nSpecificity: %s\nOverall Accuracy: %s\nMCC: %s"%(str(precision), str(neg_pa), str(recall), str(specificity), str(accuracy), str(mcc)))

Precision: 0.6397058823529411
Negative Predictive Accuracy: 0.9526584122359796
Recall: 0.5723684210526315
Specificity: 0.9638909358879882
Overall Accuracy: 0.9244532803180915
MCC: 0.5636141372347206
