In [2]:
import cobra
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from cobra.flux_analysis import single_gene_deletion
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from pandas_ml import ConfusionMatrix

%matplotlib inline

In [3]:
cd D:/ScientificReports/

D:\ScientificReports


In [3]:
def main(media,method):
    """Entry point: run the single-gene knockout evaluation for one medium.

    Both arguments are forwarded unchanged to ``single_knockout``.

    Parameters
    ----------
    media : str
        Name of the growth medium to simulate.
    method : str
        Deletion simulation method handed on to the knockout routine.
    """
    single_knockout(media=media, method=method)


In [4]:
def single_knockout(media,method):
    """Simulate every single-gene deletion of iPN730 on ``media`` and score it.

    Loads the homolog table and the SBML model from disk, applies the
    baseline exchange bounds, lets ``media_constrain`` specialise the model
    for the requested medium, runs cobra's ``single_gene_deletion`` and hands
    the result to ``table_merge`` (which prints the classification stats).

    Parameters
    ----------
    media : str
        Medium name understood by ``media_constrain``.
    method : str
        Passed to ``single_gene_deletion`` as its ``method`` argument.
    """
    homolog_table = pd.read_csv('D:/ScientificReports/Homologs.csv')

    gem = cobra.io.read_sbml_model("D:/ScientificReports/iPN730.xml")
    gem.solver = 'glpk'

    # Baseline exchange bounds applied before any medium-specific changes.
    # NOTE(review): cpd00027 / cpd00007 are presumably glucose / oxygen
    # (ModelSEED ids) - confirm against the model.
    baseline_bounds = (
        ('EX_cpd00092_e0', (0, 1000)),
        ('EX_cpd19013_e0', (-1000, 1000)),
        ('EX_cpd00027_e0', (-2.28, 1000)),
        ('EX_cpd00007_e0', (-6, 1000)),
    )
    for exchange_id, limits in baseline_bounds:
        gem.reactions.get_by_id(exchange_id).bounds = limits

    constrained_model, labelled_table = media_constrain(gem, media, homolog_table)

    deletion_results = single_gene_deletion(constrained_model, method=method)
    table_merge(deletion_results, labelled_table)

In [5]:
def media_constrain(model,media,df_main):
    """Constrain ``model`` for a growth medium and label experimental essentiality.

    Parameters
    ----------
    model : cobra model
        Exchange-reaction bounds are modified in place.
    media : str
        One of ``'Minimal'``, ``'Rich'``, ``'Galactose'`` or ``'Ethanol'``.
    df_main : pandas.DataFrame
        Must contain a ``'Scerevisiae'`` column. A ``'GroundTruth'`` column is
        added in place: 1 when the ORF is in the essential list selected for
        ``media``, otherwise 0.

    Returns
    -------
    tuple
        ``(model, df_main)`` - the same objects that were passed in.

    Raises
    ------
    ValueError
        If ``media`` is not one of the supported names. Previously this case
        returned a frame without ``'GroundTruth'`` and failed later with a
        KeyError far from the cause.

    Side effects
    ------------
    For ``'Rich'`` the constrained model is written to ``iPN730_rich.xml`` in
    the current working directory.
    """
    supported = ('Minimal', 'Rich', 'Galactose', 'Ethanol')
    if media not in supported:
        raise ValueError(
            "Unknown media %r; expected one of %s" % (media, ', '.join(supported))
        )

    if media in ('Minimal', 'Rich'):
        if media == 'Rich':
            media_comps = pd.read_csv('D:/ScientificReports/Rich.tsv',sep="\t",encoding='latin1')
            reaction_ids = {rxn.id for rxn in model.reactions}
            for met in media_comps['compounds']:
                ex_id = 'EX_'+met+"_e0"
                if ex_id in reaction_ids:
                    ex = model.reactions.get_by_id(ex_id)
                    # Only open uptake for exchanges that are currently closed;
                    # exchanges with an existing uptake limit keep it.
                    if ex.lower_bound == 0:
                        ex.bounds = (-1000, 1000)
            cobra.io.write_sbml_model(model,'iPN730_rich.xml')

        # Minimal and Rich share the same essential-ORF list.
        ess_gene = pd.read_csv('D:/ScientificReports/Essential_ORFs.txt',sep="\t",encoding='latin1')
        # NOTE(review): the [1:1156] slice skips the first row and stops after
        # row 1155 - presumably trimming non-ORF lines of the file; confirm.
        ess_orfs = set(ess_gene['ORF_name'].str.strip()[1:1156])
    else:
        # The deletion screen is only needed for the carbon-source media, so it
        # is read here rather than unconditionally.
        gene_deletion = pd.read_excel('D:/ScientificReports/SyntheticLethality.xlsx')
        # Close the exchange that the baseline setup opened (cpd00027) and open
        # the alternative carbon source instead.
        model.reactions.EX_cpd00027_e0.bounds = (0, 1000)
        if media == 'Galactose':
            model.reactions.EX_cpd00108_e0.bounds = (-2.36, 1000)
            column = 'galactose'
        else:
            model.reactions.EX_cpd00363_e0.bounds = (-3, 1000)
            column = 'ethanol'
        # Rows with a 0 in the medium column are taken as the essential set
        # (presumably 0 encodes "no growth" - verify against the spreadsheet).
        ess_orfs = set(gene_deletion[gene_deletion[column] == 0]['ORF'].str.strip())

    # Set membership keeps the per-row lookup O(1).
    df_main['GroundTruth'] = df_main['Scerevisiae'].apply(lambda x: 1 if (x in ess_orfs) else 0)

    return model,df_main

In [6]:
def table_merge(sgd_minimal,df_main):
    """Turn deletion results into essentiality calls and score them.

    Parameters
    ----------
    sgd_minimal : pandas.DataFrame
        Single-gene-deletion results with ``'growth'`` and ``'status'``
        columns, indexed by a collection of gene ids (the first id of each
        index entry is used). The frame is not modified.
    df_main : pandas.DataFrame
        Table with a ``'Lkluyveri'`` gene-id column and the ``'GroundTruth'``
        labels; joined to the predictions on ``'Lkluyveri'`` (inner join).

    Returns
    -------
    None
        The merged table is passed to ``confusion_matrix``, which prints the
        statistics.
    """
    def _is_lethal(growth):
        # NaN growth means the knockout had no optimal solution; count it as
        # lethal. ``round(nan, 5) != 0`` is True, so the plain comparison would
        # wrongly mark such knockouts as viable.
        if pd.isna(growth):
            return 1
        return 1 if round(growth, 5) == 0 else 0

    # Work on a copy so the caller's deletion results stay untouched.
    predictions = sgd_minimal.copy()
    predictions['Lkluyveri'] = [list(gene_ids)[0] for gene_ids in predictions.index]
    predictions['SimulationTruth'] = predictions['growth'].apply(_is_lethal)
    predictions = predictions.drop(['growth','status'],axis=1)

    sgd_min = pd.merge(df_main,predictions,on='Lkluyveri',how='inner')
    confusion_matrix(sgd_min)

In [7]:
def confusion_matrix(sgd_min):
    """Print classification statistics for predicted vs. experimental essentiality.

    NOTE: this definition shadows ``sklearn.metrics.confusion_matrix`` that is
    imported at the top of the notebook; after this cell runs the name refers
    to this function.

    Parameters
    ----------
    sgd_min : pandas.DataFrame
        Must contain ``'GroundTruth'`` (first argument to ``ConfusionMatrix``)
        and ``'SimulationTruth'`` (second argument).
    """
    experimental = sgd_min['GroundTruth']
    simulated = sgd_min['SimulationTruth']
    ConfusionMatrix(experimental, simulated).print_stats()

In [32]:
# Essentiality prediction with the ethanol medium, simulated with FBA.
main(media='Ethanol', method='FBA')

'M_abt-L_e' is not a valid SBML 'SId'.
'M_abt-L_c' is not a valid SBML 'SId'.
'M_lald-L_c' is not a valid SBML 'SId'.
'M_trp-L_m' is not a valid SBML 'SId'.
'M_trp-L_c' is not a valid SBML 'SId'.
'M_trp-L_e' is not a valid SBML 'SId'.
'M_asn-L_c' is not a valid SBML 'SId'.
'M_asn-L_e' is not a valid SBML 'SId'.
'M_tyr-L_p' is not a valid SBML 'SId'.
'M_arab-D_c' is not a valid SBML 'SId'.
'M_arab-D_e' is not a valid SBML 'SId'.
'M_tyr-L_m' is not a valid SBML 'SId'.
'M_tyr-L_v' is not a valid SBML 'SId'.
'M_tyr-L_e' is not a valid SBML 'SId'.
'M_tyr-L_c' is not a valid SBML 'SId'.
'M_glu-L_e' is not a valid SBML 'SId'.
'M_glu-L_n' is not a valid SBML 'SId'.
'M_glu-L_m' is not a valid SBML 'SId'.
'M_glc-D_v' is not a valid SBML 'SId'.
'M_glu-L_c' is not a valid SBML 'SId'.
'M_glc-D_c' is not a valid SBML 'SId'.
'M_glc-D_e' is not a valid SBML 'SId'.
'M_glu-L_p' is not a valid SBML 'SId'.
'M_manmi1p-D_c' is not a valid SBML 'SId'.
'M_glu-L_v' is not a valid SBML 'SId'.
'M_phe-L_m' is not

population: 547
P: 83
N: 464
PositiveTest: 106
NegativeTest: 441
TP: 33
TN: 391
FP: 73
FN: 50
TPR: 0.39759036144578314
TNR: 0.8426724137931034
PPV: 0.3113207547169811
NPV: 0.8866213151927438
FPR: 0.15732758620689655
FDR: 0.6886792452830188
FNR: 0.6024096385542169
ACC: 0.7751371115173674
F1_score: 0.3492063492063492
MCC: 0.21807822232639415
informedness: 0.24026277523888662
markedness: 0.19794206990972496
prevalence: 0.15173674588665448
LRP: 2.5271496946690872
LRN: 0.714879980279173
DOR: 3.5350684931506846
FOR: 0.11337868480725624


In [25]:
# Essentiality prediction with the galactose medium, simulated with FBA.
main(media='Galactose', method='FBA')

'M_abt-L_e' is not a valid SBML 'SId'.
'M_abt-L_c' is not a valid SBML 'SId'.
'M_lald-L_c' is not a valid SBML 'SId'.
'M_trp-L_m' is not a valid SBML 'SId'.
'M_trp-L_c' is not a valid SBML 'SId'.
'M_trp-L_e' is not a valid SBML 'SId'.
'M_asn-L_c' is not a valid SBML 'SId'.
'M_asn-L_e' is not a valid SBML 'SId'.
'M_tyr-L_p' is not a valid SBML 'SId'.
'M_arab-D_c' is not a valid SBML 'SId'.
'M_arab-D_e' is not a valid SBML 'SId'.
'M_tyr-L_m' is not a valid SBML 'SId'.
'M_tyr-L_v' is not a valid SBML 'SId'.
'M_tyr-L_e' is not a valid SBML 'SId'.
'M_tyr-L_c' is not a valid SBML 'SId'.
'M_glu-L_e' is not a valid SBML 'SId'.
'M_glu-L_n' is not a valid SBML 'SId'.
'M_glu-L_m' is not a valid SBML 'SId'.
'M_glc-D_v' is not a valid SBML 'SId'.
'M_glu-L_c' is not a valid SBML 'SId'.
'M_glc-D_c' is not a valid SBML 'SId'.
'M_glc-D_e' is not a valid SBML 'SId'.
'M_glu-L_p' is not a valid SBML 'SId'.
'M_manmi1p-D_c' is not a valid SBML 'SId'.
'M_glu-L_v' is not a valid SBML 'SId'.
'M_phe-L_m' is not

population: 547
P: 77
N: 470
PositiveTest: 99
NegativeTest: 448
TP: 30
TN: 401
FP: 69
FN: 47
TPR: 0.38961038961038963
TNR: 0.8531914893617021
PPV: 0.30303030303030304
NPV: 0.8950892857142857
FPR: 0.14680851063829786
FDR: 0.696969696969697
FNR: 0.6103896103896104
ACC: 0.7879341864716636
F1_score: 0.3409090909090909
MCC: 0.21932580424647763
informedness: 0.2428018789720916
markedness: 0.1981195887445888
prevalence: 0.14076782449725778
LRP: 2.653867871259176
LRN: 0.7154192440975483
DOR: 3.7095282146160966
FOR: 0.10491071428571429


In [8]:
# Essentiality prediction with the rich medium, simulated with FBA.
main(media='Rich', method='FBA')

'M_abt-L_e' is not a valid SBML 'SId'.
'M_abt-L_c' is not a valid SBML 'SId'.
'M_lald-L_c' is not a valid SBML 'SId'.
'M_trp-L_m' is not a valid SBML 'SId'.
'M_trp-L_c' is not a valid SBML 'SId'.
'M_trp-L_e' is not a valid SBML 'SId'.
'M_asn-L_c' is not a valid SBML 'SId'.
'M_asn-L_e' is not a valid SBML 'SId'.
'M_tyr-L_p' is not a valid SBML 'SId'.
'M_arab-D_c' is not a valid SBML 'SId'.
'M_arab-D_e' is not a valid SBML 'SId'.
'M_tyr-L_m' is not a valid SBML 'SId'.
'M_tyr-L_v' is not a valid SBML 'SId'.
'M_tyr-L_e' is not a valid SBML 'SId'.
'M_tyr-L_c' is not a valid SBML 'SId'.
'M_glu-L_e' is not a valid SBML 'SId'.
'M_glu-L_n' is not a valid SBML 'SId'.
'M_glu-L_m' is not a valid SBML 'SId'.
'M_glc-D_v' is not a valid SBML 'SId'.
'M_glu-L_c' is not a valid SBML 'SId'.
'M_glc-D_c' is not a valid SBML 'SId'.
'M_glc-D_e' is not a valid SBML 'SId'.
'M_glu-L_p' is not a valid SBML 'SId'.
'M_manmi1p-D_c' is not a valid SBML 'SId'.
'M_glu-L_v' is not a valid SBML 'SId'.
'M_phe-L_m' is not

population: 547
P: 102
N: 445
PositiveTest: 43
NegativeTest: 504
TP: 24
TN: 426
FP: 19
FN: 78
TPR: 0.23529411764705882
TNR: 0.9573033707865168
PPV: 0.5581395348837209
NPV: 0.8452380952380952
FPR: 0.04269662921348315
FDR: 0.4418604651162791
FNR: 0.7647058823529411
ACC: 0.8226691042047533
F1_score: 0.3310344827586207
MCC: 0.2787283954887798
informedness: 0.19259748843357571
markedness: 0.40337763012181616
prevalence: 0.18647166361974407
LRP: 5.510835913312693
LRN: 0.7988124827395746
DOR: 6.898785425101215
FOR: 0.15476190476190477


In [31]:
# Essentiality prediction with the minimal medium, simulated with FBA.
main(media='Minimal', method='FBA')

'M_abt-L_e' is not a valid SBML 'SId'.
'M_abt-L_c' is not a valid SBML 'SId'.
'M_lald-L_c' is not a valid SBML 'SId'.
'M_trp-L_m' is not a valid SBML 'SId'.
'M_trp-L_c' is not a valid SBML 'SId'.
'M_trp-L_e' is not a valid SBML 'SId'.
'M_asn-L_c' is not a valid SBML 'SId'.
'M_asn-L_e' is not a valid SBML 'SId'.
'M_tyr-L_p' is not a valid SBML 'SId'.
'M_arab-D_c' is not a valid SBML 'SId'.
'M_arab-D_e' is not a valid SBML 'SId'.
'M_tyr-L_m' is not a valid SBML 'SId'.
'M_tyr-L_v' is not a valid SBML 'SId'.
'M_tyr-L_e' is not a valid SBML 'SId'.
'M_tyr-L_c' is not a valid SBML 'SId'.
'M_glu-L_e' is not a valid SBML 'SId'.
'M_glu-L_n' is not a valid SBML 'SId'.
'M_glu-L_m' is not a valid SBML 'SId'.
'M_glc-D_v' is not a valid SBML 'SId'.
'M_glu-L_c' is not a valid SBML 'SId'.
'M_glc-D_c' is not a valid SBML 'SId'.
'M_glc-D_e' is not a valid SBML 'SId'.
'M_glu-L_p' is not a valid SBML 'SId'.
'M_manmi1p-D_c' is not a valid SBML 'SId'.
'M_glu-L_v' is not a valid SBML 'SId'.
'M_phe-L_m' is not

population: 547
P: 102
N: 445
PositiveTest: 97
NegativeTest: 450
TP: 32
TN: 380
FP: 65
FN: 70
TPR: 0.3137254901960784
TNR: 0.8539325842696629
PPV: 0.32989690721649484
NPV: 0.8444444444444444
FPR: 0.14606741573033707
FDR: 0.6701030927835051
FNR: 0.6862745098039216
ACC: 0.753199268738574
F1_score: 0.32160804020100503
MCC: 0.17096705916412017
informedness: 0.16765807446574144
markedness: 0.17434135166093934
prevalence: 0.18647166361974407
LRP: 2.147812971342383
LRN: 0.8036635706914345
DOR: 2.6725274725274724
FOR: 0.15555555555555556


In [None]:
# Radar (polar) chart comparing the FBA essentiality-prediction metrics per medium.
#
# The labels are listed in the same order as the values in each series below:
# ACC, TNR (specificity), TPR (sensitivity), MCC, F1 - taken from the printed
# statistics of the four runs. (Sensitivity and Specificity were previously
# listed the other way round, which would mislabel two axes.)
statistic = ["Accuracy","Specificity","Sensitivity","MCC","F1 Score"]

# Each series repeats its first value at the end so the polygon closes.
fba_glucose_minimal = [0.75,0.85,0.31,0.17,0.32,0.75]
fba_galactose = [0.79,0.85,0.39,0.22,0.34,0.79]
# Accuracy is 0.7751 in the run output; rounded like every other value.
fba_ethanol = [0.78,0.84,0.4,0.22,0.35,0.78]
fba_ypd=[0.82,0.96,0.24,0.28,0.33,0.82]

theta = np.linspace(0, 2 * np.pi, len(fba_ypd))

fig, ax = plt.subplots(figsize=(30, 15), subplot_kw={"polar": True})

# Put the metric names on the angular axis; without `labels=` the grid shows
# plain degree values and `statistic` is only used for its length.
lines, labels = ax.set_thetagrids(
    list(range(0, 360, 360 // len(statistic))), labels=statistic, fontsize=14
)

for series, medium in (
    (fba_ypd, 'YPD'),
    (fba_glucose_minimal, 'YMMG'),
    (fba_ethanol, 'Ethanol'),
    (fba_galactose, 'Galactose'),
):
    ax.plot(theta, series, label=medium)

ax.legend(loc=0, fontsize=14)

# 30x15 inches at dpi=2000 is a 60000x30000 px raster (several GB in memory);
# 300 dpi is print quality and renders reliably.
fig.savefig("D:/ScientificReports/ClassificationPolar.jpg", dpi=300)
