## Set the environment

In [2]:
import cobra
from cobra import Model, Reaction, Metabolite
from cobra.flux_analysis import flux_variability_analysis
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import escher
from escher import Builder
from utils import show_map
from utils.check_precursor_problem import check_precursor_problem

## Add all the transcript data to the same table.

### Import the Excel sheet that has all the gene reaction rule information

In [5]:
# Load the "gene_reaction_rule" sheet and key it by reaction id, so that each
# row can be looked up by the id of the reaction it describes.
df_gene_reaction_rule = pd.read_excel(
    "../data/transciptone.xlsx",
    sheet_name="gene_reaction_rule",
).set_index(["reaction_id"])

### Import the Excel sheet that contains the transcript data for the temperature shift from 80 ℃ to 90 ℃.
- The data is from the paper 'Dynamic Metabolic Adjustments and Genome Plasticity Are Implicated in the Heat Shock Response of the Extremely Thermoacidophilic Archaeon Sulfolobus solfataricus'.
- Samples were taken 10 min before starting the temperature shift (-10) and then 5, 30, and 60 min after reaching 90°C.
- The data can be downloaded from the website (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1482968/bin/jbacter_188_12_4553__index.html)

In [38]:
# Load the "dynamic shift" sheet (temperature-shift transcript data) and key
# it by the ORF column so that rows can be looked up by gene identifier.
df_temperature_transcript = pd.read_excel(
    "../data/transciptone.xlsx",
    sheet_name="dynamic shift",
).set_index(["ORF"])

### Import the Excel sheet that contains the HU treatment transcript data
- The data is from the paper 'HU treatment of Sulfolobus solfataricus P2'.
- Samples were taken from cells grown with or without hydroxyurea (HU) treatment for 4 hours.
- The data can be downloaded from the website (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE87612)
- This transcript data does not contain SSO numbers but does contain gene location information, so we used a file that has both SSO numbers and gene locations to map the SSO numbers onto the transcript data.

In [7]:
# Attach an ORF (locus tag) to every row of the HU-treatment sheet by matching
# the sheet's START coordinate against the "start" column of the genome
# feature table, then index the sheet by that ORF.
df_genome = pd.read_table('../data/GCA_000007005.1_ASM700v1_feature_table.txt')  # feature table with "start" and "locus_tag" columns
df_HU_treatment = pd.read_excel('../data/transciptone.xlsx', sheet_name="HU treatment of sulfolobus ")

# Index by feature type and discard the rows whose feature type is 'gene'.
df_genome = df_genome.set_index(["# feature"]).drop(index='gene')
df_HU_treatment = df_HU_treatment.set_index(["START"])

# One pass over the feature table builds start -> locus_tag. When several
# features share a start coordinate the last one wins, which is what the
# previous row-by-row scan produced. Rows without a textual locus tag are
# skipped instead of failing on `.strip()`.
start_to_locus_tag = {
    start: locus_tag.strip()
    for start, locus_tag in zip(df_genome["start"], df_genome["locus_tag"])
    if isinstance(locus_tag, str)
}

# Assign the whole column at once: writing through `df["ORF"].at[...]` is a
# chained assignment and is not guaranteed to modify the frame.
# START values without a matching feature keep an empty ORF.
df_HU_treatment["ORF"] = [
    start_to_locus_tag.get(start, "") for start in df_HU_treatment.index
]
df_HU_treatment = df_HU_treatment.set_index("ORF")

### Import the Excel sheet that contains the transcript data of the different oxygen treatments
- The data is from the paper 'Effect of O2 concentrations on Sulfolobus solfataricus P2'.
- Sulfolobus solfataricus P2 was grown aerobically, with O2 concentrations ranging from 1.5 to 26 % (v/v; gas phase). To gain some insight into the control of the respiratory system, transcriptomes of the strain cultivated at different O2 concentrations (1.5 % vs 21 %, 1.5 % vs 26 %) were compared using a DNA microarray approach.
- The data can be downloaded from the website (https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE16043)

In [52]:
# Parse the "26vs1.5" hybridisation columns of the "Oxygen treatment" sheet.
# Every kept cell is split on "_": the first field becomes the ORF, and the
# second field is split on " " to give the probe position (first token) and
# the transcript value (last token, kept as text).
df_oxygen_transcript = pd.read_excel('../data/transciptone.xlsx', sheet_name="Oxygen treatment")

# Keep only the rows whose hyb_1 entry mentions an SSO number; missing entries
# are treated as "no SSO number". The explicit copy makes the column
# assignments below write to this table rather than to a temporary slice of
# the sheet (the former `df[col].iloc[i] = ...` writes were chained
# assignments on a filtered frame).
has_sso_number = df_oxygen_transcript['26vs1.5 hyb_1'].str.contains('SSO', na=False)
df_oxygen_transcript = df_oxygen_transcript.loc[has_sso_number].copy()

hyb_1_fields = [entry.split("_") for entry in df_oxygen_transcript["26vs1.5 hyb_1"]]
hyb_2_fields = [entry.split("_") for entry in df_oxygen_transcript["26vs1.5 hyb_2"]]
hyb_1_tokens = [fields[1].split(" ") for fields in hyb_1_fields]

# Columns are added in the same order as before so the table layout is unchanged.
df_oxygen_transcript["ORF"] = [fields[0] for fields in hyb_1_fields]
df_oxygen_transcript['26vs1.5 hyb_1 transcript'] = [tokens[-1] for tokens in hyb_1_tokens]
df_oxygen_transcript['26vs1.5 hyb_2 transcript'] = [
    fields[1].split(" ")[-1] for fields in hyb_2_fields
]
df_oxygen_transcript["position"] = [tokens[0] for tokens in hyb_1_tokens]
df_oxygen_transcript = df_oxygen_transcript.set_index(["ORF"])


In [55]:
# Parse the "21vs1.5" hybridisation columns of the "Oxygen treatment" sheet.
# Every kept cell is split on "_": the first field becomes the ORF, and the
# second field is split on " " to give the probe position (first token) and
# the transcript value (last token, kept as text).
df_oxygen_transcript2 = pd.read_excel('../data/transciptone.xlsx', sheet_name="Oxygen treatment")

# Keep only the rows whose hyb_1 entry mentions an SSO number; missing entries
# are treated as "no SSO number". The explicit copy makes the column
# assignments below write to this table rather than to a temporary slice of
# the sheet (the former `df[col].iloc[i] = ...` writes were chained
# assignments on a filtered frame).
has_sso_number = df_oxygen_transcript2['21vs1.5 hyb_1'].str.contains('SSO', na=False)
df_oxygen_transcript2 = df_oxygen_transcript2.loc[has_sso_number].copy()

# NOTE(review): entries such as "snoRNAgene_4_3-58_SSO2977 null" pass the
# filter but do not follow the "<ORF>_<position> <value>" layout, so their ORF
# becomes "snoRNAgene" and their position/value become a fragment of the
# identifier. They never match an "SSO..." lookup; confirm whether they should
# be dropped or parsed differently.
hyb_1_fields = [entry.split("_") for entry in df_oxygen_transcript2["21vs1.5 hyb_1"]]
hyb_2_fields = [entry.split("_") for entry in df_oxygen_transcript2["21vs1.5 hyb_2"]]
hyb_1_tokens = [fields[1].split(" ") for fields in hyb_1_fields]

# Columns are added in the same order as before so the table layout is unchanged.
df_oxygen_transcript2["ORF"] = [fields[0] for fields in hyb_1_fields]
df_oxygen_transcript2['21vs1.5 hyb_1 transcript'] = [tokens[-1] for tokens in hyb_1_tokens]
df_oxygen_transcript2['21vs1.5 hyb_2 transcript'] = [
    fields[1].split(" ")[-1] for fields in hyb_2_fields
]
df_oxygen_transcript2["position"] = [tokens[0] for tokens in hyb_1_tokens]
df_oxygen_transcript2 = df_oxygen_transcript2.set_index(["ORF"])


In [56]:
# Display the parsed 21vs1.5 table to inspect the extracted ORF, position and
# transcript columns.
df_oxygen_transcript2

Unnamed: 0_level_0,26vs1.5 hyb_1,26vs1.5 hyb_2,21vs1.5 hyb_1,21vs1.5 hyb_2,21vs1.5 hyb_1 transcript,21vs1.5 hyb_2 transcript,position
ORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
snoRNAgene,SIRV2_19623-19832_141-210 2.921390165,SIRV2_19623-19832_141-210 2.272297327,snoRNAgene_4_3-58_SSO2977 null,snoRNAgene_4_3-58_SSO2977 0.239566125,4,4,4
snoRNAgene,SIRV2_19829-20173_114-183 1.490050854,SIRV2_19829-20173_114-183 1.210896782,snoRNAgene_5_1-65_SSO3188 1.008682243,snoRNAgene_5_1-65_SSO3188 0.962969269,5,5,5
snoRNAgene,SIRV2_20451-23663_1627-1696 1.055891201,SIRV2_20451-23663_1627-1696 0.785875195,snoRNAgene_6_2-52_SSO3214 -0.379620962,snoRNAgene_6_2-52_SSO3214 -0.781569545,6,6,6
SSO0001,SIRV2_23785-25488_950-1008 null,SIRV2_23785-25488_950-1008 0.763660461,SSO0001_283-351 0.300448367,SSO0001_283-351 -0.348232419,0.300448367,-0.348232419,283-351
SSO0002,SIRV2_25495-26424_622-691 0.946193556,SIRV2_25495-26424_622-691 0.659722595,SSO0002_643-712 null,SSO0002_643-712 -0.084064265,,-0.084064265,643-712
SSO0004,SIRV2_2635-2907_76-145 1.61705613,SIRV2_2635-2907_76-145 1.293358943,SSO0004_434-501 -0.093560176,SSO0004_434-501 null,-0.093560176,,434-501
SSO0005,SIRV2_27839-28369_240-309 2.411195433,SIRV2_27839-28369_240-309 1.878321443,SSO0005_213-264 -0.651683181,SSO0005_213-264 -0.078609835,-0.651683181,-0.078609835,213-264
SSO0006,SIRV2_28451-29521_517-581 0.991369695,SIRV2_28451-29521_517-581 0.849440323,SSO0006_344-408 0.078563669,SSO0006_344-408 -0.580145484,0.078563669,-0.580145484,344-408
SSO0007,SIRV2_2982-3311_101-170 null,SIRV2_2982-3311_101-170 1.621488377,SSO0007_283-336 0.1376478,SSO0007_283-336 0.522840789,0.1376478,0.522840789,283-336
SSO0009,SIRV2_3477-3827_11-77 0.212567535,SIRV2_3477-3827_11-77 0.118726939,SSO0009_454-512 1.035046947,SSO0009_454-512 0.248107862,1.035046947,0.248107862,454-512


### Add the HU_treatment transcript to the same table

In [27]:
# Annotate every reaction of the gene-reaction-rule table with
#   "HU_treatment"           - the log2(HU TREATED / CONTROL) values of its
#                              genes that are present in df_HU_treatment, and
#   "gene_reaction_relation" - those genes joined by the reaction's
#                              gene_reaction_rule (e.g. "(SSO1 and SSO2)").
# Reactions whose genes cell mentions 'spontaneous' are left empty.

# The genes cell of this reaction holds two comma-separated groups split by "or".
HU_TWO_GROUP_RXN = "oxp_redox_1.6.5.3_RXN0__5330"


def _hu_log2_ratios(gene_tokens):
    """Return the measured genes and their log2 HU/control ratios.

    Each token is turned into an "SSO..." identifier; tokens that are not in
    the df_HU_treatment index are ignored. Surrounding whitespace is stripped
    so that tokens such as " 1234" (after a comma or next to "or") still match.

    Returns a pair ``(genes, log2_ratios)``. A gene whose ratio is not
    positive is still listed in ``genes`` but contributes no log2 value,
    because log2 is undefined there (previously only the single-gene case
    guarded against a zero ratio; the other cases raised ValueError).
    """
    genes = []
    log2_ratios = []
    for token in gene_tokens:
        gene = "SSO" + token.strip()
        if gene not in df_HU_treatment.index:
            continue
        genes.append(gene)
        ratio = df_HU_treatment.loc[gene, "HU TREATED"] / df_HU_treatment.loc[gene, "CONTROL"]
        if ratio > 0:
            log2_ratios.append(math.log(ratio, 2))
    return genes, log2_ratios


# Object dtype so that a cell can hold a Python list.
for new_column in ("HU_treatment", "gene_reaction_relation"):
    df_gene_reaction_rule[new_column] = pd.Series(
        "", index=df_gene_reaction_rule.index, dtype=object
    )

for rxn in df_gene_reaction_rule.index:
    genes_cell = df_gene_reaction_rule.loc[rxn, "genes"]
    # The rule keyword, padded with spaces, is used to join the gene ids.
    joiner = " " + str(df_gene_reaction_rule.loc[rxn, "gene_reaction_rule"]) + " "

    if rxn == HU_TWO_GROUP_RXN:
        groups = genes_cell.split("or")
        first_genes, first_ratios = _hu_log2_ratios(groups[0].split(","))
        # The second group is collected separately; it used to be appended to
        # the first group's list, so the first group's genes were repeated
        # inside the second parenthesis.
        second_genes, second_ratios = _hu_log2_ratios(groups[1].split(","))
        hu_values = first_ratios + second_ratios
        relation = (
            "((" + joiner.join(first_genes) + ") or (" + joiner.join(second_genes) + "))"
        )
    elif 'spontaneous' in str(genes_cell):
        continue
    else:
        tokens = genes_cell.split(",")
        genes, hu_values = _hu_log2_ratios(tokens)
        if len(tokens) > 1:
            relation = "(" + joiner.join(genes) + ")"
        elif genes:
            relation = genes[0]
        else:
            # Single gene without HU data: leave both cells empty.
            continue

    # `.at[row, column]` writes straight into the table; the earlier
    # `df[column].loc[row] = ...` form was a chained assignment.
    df_gene_reaction_rule.at[rxn, "HU_treatment"] = hu_values
    df_gene_reaction_rule.at[rxn, "gene_reaction_relation"] = relation

### Add the Temperature_treatment transcript to the same table

In [39]:
# Annotate every reaction with the temperature-shift values of its genes:
# one list per comparison, holding the value of each gene of the reaction that
# is present in df_temperature_transcript. Reactions whose genes cell mentions
# 'spontaneous' are left empty.

# The genes cell of this reaction holds two comma-separated groups split by "or".
TEMPERATURE_TWO_GROUP_RXN = "oxp_redox_1.6.5.3_RXN0__5330"

# Target column in df_gene_reaction_rule -> source column in
# df_temperature_transcript (the spacing of the names is kept as-is).
TEMPERATURE_COLUMNS = {
    "Temperature_treatment_5 vs. -10": "5 vs.  -10",
    "Temperature_treatment_30 vs. 5": "30 vs. 5",
    "Temperature_treatment_60 vs. 30": "60 vs. 30",
    "Temperature_treatment_60 vs.  -10": "60 vs.  -10",
}

# Object dtype so that a cell can hold a Python list.
for target_column in TEMPERATURE_COLUMNS:
    df_gene_reaction_rule[target_column] = pd.Series(
        "", index=df_gene_reaction_rule.index, dtype=object
    )

for rxn in df_gene_reaction_rule.index:
    genes_cell = df_gene_reaction_rule.loc[rxn, "genes"]

    if rxn == TEMPERATURE_TWO_GROUP_RXN:
        # Both "or" groups contribute to the same lists, first group first.
        groups = genes_cell.split("or")
        tokens = groups[0].split(",") + groups[1].split(",")
    elif 'spontaneous' in str(genes_cell):
        continue
    else:
        tokens = genes_cell.split(",")

    # Strip whitespace so tokens such as " 1234" (after a comma or next to
    # "or") still match an ORF instead of being silently skipped.
    measured_genes = [
        gene
        for gene in ("SSO" + token.strip() for token in tokens)
        if gene in df_temperature_transcript.index
    ]

    for target_column, source_column in TEMPERATURE_COLUMNS.items():
        # `.at[row, column]` writes straight into the table; the earlier
        # `df[column].loc[row] = ...` form was a chained assignment.
        df_gene_reaction_rule.at[rxn, target_column] = [
            df_temperature_transcript.loc[gene, source_column]
            for gene in measured_genes
        ]

### Add the oxygen_treatment transcript to the same table

In [57]:
# Annotate every reaction with the oxygen-treatment values of its genes: one
# list per hybridisation column. The 26vs1.5 values are read from
# df_oxygen_transcript and the 21vs1.5 values from df_oxygen_transcript2.
# Reactions whose genes cell mentions 'spontaneous' are left empty.

# The genes cell of this reaction holds two comma-separated groups split by "or".
OXYGEN_TWO_GROUP_RXN = "oxp_redox_1.6.5.3_RXN0__5330"

# Target column in df_gene_reaction_rule -> (source table, source column).
OXYGEN_COLUMNS = {
    "Oxygen_treatment_26vs1.5 hyb_1 transcript": (df_oxygen_transcript, "26vs1.5 hyb_1 transcript"),
    "Oxygen_treatment_26vs1.5 hyb_2 transcript": (df_oxygen_transcript, "26vs1.5 hyb_2 transcript"),
    "Oxygen_treatment_21vs1.5 hyb_1 transcript": (df_oxygen_transcript2, "21vs1.5 hyb_1 transcript"),
    "Oxygen_treatment_21vs1.5 hyb_2 transcript": (df_oxygen_transcript2, "21vs1.5 hyb_2 transcript"),
}

# Object dtype so that a cell can hold a Python list.
for target_column in OXYGEN_COLUMNS:
    df_gene_reaction_rule[target_column] = pd.Series(
        "", index=df_gene_reaction_rule.index, dtype=object
    )

for rxn in df_gene_reaction_rule.index:
    genes_cell = df_gene_reaction_rule.loc[rxn, "genes"]

    if rxn == OXYGEN_TWO_GROUP_RXN:
        # Both "or" groups contribute to the same lists, first group first.
        groups = genes_cell.split("or")
        tokens = groups[0].split(",") + groups[1].split(",")
    elif 'spontaneous' in str(genes_cell):
        continue
    else:
        tokens = genes_cell.split(",")

    # Strip whitespace so tokens such as " 1234" (after a comma or next to
    # "or") still match an ORF instead of being silently skipped.
    genes = ["SSO" + token.strip() for token in tokens]

    for target_column, (source_table, source_column) in OXYGEN_COLUMNS.items():
        # Membership is tested against the table the value is read from.
        # Previously some branches tested df_temperature_transcript.index and
        # the others tested only df_oxygen_transcript.index, which could raise
        # KeyError or drop genes that do have oxygen data.
        # `.at[row, column]` writes straight into the table; the earlier
        # `df[column].loc[row] = ...` form was a chained assignment.
        df_gene_reaction_rule.at[rxn, target_column] = [
            source_table.loc[gene, source_column]
            for gene in genes
            if gene in source_table.index
        ]

In [58]:
# Write the combined table (gene rules plus the transcript columns added
# above) to "1.csv" in the current working directory.
df_gene_reaction_rule.to_csv("1.csv")