In [59]:
import os
from dotenv import load_dotenv, find_dotenv
from os.path import join, dirname, basename, exists, isdir

### Load environmental variables from the project root directory ###
# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)

# now you can get the variables using their names

# Check whether a network drive has been specified
DATABASE = os.environ.get("NETWORK_URL")
if DATABASE == 'None':
    pass
else:
    pass
    #mount network drive here

# set up directory pathsa
CURRENT_DIR = os.getcwd()
PROJ = dirname(dotenv_path) # project root directory

DATA = join(PROJ, 'data') #data directory
RAW_EXTERNAL = join(DATA, 'raw_external') # external data raw directory
RAW_INTERNAL = join(DATA, 'raw_internal') # internal data raw directory
INTERMEDIATE = join(DATA, 'intermediate') # intermediate data directory
FINAL = join(DATA, 'final') # final data directory

RESULTS = join(PROJ, 'results') # output directory
FIGURES = join(RESULTS, 'figures') # figure output directory
PICTURES = join(RESULTS, 'pictures') # picture output directory


# make folders specific for certain data
folder_name = ''
if folder_name != '':
    #make folders if they don't exist
    if not exists(join(RAW_EXTERNAL, folder_name)):
        os.makedirs(join(RAW_EXTERNAL, folder_name))

    if not exists(join(INTERMEDIATE, folder_name)):
        os.makedirs(join(INTERMEDIATE, folder_name))

    if not exists(join(FINAL, folder_name)):
        os.makedirs(join(FINAL, folder_name))


print('Standard variables loaded, you are good to go!')

Standard variables loaded, you are good to go!


In [60]:
import cobra
import os
import pandas as pd
import cameo
import wget
import ssl
from scipy.stats import pearsonr


#E. coli model:
#eColi_model = cameo.load_model("iML1515")


#E. coli model:
ssl._create_default_https_context = ssl._create_unverified_context
wget.download("https://raw.githubusercontent.com/BenjaSanchez/notebooks/master/e_coli_simulations/eciML1515.xml")
eColi_Model = cobra.io.read_sbml_model("eciML1515.xml")
os.remove("eciML1515.xml")


# proteomics data:
proteomics_dataset = f"{INTERMEDIATE}/proteomics_concentrations.csv"


In [61]:
from collections import namedtuple
from cobra.medium.boundary_types import find_external_compartment
from cobra.io.dict import reaction_to_dict
import pandas as pd
import numpy as np

In [62]:
from simulations.modeling.driven import (
    adjust_fluxes2model,
    flexibilize_proteomics,
    minimize_distance,
)

In [63]:
def reset_proteomics():
    data = pd.read_csv(proteomics_dataset)  # yeast
    print(data.isna().values.any())
    data = data.dropna()
    print(data.isna().values.any())

    data = data.reset_index(drop=True)
        
    # cols_measurements 
    cols_measurements = data.columns[data.columns.get_loc("glucose"):data.columns.get_loc("fructose")]

    # cols uncertainties
    cols_uncertainties = data.columns[data.columns.get_loc("glucose_uncertainty"):data.columns.get_loc("fructose_uncertainty")]

    # E. coli
    proteomics_all = dict()
    for i in range(0,len(cols_measurements)):
        measurement = cols_measurements[i]
        proteomics = []
        for j in range(0,data.shape[0]):
            protein = {"identifier":data["Uniprot Accession"][j], \
            "measurement":data[cols_measurements[i]][j], \
            "uncertainty":data[cols_uncertainties[i]][j]}
            proteomics.append(protein)
        proteomics_all[cols_measurements[i]] = proteomics
    return(proteomics_all)


In [64]:
proteomics_data_dict = reset_proteomics()
#### change to proteomics concentrations !!!!!!!!!!!!!!!!!
#print(proteomics_data_dict["Acetate"])

True
False


In [65]:
#solution = ecModel.optimize()
# ecModel.reactions.CPGNR1.
# eColi_Model.solver = 'glpk'

In [66]:
# fake_proteomics = [
#     {"identifier": "P0A8V2", "measurement": 5.03e-6, "uncertainty": 0},  # not in model
#     {"identifier": "P0AFG8", "measurement": 8.2e-3, "uncertainty": 8.2e-6},  # will stay
#     {"identifier": "P15254", "measurement": 6.54e-8, "uncertainty": 0},  # to remove
#     {"identifier": "P0A6C5", "measurement": 5.93e-8, "uncertainty": 0},  # to remove
# ]
# measurements = compute_measurements(proteomics_data_dict["Glucose"], ecModel)


In [67]:
# # check if incorporation seems to work
# new_growth_rate, new_proteomics, warnings = flexibilize_proteomics(ecModel, "BIOMASS_Ec_iML1515_core_75p37M", {"measurement":0.1, "uncertainty":0.01}, fake_proteomics)
# print(new_growth_rate)
# print(new_proteomics)
# solution = ecModel.optimize()
# print(solution)

In [68]:
# run on real data and growth rates
data = pd.read_csv(proteomics_dataset)
cols_measurements = data.columns[data.columns.get_loc("glucose"):data.columns.get_loc("fructose")]

growth_rates = pd.read_csv(f"{RAW_INTERNAL}/proteomics/growth_conditions.csv")
growth_rates = growth_rates.drop(growth_rates.columns.difference(['Growth condition','Growth rate (h-1)', 'Stdev']), 1)
growth_rates = growth_rates.drop([0,1], axis=0)

print(cols_measurements)

Index(['glucose', 'lb', 'glycerolaa', 'acetate', 'fumarate', 'glucosamine',
       'glycerol', 'pyruvate', 'chemostatµ05', 'chemostatµ035',
       'chemostatµ020', 'chemostatµ012', 'stationaryphase1day',
       'stationaryphase3days', 'osmoticstressglucose', '42cglucose',
       'ph6glucose', 'xylose', 'mannose', 'galactose', 'succinate'],
      dtype='object')


In [69]:
from cobra.medium import minimal_medium
# create minimal media
## find 
minimal_media = ['Glycerol + AA', 'Glucose', 'Glycerol', 'Acetate', 'Fumarate', 'Glucosamine',\
                 'Pyruvate', 'Xylose', 'Mannose', 'Galactose', 'Succinate']
minimal_medium(eColi_Model)

# all exchange reactions
exchange_reactions = [i for i in eColi_Model.reactions if "EX" in i.id]

# get the basic set of minimal medium exchange reactions
basic_medium = minimal_medium(eColi_Model)

def find_exchange_rxn(compound, model):
    exchange_reactions = [i for i in model.reactions if "EX" in i.id]
    compound_ex_rxn = [i for i in exchange_reactions if compound in i.name]
    compound_ex_rxn = [i for i in compound_ex_rxn if len(list(i._metabolites.keys())) == 1 \
                       & (list(i._metabolites.values())[0] == 1.0) \
                        & (list(i._metabolites.keys())[0].name == compound + " [extracellular space]")]
    return compound_ex_rxn

def find_all_rxns_of_compound(compound, model):
    compound_ex_rxn = [i for i in model.reactions if compound in i.name]


# find glycerol exchange reaction
gly_ex_rxns = [i for i in exchange_reactions if "Glycerol" in i.name]
gly_ex = gly_ex_rxns[0]
gly_ex = find_exchange_rxn("Glycerol", eColi_Model)

# find acetate exchange reaction
ac_ex = find_exchange_rxn("Acetate", eColi_Model)
print(ac_ex[0].id)
# find Fumarate
fum_ex = find_exchange_rxn("Fumarate", eColi_Model)
print(fum_ex)
# find Glucosamine
glcam_ex = find_exchange_rxn("Glucosamine", eColi_Model)
print(glcam_ex)
# find Pyruvate
pyr_ex = find_exchange_rxn("Pyruvate", eColi_Model)
print(pyr_ex)
# find Xylose
xyl_ex = find_exchange_rxn("Xylose", eColi_Model)
print(xyl_ex)
# find Mannose 
man_ex = find_exchange_rxn("Mannose", eColi_Model)
print(man_ex)
# find Galactose
gal_ex = find_exchange_rxn("Galactose", eColi_Model)
print(gal_ex)
# find Succinate
suc_ex = find_exchange_rxn("Succinate", eColi_Model)
print(suc_ex)
list(eColi_Model.reactions.AACPS4No1.metabolites.keys())[0]

EX_ac_e_REV
[<Reaction EX_fum_e_REV at 0x145795160>]
[]
[<Reaction EX_pyr_e_REV at 0x1457b2ef0>]
[]
[]
[]
[<Reaction EX_succ_e_REV at 0x1457b2e80>]


0,1
Metabolite identifier,hdcea_c
Name,Hexadecenoate (n-C16:1) [cytosol]
Memory address,0x01340c50f0
Formula,C16H29O2
Compartment,c
In 9 reaction(s),"APH161No1, FACOAE161No1, LPLIPAL2G161No1, 2AGPGAT161No1, FA161ACPHiNo1, 2AGPEAT161No1, LPLIPAL2E161No1, LPLIPAL2A161No1, AACPS4No1"


In [70]:
# get original medium
original_medium = eColi_Model.medium

# 
print(eColi_Model.optimize())

# minimal medium with acetate
acetate_growth_rate = list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == "Acetate"])[0]
ac_model = eColi_Model.copy()
ac_medium = original_medium
ac_medium.pop("EX_glc__D_e_REV", None)
ac_medium[f'{ac_ex[0].id}'] = 10
ac_model.medium = ac_medium
# pyr_model.medium = minimal_medium(pyr_model).to_dict()
print(ac_model.optimize())

# minimal medium with succinate
succinate_growth_rate = list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == "Succinate"])[0]
suc_model = eColi_Model.copy()
suc_medium = original_medium
suc_medium.pop("EX_glc__D_e_REV", None)
suc_medium[f'{suc_ex[0].id}'] = 10
suc_model.medium = suc_medium
# pyr_model.medium = minimal_medium(pyr_model).to_dict()
print(suc_model.optimize())

# minimal medium with fumarate
fumarate_growth_rate = list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == "Fumarate"])[0]
fum_model = eColi_Model.copy()
fum_medium = original_medium
fum_medium.pop("EX_glc__D_e_REV", None)
fum_medium[f'{fum_ex[0].id}'] = 10
fum_model.medium = fum_medium
# pyr_model.medium = minimal_medium(pyr_model).to_dict()
print(fum_model.optimize())

# minimal medium with pyruvate
pyruvate_growth_rate = list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == "Pyruvate"])[0]
pyr_model = eColi_Model.copy()
pyr_medium = original_medium
pyr_medium.pop("EX_glc__D_e_REV", None)
pyr_medium[f'{pyr_ex[0].id}'] = 10
pyr_model.medium = pyr_medium
# pyr_model.medium = minimal_medium(pyr_model).to_dict()
print(pyr_model.optimize())


<Solution 0.877 at 0x13c0894e0>
<Solution 0.137 at 0x12fa93550>
<Solution 0.479 at 0x134da8e48>
<Solution 0.781 at 0x136474550>
<Solution 1.073 at 0x13fd85eb8>


In [71]:
# run simulations
# import simulations.src.simulations. as sim
# # simulations.flexibilize_proteomics()

# #proteomics_data_dict["succinate"]

# print(dir(sim))

eColi_Model.solver

<optlang.cplex_interface.Model at 0x12d7d1f98>

In [72]:
# # # f = [i for i in proteomics_data_dict["chemostatµ012"] if i['measurement'] == 0]
# import math
# chemo_data_dict = [{'measurement':i['measurement'], 'uncertainty':0, 'identifier':i['identifier']} \
#  if math.isnan(i['uncertainty']) else i for i in proteomics_data_dict["chemostatµ012"]]
# chemo_data_dict = [{'measurement':0, 'uncertainty':i['uncertainty'], 'identifier':i['identifier']} \
#  if math.isnan(i['measurement']) else i for i in proteomics_data_dict["chemostatµ012"]]



# growth_chemo = {"measurement":float(list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == "chemostat µ=0.12"])[0]),\
#     "uncertainty":float(list(growth_rates['Stdev'].loc[growth_rates['Growth condition'] == "chemostat µ=0.12"])[0])}
# new_growth_rate_suc, new_proteomics_suc, warnings_suc = flexibilize_proteomics(eColi_Model,  "BIOMASS_Ec_iML1515_core_75p37M", 1, chemo_data_dict)
# #growth_chemo

In [73]:
# # acetate sim
# print("Number of proteins originally: ", len(proteomics_data_dict["acetate"]))
# growth_ac = {"measurement":float(list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == "Acetate"])[0]),\
#     "uncertainty":float(list(growth_rates['Stdev'].loc[growth_rates['Growth condition'] == "Acetate"])[0])}
# new_growth_rate_ac, new_proteomics_ac, warnings_ac = flexibilize_proteomics(ac_model,  "BIOMASS_Ec_iML1515_core_75p37M", growth_ac, proteomics_data_dict["acetate"])
# print("Number of proteins incorporated: ", len(new_proteomics_ac))
# print("Original growth rate: ", growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == "Acetate"])
# print("New growth rate: ", new_growth_rate_ac)


# succinate sim
print("Number of proteins originally: ", len(proteomics_data_dict["succinate"]))
growth_suc = {"measurement":float(list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == "Succinate"])[0]),\
    "uncertainty":float(list(growth_rates['Stdev'].loc[growth_rates['Growth condition'] == "Succinate"])[0])}
new_growth_rate_suc, new_proteomics_suc, warnings_suc = flexibilize_proteomics(suc_model,  "BIOMASS_Ec_iML1515_core_75p37M", growth_suc, proteomics_data_dict["succinate"], [])
print("Number of proteins incorporated: ", len(new_proteomics_suc))

# pyruvate sim
print("Number of proteins originally: ", len(proteomics_data_dict["pyruvate"]))
growth_pyr = {"measurement":float(list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == "Pyruvate"])[0]),\
    "uncertainty":float(list(growth_rates['Stdev'].loc[growth_rates['Growth condition'] == "Pyruvate"])[0])}
new_growth_rate_pyr, new_proteomics_pyr, warnings_pyr = flexibilize_proteomics(pyr_model,  "BIOMASS_Ec_iML1515_core_75p37M", growth_pyr, proteomics_data_dict["pyruvate"], [])
print("Number of proteins incorporated: ", len(new_proteomics_pyr))


Number of proteins originally:  2058
Number of proteins incorporated:  2057
Number of proteins originally:  2058
Number of proteins incorporated:  1979


In [74]:
# proteomics_data_dict['succinate']

In [75]:


# usages of ac proteins
# solution = cobra.flux_analysis.pfba(_model)
solution = pyr_model.optimize()

# suc model uages
abs_usages_suc = pd.Series()
perc_usages_suc = pd.Series()
for reaction in suc_model.reactions:
    if reaction.id.startswith("prot_"):
        prot_id = reaction.id.replace("prot_","")
        prot_id = prot_id.replace("_exchange","")
        abs_usage_suc = solution.fluxes[reaction.id]
        abs_usages_suc = abs_usages_suc.append(pd.Series({prot_id:abs_usage_suc}))
        perc_usage_suc = solution.fluxes[reaction.id]/reaction.upper_bound*100
        perc_usages_suc = perc_usages_suc.append(pd.Series({prot_id:perc_usage_suc}))

solution = pyr_model.optimize()

# pyr model uages
abs_usages_pyr = pd.Series()
perc_usages_pyr = pd.Series()
for reaction in pyr_model.reactions:
    if reaction.id.startswith("prot_"):
        prot_id = reaction.id.replace("prot_","")
        prot_id = prot_id.replace("_exchange","")
        abs_usage_pyr = solution.fluxes[reaction.id]
        abs_usages_pyr = abs_usages_pyr.append(pd.Series({prot_id:abs_usage_pyr}))
        perc_usage_pyr = solution.fluxes[reaction.id]/reaction.upper_bound*100
        perc_usages_pyr = perc_usages_pyr.append(pd.Series({prot_id:perc_usage_pyr}))




invalid value encountered in double_scalars


divide by zero encountered in double_scalars


invalid value encountered in double_scalars



In [82]:
# save absolute usages 
abs_usages_pyr.name = "pyruvate"
abs_usages_pyr.to_frame().to_csv(join(INTERMEDIATE,'ec_incorporation_pyr.csv'))


In [53]:

pd.set_option('display.max_rows', 20000)
usages.sort_values(ascending=False)

# [proteomics_data_dict["Acetate"][] for i in usages.index]
proteomics_ac = {i['identifier']:i['measurement'] for i in proteomics_data_dict["Acetate"]}
# pd.DataFrame(computed=usages , measured = [proteomics_ac[i] for i in usages.index])
print("amount of proteins", len(usages)/len(proteomics_ac))
# print(len(proteomics_ac))

#pd.DataFrame(np.array(list(usages[usages.index.isin(proteomics_ac.keys())]), [proteomics_ac[i] for i in usages.index if i in proteomics_ac.keys()]), \
#             columns=["usages", "measured"])

usages_vs_measured = usages[usages.index.isin(proteomics_ac.keys())].to_frame()
usages_vs_measured["measured"] = [proteomics_ac[i] for i in usages.index if i in proteomics_ac.keys()]



NameError: name 'usages' is not defined

In [46]:
# change atpm

# ac_model.reactions.ATPM.lower_bound = 6.86
# ac_model.optimize()

In [21]:
# correlation between measured proteins and actual proteins in the model
non_zero_usage = usages_vs_measured[usages_vs_measured.iloc[:, 0] != 0]

print(usages_vs_measured.shape)
print(non_zero_usage.shape)

pearsonr(non_zero_usage.iloc[:, 0], non_zero_usage.iloc[:, 1])

(946, 2)
(322, 2)


(-0.011983079284280943, 0.8303909964829509)

In [47]:
# run all  
# solutions = dict()
# errors = []
# counter = 0
# for i in cols_measurements:
#     counter += 1
#     print(f"Model {counter} of {len(cols_measurements)}")

#     new_growth_rate, new_proteomics, warnings = flexibilize_proteomics(eColi_Model, "BIOMASS_Ec_iML1515_core_75p37M", \
#     {"measurement":float(list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == i])[0]),\
#     "uncertainty":float(list(growth_rates['Stdev'].loc[growth_rates['Growth condition'] == i])[0])}, \
#     proteomics_data_dict[i])
#     solutions[i] = eColi_Model.optimize()
