In [45]:
import os
from dotenv import load_dotenv, find_dotenv
from os.path import join, dirname, basename, exists, isdir

### Load environment variables from the project root directory ###
# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)

# now you can get the variables using their names

# Check whether a network drive has been specified.
# os.environ.get returns None (the object) when NETWORK_URL is not set at all,
# while a .env entry of "NETWORK_URL=None" yields the string 'None'. Both, as
# well as an empty value, mean "no network drive configured".
DATABASE = os.environ.get("NETWORK_URL")
if DATABASE is None or DATABASE.strip() in ('', 'None'):
    # no network drive configured, nothing to mount
    pass
else:
    pass
    #mount network drive here

# Directory layout. Everything except CURRENT_DIR is anchored at the folder
# that contains the .env file located above.
CURRENT_DIR = os.getcwd()
PROJ = dirname(dotenv_path)  # project root directory

# data/ and its per-stage sub-directories:
#   raw_external -> external raw data, raw_internal -> internal raw data,
#   intermediate -> intermediate data, final -> final data
DATA = join(PROJ, 'data')
RAW_EXTERNAL, RAW_INTERNAL, INTERMEDIATE, FINAL = (
    join(DATA, stage_dir)
    for stage_dir in ('raw_external', 'raw_internal', 'intermediate', 'final')
)

# results/ (output directory) with separate folders for figures and pictures
RESULTS = join(PROJ, 'results')
FIGURES, PICTURES = (
    join(RESULTS, output_dir) for output_dir in ('figures', 'pictures')
)


# make folders specific for certain data
# Set folder_name to a non-empty string to create a sub-folder of that name
# inside the raw_external, intermediate and final data directories.
folder_name = ''
if folder_name != '':
    # exist_ok=True makes the call a no-op when the directory is already
    # there, without the check-then-create race of a separate exists() test.
    for parent_dir in (RAW_EXTERNAL, INTERMEDIATE, FINAL):
        os.makedirs(join(parent_dir, folder_name), exist_ok=True)


print('Standard variables loaded, you are good to go!')

Standard variables loaded, you are good to go!


In [46]:
import cobra
import os
import pandas as pd
import cameo
import wget
import ssl
from scipy.stats import pearsonr


# E. coli model (eciML1515), downloaded as SBML and loaded with cobra.
# Alternative kept from the original notebook, loading the plain model via cameo:
#eColi_model = cameo.load_model("iML1515")

# SECURITY NOTE(review): this replaces the process-wide default HTTPS context
# with an unverified one, i.e. TLS certificates are no longer checked for any
# later HTTPS request made through the default context. Kept to preserve the
# existing behaviour; remove it if the certificate store on this machine is sane.
ssl._create_default_https_context = ssl._create_unverified_context

# wget.download returns the file name it actually wrote. If "eciML1515.xml"
# already exists in the working directory, wget saves under a different name,
# so the returned name (not a hard-coded one) must be used for reading and
# for cleanup. Otherwise a stale file would be loaded and then deleted.
downloaded_model_path = wget.download("https://raw.githubusercontent.com/BenjaSanchez/notebooks/master/e_coli_simulations/eciML1515.xml")
try:
    eColi_Model = cobra.io.read_sbml_model(downloaded_model_path)
finally:
    # remove the temporary download even if parsing the SBML file fails
    os.remove(downloaded_model_path)


# proteomics data:
proteomics_dataset = f"{INTERMEDIATE}/proteomics_concentrations.csv"


In [47]:
from collections import namedtuple
from cobra.medium.boundary_types import find_external_compartment
from cobra.io.dict import reaction_to_dict
import pandas as pd
import numpy as np

In [48]:
def reset_proteomics(path=None):
    """Load the proteomics table and reshape it into per-condition record lists.

    The CSV is read, every row containing a missing value is dropped, and for
    each measurement column a list with one record per remaining row is built.

    Measurement columns are the columns from "glucose" up to, but NOT
    including, "fructose". Uncertainty columns are the columns from
    "glucose_uncertainty" up to, but not including, "fructose_uncertainty".
    The two groups are paired by position, so they must be in the same order
    in the file.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to the notebook-level ``proteomics_dataset``.

    Returns
    -------
    dict
        Maps each measurement column name to a list of dicts with the keys
        "identifier" (value of the "Uniprot Accession" column), "measurement"
        and "uncertainty".

    Raises
    ------
    KeyError
        If one of the boundary columns or "Uniprot Accession" is missing.
    IndexError
        If there are fewer uncertainty columns than measurement columns.
    """
    if path is None:
        # resolved at call time so the notebook-level setting is honoured
        path = proteomics_dataset

    data = pd.read_csv(path)
    # report whether the raw table contains missing values ...
    print(data.isna().values.any())
    data = data.dropna().reset_index(drop=True)
    # ... and confirm that none are left after dropping incomplete rows
    print(data.isna().values.any())

    columns = data.columns
    # slice end is exclusive: "fructose" itself is not part of the measurements
    cols_measurements = columns[columns.get_loc("glucose"):columns.get_loc("fructose")]
    cols_uncertainties = columns[columns.get_loc("glucose_uncertainty"):columns.get_loc("fructose_uncertainty")]

    # Pull each column out once instead of doing a chained lookup
    # (data[col][row]) for every single cell.
    identifiers = data["Uniprot Accession"].to_numpy()

    proteomics_all = dict()
    for position, measurement_col in enumerate(cols_measurements):
        measurements = data[measurement_col].to_numpy()
        uncertainties = data[cols_uncertainties[position]].to_numpy()
        proteomics_all[measurement_col] = [
            {"identifier": identifier, "measurement": measurement, "uncertainty": uncertainty}
            for identifier, measurement, uncertainty in zip(identifiers, measurements, uncertainties)
        ]
    return proteomics_all


In [49]:
# Build the per-condition proteomics dictionary: each measurement column name
# maps to a list of {"identifier", "measurement", "uncertainty"} records.
proteomics_data_dict = reset_proteomics()
# NOTE(review): this step was marked "hacky" by the original author; the
# reason is not recorded in the notebook.

    

True
False


{'glucose': [{'identifier': 'P0A8T7',
   'measurement': 4.901690917919181e-06,
   'uncertainty': 1.0033761308980561e-06},
  {'identifier': 'P0A8V2',
   'measurement': 6.979485772654262e-06,
   'uncertainty': 1.2667766677367487e-06},
  {'identifier': 'P36683',
   'measurement': 1.3398072764488699e-05,
   'uncertainty': 2.308487937321403e-06},
  {'identifier': 'P15254',
   'measurement': 4.331972973878916e-06,
   'uncertainty': 4.033066838681271e-07},
  {'identifier': 'P09831',
   'measurement': 5.0427975294461814e-06,
   'uncertainty': 3.1769624435510943e-07},
  {'identifier': 'P0AFG8',
   'measurement': 1.0256686825368851e-05,
   'uncertainty': 2.280061481279496e-06},
  {'identifier': 'P0A9Q7',
   'measurement': 8.556352156468494e-06,
   'uncertainty': 2.013309662417036e-06},
  {'identifier': 'P0CE47',
   'measurement': 0.0004452830786651792,
   'uncertainty': 4.207925093385943e-05},
  {'identifier': 'P25665',
   'measurement': 9.098554311260992e-05,
   'uncertainty': 5.904961748008385

In [50]:
from simulations.modeling.driven import (
    adjust_fluxes2model,
    flexibilize_proteomics,
    minimize_distance,
)

In [51]:
# eColi_Model.solver = 'glpk'

In [52]:
# Re-read the proteomics table to recover the names of the measurement
# columns ("glucose" up to, but not including, "fructose").
data = pd.read_csv(proteomics_dataset)
cols_measurements = data.columns[data.columns.get_loc("glucose"):data.columns.get_loc("fructose")]

# Growth rates per condition: keep only the condition name, the growth rate
# and its standard deviation.
growth_rates = pd.read_csv(f"{RAW_INTERNAL}/proteomics/growth_conditions.csv")
# The axis must be passed by keyword: the positional form drop(labels, 1) is
# deprecated since pandas 1.3 and raises a TypeError in pandas 2.x.
growth_rates = growth_rates.drop(
    columns=growth_rates.columns.difference(['Growth condition','Growth rate (h-1)', 'Stdev'])
)
# Drop the rows labelled 0 and 1.
# NOTE(review): presumably header/unit rows of the source table; confirm.
growth_rates = growth_rates.drop([0,1], axis=0)

print(cols_measurements)

Index(['glucose', 'lb', 'glycerolaa', 'acetate', 'fumarate', 'glucosamine',
       'glycerol', 'pyruvate', 'chemostatµ05', 'chemostatµ035',
       'chemostatµ020', 'chemostatµ012', 'stationaryphase1day',
       'stationaryphase3days', 'osmoticstressglucose', '42cglucose',
       'ph6glucose', 'xylose', 'mannose', 'galactose', 'succinate'],
      dtype='object')


[]

In [59]:
# Flexibilize the proteomics constraints for the four chemostat conditions.
# Each stanza takes the proteomics records of one condition, looks up the
# measured growth rate, and calls flexibilize_proteomics on the model.

# identifier of the biomass reaction passed to flexibilize_proteomics
BIOMASS_REACTION_ID = "BIOMASS_Ec_iML1515_core_75p37M"


def _growth_rate_for(condition):
    """Return growth rate and standard deviation of one growth condition.

    Reads the notebook-level ``growth_rates`` table and takes the first row
    whose 'Growth condition' equals ``condition``.

    Returns a dict with the keys "measurement" ('Growth rate (h-1)') and
    "uncertainty" ('Stdev'), both converted to float.
    Raises IndexError if no row matches ``condition``.
    """
    matching_rows = growth_rates.loc[growth_rates['Growth condition'] == condition]
    return {
        "measurement": float(matching_rows['Growth rate (h-1)'].iloc[0]),
        "uncertainty": float(matching_rows['Stdev'].iloc[0]),
    }


# chemo 12
chemo_data_dict_12 = proteomics_data_dict["chemostatµ012"]
growth_chemo_12 = _growth_rate_for("chemostat µ=0.12")
new_growth_rate_chemo12, new_proteomics_chemo12, warnings_chemo12 = flexibilize_proteomics(
    eColi_Model, BIOMASS_REACTION_ID, growth_chemo_12, chemo_data_dict_12, []
)

# chemo 20
chemo_data_dict_20 = proteomics_data_dict["chemostatµ020"]
growth_chemo_20 = _growth_rate_for("chemostat µ=0.20")
new_growth_rate_chemo20, new_proteomics_chemo20, warnings_chemo20 = flexibilize_proteomics(
    eColi_Model, BIOMASS_REACTION_ID, growth_chemo_20, chemo_data_dict_20, []
)

# chemo 35
chemo_data_dict_35 = proteomics_data_dict["chemostatµ035"]
growth_chemo_35 = _growth_rate_for("chemostat µ=0.35")
new_growth_rate_chemo35, new_proteomics_chemo35, warnings_chemo35 = flexibilize_proteomics(
    eColi_Model, BIOMASS_REACTION_ID, growth_chemo_35, chemo_data_dict_35, []
)

# chemo 50
chemo_data_dict_50 = proteomics_data_dict["chemostatµ05"]
growth_chemo_50 = _growth_rate_for("chemostat µ=0.5")
# Pass growth_chemo_50 here: the previous code referenced `growth_chemo`,
# which is not defined in this notebook (NameError on a fresh kernel, or a
# stale value from an earlier session).
new_growth_rate_chemo50, new_proteomics_chemo50, warnings_chemo50 = flexibilize_proteomics(
    eColi_Model, BIOMASS_REACTION_ID, growth_chemo_50, chemo_data_dict_50, []
)


P36683    3.662313e-05
P09831    3.749847e-06
P0AFG8    1.279594e-05
P0A9Q7    2.193684e-05
P25665    1.147625e-05
              ...     
P77783    0.000000e+00
P33607    1.104363e-08
P43337    3.643411e-08
P61887    2.467943e-08
P0AC78    1.560366e-08
Name: value, Length: 820, dtype: float64
P36683    4.159123e-05
P15254    3.856413e-06
P09831    4.494580e-06
P0AFG8    1.158131e-05
P0A9Q7    2.088777e-05
              ...     
P77783    0.000000e+00
P33607    1.401275e-08
P43337    3.935275e-08
P61887    3.423461e-08
P0AC78    1.435521e-08
Name: value, Length: 856, dtype: float64
P36683    4.359975e-05
P15254    3.770820e-06
P09831    4.304719e-06
P0AFG8    1.141516e-05
P0A9Q7    1.682622e-05
              ...     
P77783    0.000000e+00
P33607    9.087677e-09
P43337    1.915300e-08
P61887    4.086825e-08
P0AC78    9.163651e-09
Name: value, Length: 856, dtype: float64
P36683    3.075390e-05
P15254    4.635079e-06
P09831    4.666612e-06
P0AFG8    1.186984e-05
P0A9Q7    2.210191e-05
   