# Preprocessing of fermentation data

Author: Diana Szeliova \
Last update: 15.7.2024

* calculate mean and standard deviation
* merge everything into one data frame
* convert rates to mmol/(g·h)
* save data

In [1]:
import pandas as pd
import pickle

## Read data for growth rate, glucose and MSG

In [2]:
# Load the raw fermentation table and discard the row labelled 0
# (presumably a units/sub-header row below the column names — TODO confirm).
ferm_data = pd.read_csv(
    "../data/experimental/ferm_data.csv"
).drop(index=0)

In [3]:
def _to_numeric_if_possible(column):
    """Return `column` converted to a numeric dtype, or unchanged if it cannot be parsed.

    Replaces the deprecated `pd.to_numeric(..., errors='ignore')`, which returned
    the input as-is whenever any value failed to parse.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # Non-numeric column (e.g. text labels): keep it untouched.
        return column


# Convert every column that parses cleanly as numbers; leave the others as they are.
ferm_data = ferm_data.apply(_to_numeric_if_possible)

In [4]:
# Mean and standard deviation of the measured rates per group.
# "Unnamed: 0" is the unlabeled first column of the CSV and serves as the grouping key.
rate_columns = ["Unnamed: 0", "q_Glc", "q_MSG", "Growth rate µ"]
rates = ferm_data[rate_columns]
grouped_rates = rates.groupby("Unnamed: 0")
rates_mean = grouped_rates.mean()
rates_sd = grouped_rates.std()

# Flip the sign of the mean glucose and MSG rates; the standard deviations are left as-is.
# (presumably switches to an "uptake is negative" convention — TODO confirm)
for negated_col in ("q_Glc", "q_MSG"):
    rates_mean[negated_col] = -rates_mean[negated_col]

# NOTE(review): at this point the index holds the group labels from "Unnamed: 0",
# yet it is named "Metabolite" — confirm this label is intended.
rates_mean.index.name = "Metabolite"

## Read amino acid (AA) and trehalose rates

In [5]:
# Number of leading rows to skip in the CSV files to reach each condition's table.
read_info = {
    "Low Cell Density": 4,
    "High Cell Density": 24,
    "Overfeeding": 44,
}


def _read_condition_column(csv_path, condition, offset):
    """Read one condition's table from `csv_path` as a single-column frame.

    Skips `offset` rows, then reads 16 header-less rows, keeping file column 0
    as the index and file column 2 as the only data column, which is renamed
    to `condition`.
    """
    table = pd.read_csv(
        csv_path,
        skiprows=offset,
        usecols=[0, 2],
        nrows=16,
        index_col=0,
        header=None,
    )
    table.columns = [condition]
    return table


# One single-column frame per condition, for the rates and for their standard deviations.
all_rates = {}
all_sds = {}
for cond, rows_to_skip in read_info.items():
    all_rates[cond] = _read_condition_column(
        "../data/experimental/sulfolobus_rates.csv", cond, rows_to_skip
    )
    all_sds[cond] = _read_condition_column(
        "../data/experimental/sulfolobus_stdev.csv", cond, rows_to_skip
    )

In [6]:
def _merge_and_label(per_condition):
    """Combine the three per-condition frames into one frame with "q_"-prefixed row labels.

    The frames are joined on their index (pandas' default inner join), the
    "Cysteine" row is removed, every index label gets a "q_" prefix and the
    index name is set to an empty string.
    """
    high_and_overfeeding = pd.merge(
        per_condition["High Cell Density"],
        per_condition["Overfeeding"],
        left_index=True,
        right_index=True,
    )
    combined = pd.merge(
        per_condition["Low Cell Density"],
        high_and_overfeeding,
        left_index=True,
        right_index=True,
    )

    # no cysteine transport in the model (also not secreted)
    combined = combined.drop(index="Cysteine")

    # rename metabolites
    combined.index = "q_" + combined.index
    combined.index.name = ""
    return combined


# Same processing for the rates and for their standard deviations.
merged_rates = _merge_and_label(all_rates)
merged_sd = _merge_and_label(all_sds)

In [7]:
# Stack the transposed fermentation statistics on top of the merged per-condition
# frames (row-wise concatenation; columns are matched by label).
# NOTE: `all_rates` / `all_sds` are rebound here from dicts to DataFrames.
all_rates = pd.concat([rates_mean.transpose(), merged_rates])
all_sds = pd.concat([rates_sd.transpose(), merged_sd])

## Convert to mmol/(g·h)

In [8]:
# Molar-mass table; the first CSV column becomes the index
# (its labels are used later to select rows of the rate tables).
molar_masses = pd.read_csv(
    "../data/molar_masses.csv",
    index_col=0,
)

In [9]:
# convert to mmol/g/h
# Divide every row that has a molar mass by that mass expressed in g/mmol
# (g/mol divided by 1000). Rows without an entry in `molar_masses` are left unchanged.
#
# The assignment goes through a single `.loc` on the frame itself. The previous
# chained form (`frame[col].loc[rows] /= ...`) writes to a temporary Series, which
# triggers chained-assignment warnings and, with pandas Copy-on-Write enabled,
# silently leaves the frame unconverted.
#
# NOTE: this updates `all_rates` / `all_sds` in place, so running the cell twice
# applies the conversion twice.
mass_g_per_mmol = molar_masses["Molar mass [g/mol]"] / 1000
rows_with_mass = molar_masses.index

all_rates.loc[rows_with_mass] = all_rates.loc[rows_with_mass].div(mass_g_per_mmol, axis=0)
all_sds.loc[rows_with_mass] = all_sds.loc[rows_with_mass].div(mass_g_per_mmol, axis=0)

In [10]:
# Swap rows and columns of both tables.
# NOTE: rebinding the same names means re-running this cell flips them back.
all_rates = all_rates.transpose()
all_sds = all_sds.transpose()

## Set metabolites not detected in the waste medium to zero

In [11]:
# Columns to add (or overwrite) with a constant rate and standard deviation of 0.
secretions = [
    "q_Lactate",
    "q_Acetate",
    "q_EtOH",
    "q_Glycerol",
    "q_Oxaloacetate",
    "q_Malate",
    "q_Pyruvate",
    "q_Citrate",
]

for table in (all_rates, all_sds):
    for met in secretions:
        table[met] = 0

## Save data

In [12]:
# Bundle the mean rates and their standard deviations and pickle them together.
processed_rates = {
    "rates_mean": all_rates,
    "rates_sd": all_sds,
}
with open('../data/processed_rates.pkl', 'wb') as pkl_file:
    pickle.dump(processed_rates, pkl_file)

In [17]:
# Also export both tables as CSV files.
for table, csv_path in (
    (all_rates, "../data/processed_rates.csv"),
    (all_sds, "../data/processed_sds.csv"),
):
    table.to_csv(csv_path)

In [16]:
# Display the final table of mean rates.
all_rates

Unnamed: 0,q_Glc,q_MSG,Growth rate µ,q_Trehalose,q_Glycine,q_Alanine,q_Proline,q_Valine,q_Threonine,q_Isoleucine,...,q_Arginine,q_Tyrosine,q_Lactate,q_Acetate,q_EtOH,q_Glycerol,q_Oxaloacetate,q_Malate,q_Pyruvate,q_Citrate
High Cell Density,-0.227136,-0.529681,0.036818,0.017529,0.000605,1.8e-05,9.7e-05,0.00034,3.2e-05,9e-06,...,0.0,0.0,0,0,0,0,0,0,0,0
Low Cell Density,-0.129521,-0.366316,0.030428,0.004324,0.0,0.0,0.0,0.0,2e-06,0.0,...,0.0,0.0,0,0,0,0,0,0,0,0
Overfeeding,-0.33068,-0.711466,0.054386,0.026731,0.006019,0.003367,0.000127,0.029035,0.000257,0.000512,...,0.0,0.0,0,0,0,0,0,0,0,0
