In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from os.path import join, dirname, basename, exists, isdir

### Load environmental variables from the project root directory ###
# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)

# now you can get the variables using their names

# Check whether a network drive has been specified
DATABASE = os.environ.get("NETWORK_URL")
if DATABASE == 'None':
    pass
else:
    pass
    #mount network drive here

# set up directory paths
CURRENT_DIR = os.getcwd()
PROJ = dirname(dotenv_path) # project root directory

DATA = join(PROJ, 'data') #data directory
RAW_EXTERNAL = join(DATA, 'raw_external') # external data raw directory
RAW_INTERNAL = join(DATA, 'raw_internal') # internal data raw directory
INTERMEDIATE = join(DATA, 'intermediate') # intermediate data directory
FINAL = join(DATA, 'final') # final data directory

RESULTS = join(PROJ, 'results') # output directory
FIGURES = join(RESULTS, 'figures') # figure output directory
PICTURES = join(RESULTS, 'pictures') # picture output directory


# make folders specific for certain data
folder_name = ''
if folder_name != '':
    #make folders if they don't exist
    if not exists(join(RAW_EXTERNAL, folder_name)):
        os.makedirs(join(RAW_EXTERNAL, folder_name))

    if not exists(join(INTERMEDIATE, folder_name)):
        os.makedirs(join(INTERMEDIATE, folder_name))

    if not exists(join(FINAL, folder_name)):
        os.makedirs(join(FINAL, folder_name))


print('Standard variables loaded, you are good to go!')

Standard variables loaded, you are good to go!


In [2]:
import pandas as pd
import re

# Extract of supplementary table 5:
data = pd.read_csv(f"{RAW_INTERNAL}/proteomics/protein_values.csv", index_col=0)
data_counts = pd.read_csv(f"{RAW_EXTERNAL}/raw_proteomics_all.csv", index_col=0)
data_counts = data_counts.drop(data_counts.index[0])

# remove original index and duplicates
data_counts.index = data_counts['UP']

# # Molecular weights:
MW = data["Molecular weight (Da)"] # Da = g/mol
MW = MW / 1000 # kDa = g/mmol
print(MW)



Uniprot Accession
P0A8T7    155.045008
P0A8V2    150.520276
P36683     93.420946
P15254    141.295898
P09831    163.176315
             ...    
P36667     31.022762
P0AC78     40.912094
P76164      8.702816
P38506     28.130364
Q46810     21.481914
Name: Molecular weight (Da), Length: 2359, dtype: float64


In [3]:
# start and end columns
start_column = data_counts.columns.get_loc("LB")
end_column = data_counts.columns.get_loc("fructose.2")

# rename columns from .2 to uncertainty
# rename_dict = {col_name:re.sub(r".2$", "_uncertainty", col_name) for col_name in \
#  data.columns[start_column_uncertainty:end_column_uncertainty+1]}
# data = data.rename(columns=rename_dict)
# list of indices of data needed
list_of_indices = list(range(start_column,end_column+1))
data_counts = data_counts.iloc[:, list_of_indices]

# get cell volumes
cell_volumes = pd.read_csv(f"{RAW_INTERNAL}/proteomics/growth_conditions.csv", index_col=0)
cell_volumes = cell_volumes["Single cell volume [fl]1"]
# remove the first two rows of LB
cell_volumes = cell_volumes.loc[~cell_volumes.index.duplicated(keep='first')]
# rename the number 3 in there
cell_volumes = cell_volumes.rename({'Osmotic-stress glucose3':'Osmotic-stress glucose_uncertainty'}, axis='index')


In [4]:
# Convert values to mmol/cell:
data_counts = data_counts.apply(lambda x: pd.to_numeric(x, errors='coerce'))
data_counts = data_counts.astype(float) / 6.022e+23 * 1000

# convert names
cell_volumes.index = [re.sub(r'\W+', '', i).lower() for i in cell_volumes.index]
data_counts.columns = [re.sub(r'\W+', '', i).lower() for i in data_counts.columns]


# Iterate through the dataset and divide by the corresponding cell volume, to get mmol/fL:
for (col_name, d) in data_counts.iteritems():
    chemo_name = re.sub(r'2$', '', re.sub(r'1$', '', col_name))
    try:
        data_counts[col_name] = data_counts[col_name] / cell_volumes.loc[chemo_name]#["cell_volume"]
    except:
        print(chemo_name)

# Finally, convert to mmol/gDW:
water_content = 0.3
cell_density = 1.105e-12
data_counts = data_counts / cell_density / water_content



chemostatµ01
chemostatµ01
stationary1day
stationary1day
stationary1day
stationary3days
stationary3days
stationary3days
glucose3
glucose4
glucose5
50mmnacl
50mmnacl
50mmnacl
42c
42c
42c
ph6
ph6
ph6


In [76]:
data_counts.to_csv(f"{INTERMEDIATE}/proteomics/proteomics_concentrations.csv")