In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from os.path import join, dirname, basename, exists, isdir

### Load environment variables from the project root directory ###
# find_dotenv() walks up the directory tree looking for a .env file.
# NOTE(review): if no .env is found it returns an empty string, in which case
# PROJ below becomes '' and every path is relative to the working directory.
dotenv_path = find_dotenv()

# Load the entries of the .env file as environment variables.
load_dotenv(dotenv_path)

# The variables can now be read by name through os.environ.

# Check whether a network drive has been specified.
# os.environ.get returns None (not the string 'None') when NETWORK_URL is
# unset, so both the missing variable and the literal placeholder 'None' are
# treated as "no network drive".
DATABASE = os.environ.get("NETWORK_URL")
if DATABASE is not None and DATABASE != 'None':
    # TODO: mount network drive here
    pass

# Set up directory paths.
CURRENT_DIR = os.getcwd()
PROJ = dirname(dotenv_path)  # project root directory (the folder holding .env)

DATA = join(PROJ, 'data')  # data directory
RAW_EXTERNAL = join(DATA, 'raw_external')  # external raw data directory
RAW_INTERNAL = join(DATA, 'raw_internal')  # internal raw data directory
INTERMEDIATE = join(DATA, 'intermediate')  # intermediate data directory
FINAL = join(DATA, 'final')  # final data directory

RESULTS = join(PROJ, 'results')  # output directory
FIGURES = join(RESULTS, 'figures')  # figure output directory
PICTURES = join(RESULTS, 'pictures')  # picture output directory


# Optionally create a data-specific sub-folder in the external-raw,
# intermediate and final data directories. Leave folder_name empty to skip.
folder_name = ''
if folder_name != '':
    # exist_ok=True makes this safe to re-run and avoids the check-then-create
    # race of a separate exists() test.
    for parent_dir in (RAW_EXTERNAL, INTERMEDIATE, FINAL):
        os.makedirs(join(parent_dir, folder_name), exist_ok=True)


print('Standard variables loaded, you are good to go!')

Standard variables loaded, you are good to go!


In [5]:
import pandas as pd
import re
# Read the pyruvate incorporation table from the intermediate data directory,
# using the first CSV column as the row index, then display it.
data = pd.read_csv(
    join(INTERMEDIATE, "ec_incorporation_pyr.csv"),
    index_col=0,
)
data

Unnamed: 0,pyruvate
O32583,2.177791e-06
P00350,0.000000e+00
P00363,0.000000e+00
P00370,4.063374e-08
P00393,0.000000e+00
...,...
Q59385,0.000000e+00
Q6BEX0,0.000000e+00
Q6BF16,0.000000e+00
Q6BF17,0.000000e+00


In [6]:



# Load the single-cell volume for every growth condition.
cell_volumes = pd.read_csv(f"{RAW_INTERNAL}/proteomics/growth_conditions.csv", index_col=0)
cell_volumes = cell_volumes["Single cell volume [fl]1"]
# For condition labels that occur more than once, keep only the first row.
cell_volumes = cell_volumes.loc[~cell_volumes.index.duplicated(keep='first')]
# Replace the trailing "3" in this one label.
# NOTE(review): the loop below strips "_uncertainty" from column names before
# the lookup, so a label ending in "_uncertainty" can never be matched -
# confirm whether the intended new label is plain 'Osmotic-stress glucose'.
cell_volumes = cell_volumes.rename({'Osmotic-stress glucose3':'Osmotic-stress glucose_uncertainty'}, axis='index')
# Normalise labels: drop every non-word character and lower-case the rest so
# they can be compared with the column names of `data`.
rename_dict = {i:re.sub(r'\W+', '', i).lower() for i in cell_volumes.index}
cell_volumes = cell_volumes.rename(rename_dict, axis='index')


# Conversion constants.
# NOTE(review): units are not stated anywhere in this notebook. Presumably
# water_content is the mass fraction applied to the cell mass and cell_density
# is in g/fL - confirm before reuse.
water_content = 0.3
cell_density = 1.105e-12
avogadro_number = 6.022e+23  # particles per mol


# Scale every column by the cell volume of its growth condition.
# Iterating over the column labels replaces DataFrame.iteritems(), which was
# removed in pandas 2.0.
# NOTE: `data` is overwritten in place, so re-running this cell scales the
# values again; reload `data` first.
for col_name in list(data.columns):
    chemo_name = col_name.replace("_uncertainty", "").replace("_mean", "")
    try:
        volume = cell_volumes.loc[chemo_name]
    except KeyError:
        # Only a missing condition is tolerated; any other error should surface.
        print(f"No cell volume found for '{chemo_name}'; column '{col_name}' was not scaled by volume")
        continue
    data[col_name] = data[col_name] * volume


data = data * cell_density * water_content


# Convert into counts: multiply by Avogadro's number and divide by 1000
# (presumably mmol -> mol; confirm the input units).
data = data * avogadro_number / 1000


Unnamed: 0,pyruvate
O32583,1086.876995
P00350,0.000000
P00363,0.000000
P00370,20.279213
P00393,0.000000
...,...
Q59385,0.000000
Q6BEX0,0.000000
Q6BF16,0.000000
Q6BF17,0.000000


In [14]:
# Round the pyruvate column to whole numbers, downcast it to the smallest
# signed integer dtype that can hold the values, and write the table to the
# intermediate data directory.
data['pyruvate'] = pd.to_numeric(
    data['pyruvate'].round(),
    downcast='signed',
)
data.to_csv(
    join(INTERMEDIATE, 'ec_incorporation_pyr_count.csv')
)