In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from os.path import join, dirname, basename, exists, isdir

### Load environmental variables from the project root directory ###
# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)

# now you can get the variables using their names

# Check whether a network drive has been specified
DATABASE = os.environ.get("NETWORK_URL")
if DATABASE == 'None':
    pass
else:
    pass
    #mount network drive here

# set up directory paths
CURRENT_DIR = os.getcwd()
PROJ = dirname(dotenv_path) # project root directory

DATA = join(PROJ, 'data') #data directory
RAW_EXTERNAL = join(DATA, 'raw_external') # external data raw directory
RAW_INTERNAL = join(DATA, 'raw_internal') # internal data raw directory
INTERMEDIATE = join(DATA, 'intermediate') # intermediate data directory
FINAL = join(DATA, 'final') # final data directory

RESULTS = join(PROJ, 'results') # output directory
FIGURES = join(RESULTS, 'figures') # figure output directory
PICTURES = join(RESULTS, 'pictures') # picture output directory


# make folders specific for certain data
folder_name = ''
if folder_name != '':
    #make folders if they don't exist
    if not exists(join(RAW_EXTERNAL, folder_name)):
        os.makedirs(join(RAW_EXTERNAL, folder_name))

    if not exists(join(INTERMEDIATE, folder_name)):
        os.makedirs(join(INTERMEDIATE, folder_name))

    if not exists(join(FINAL, folder_name)):
        os.makedirs(join(FINAL, folder_name))


print('Standard variables loaded, you are good to go!')

Standard variables loaded, you are good to go!


1. Abundance [mmol/cell] = Abundance [mmol/gDW] * ( cell volume [fL/cell] * cell density [g/fL] * dry content [gDW/g] )
2. Abundance [molecules/cell] = Abundance [mmol/cell] * Na [molecules/mol] * 1000 [mmol/mol]


In [16]:
import pandas as pd
import re
# import data 
data = pd.read_csv(f"{INTERMEDIATE}/proteomics_concentrations.csv", index_col=0)



# get cell volumes
cell_volumes = pd.read_csv(f"{RAW_INTERNAL}/proteomics/growth_conditions.csv", index_col=0)
cell_volumes = cell_volumes["Single cell volume [fl]1"]
# remove the first two rows of LB
cell_volumes = cell_volumes.loc[~cell_volumes.index.duplicated(keep='first')]
# rename the number 3 in there
cell_volumes = cell_volumes.rename({'Osmotic-stress glucose3':'Osmotic-stress glucose_uncertainty'}, axis='index')
rename_dict = {i:re.sub(r'\W+', '', i).lower() for i in cell_volumes.index}
cell_volumes = cell_volumes.rename(rename_dict, axis='index')


# Finally, convert to mmol/gDW:
water_content = 0.3
cell_density = 1.105e-12



# Iterate through the dataset and multiply by the corresponding cell volume, to get mmol/fL:
for (col_name, d) in data.iteritems():
    chemo_name = col_name.replace("_uncertainty", "").replace("_mean", "")
    try:
        data[col_name] = data[col_name] * cell_volumes.loc[chemo_name]#["cell_volume"]
    except:
        print(chemo_name)
        
        
data = data * cell_density * water_content
        

# convert into counts
data = data * 6.022e+23 / 1000
data

osmoticstressglucose
osmoticstressglucose


Unnamed: 0_level_0,glucose,lb,glycerolaa,acetate,fumarate,glucosamine,glycerol,pyruvate,chemostatµ05,chemostatµ035,...,stationaryphase1day_uncertainty,stationaryphase3days_uncertainty,osmoticstressglucose_uncertainty,42cglucose_uncertainty,ph6glucose_uncertainty,xylose_uncertainty,mannose_uncertainty,galactose_uncertainty,succinate_uncertainty,fructose_uncertainty
Uniprot Accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P0A8T7,2779.0,7164.0,4503.0,2180.0,2444.0,3316.0,2845.0,2730.0,4780.0,3900.0,...,117.2860,230.3656,470.0568,552.3210,205.0146,227.7264,549.2615,516.4046,454.6780,419.6293
P0A8V2,3957.0,8888.0,5199.0,2661.0,3199.0,3999.0,3749.0,3714.0,5245.0,4388.0,...,343.0518,248.9786,568.7992,517.4073,239.0976,189.4599,559.7193,556.9500,524.2800,409.4013
P36683,7596.0,16600.0,17548.0,22844.0,19491.0,13753.0,10792.0,16395.0,15733.0,20261.0,...,36.6669,75.9024,467.2900,463.3794,207.8103,1458.6066,521.9372,266.5125,822.9287,285.4150
P15254,2456.0,821.0,2339.0,1438.0,2071.0,1959.0,2068.0,2708.0,2285.0,1730.0,...,43.3218,29.5470,170.9446,214.5130,30.4024,135.6148,21.5208,120.7330,56.6248,149.1720
P09831,2859.0,604.0,652.0,1363.0,1281.0,1977.0,2204.0,1691.0,2321.0,1959.0,...,40.1120,7.2834,236.9394,518.0481,213.4832,500.4480,368.1909,312.5928,166.0157,343.7478
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
P36667,3.0,26.0,,2.0,10.0,7.0,3.0,4.0,3.0,1.0,...,,,,,,,,,,
P0AC78,1.0,0.0,0.0,4.0,3.0,2.0,4.0,6.0,9.0,4.0,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
P76164,171.0,2.0,5.0,53.0,23.0,41.0,49.0,56.0,45.0,22.0,...,0.0000,10.7809,15.4707,71.9055,14.3737,5.0820,2.2428,7.2192,2.3332,12.9584
P38506,3.0,6.0,4.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,...,0.0000,0.0000,0.0000,0.0000,0.7951,0.0000,0.0000,0.0000,0.0000,0.0000


In [6]:
original_data = pd.read_csv(f"{RAW_INTERNAL}/proteomics/protein_values.csv", index_col=0)
original_data

Unnamed: 0_level_0,Description,Gene,Peptides.used.for.quantitation,Confidence.score,Molecular weight (Da),Dataset,Glucose,LB,Glycerol + AA,Acetate,...,Mannose.2,Galactose .2,Succinate.2,Fructose.2,Gene.1,Bnumber,Annotated functional COG groups (letter),Annotated functional COG group (description),Annotated functional COG class,Unnamed: 78
Uniprot Accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P0A8T7,DNA-directed RNA polymerase subunit beta' OS=E...,rpoC,91,6045.53,155045.00800,2,2779,7164,4503.0,2180,...,16.15,22.87,16.45,9.29,rpoC,b3988,K,Transcription,INFORMATION STORAGE AND PROCESSING,
P0A8V2,DNA-directed RNA polymerase subunit beta OS=Es...,rpoB,89,5061.29,150520.27580,2,3957,8888,5199.0,2661,...,13.51,19.75,13.60,7.77,rpoB,b3987,K,Transcription,INFORMATION STORAGE AND PROCESSING,
P36683,Aconitate hydratase 2 OS=Escherichia coli (str...,acnB,67,4505.67,93420.94570,2,7596,16600,17548.0,22844,...,3.16,2.07,4.21,3.25,acnB,b0118,C,Energy production and conversion,METABOLISM,
P15254,Phosphoribosylformylglycinamidine synthase OS=...,purL,65,4277.71,141295.89840,2,2456,821,2339.0,1438,...,1.22,7.69,2.96,6.20,purL,b2557,F,Nucleotide transport and metabolism,METABOLISM,
P09831,Glutamate synthase [NADPH] large chain OS=Esch...,gltB,64,4111.74,163176.31530,2,2859,604,652.0,1363,...,17.71,22.36,10.69,8.91,gltB,b3212,E,Amino acid transport and metabolism,METABOLISM,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
P36667,dTDP-Rha:alpha-D-GlcNAc-pyrophosphate polypren...,wbbL,1,87.30,31022.76240,1,3,26,,2,...,,,,,wbbL,,-,,,
P0AC78,Undecaprenyl-phosphate alpha-N-acetylglucosami...,wecA,1,37.97,40912.09384,2,1,0,0.0,4,...,0.20,0.51,0.64,0.51,wecA,,-,,,
P76164,Uncharacterized protein ydfW in Qin prophage r...,ydfW,1,39.51,8702.81560,2,171,2,5.0,53,...,3.56,30.08,12.28,14.56,ydfW,,-,,,
P38506,Protein Xni OS=Escherichia coli (strain K12) G...,ygdG,1,30.22,28130.36423,2,3,6,4.0,2,...,159.16,109.74,1.20,131.15,ygdG,,-,,,
