# Notebook to get percentages from VMH-derived substances/media

In [1]:
import pandas as pd

## Calculate flux_percentages, gram_percentages & difference between the percentages

In [2]:
def get_percentages_for_oil(oil_vmh_tsv: str, oil_tsv: str,
                            artseb_tsv: str = './artificial_sebum_subset.tsv') -> None:
   """Calculate flux_percentages from VMH table for a substance/medium &
   gram_percentages from a table containing weight specifications

   The result (flux, flux_percent, gram_percent, percent_diff and the
   row-wise median of both percentages as 'percent') is written back to
   ``oil_tsv``, replacing its previous content. Re-running the function on
   an already processed ``oil_tsv`` yields the same table again.

   Args:
       - oil_vmh_tsv (str):
           Table with substance/medium composition from VMH
           (needs the columns 'exchange' and 'flux')
       - oil_tsv (str):
           Table with weight specifications
           (needs the columns 'substance' and 'gram'); overwritten in place
       - artseb_tsv (str, optional):
           Table mapping VMH identifiers (column 'VMH') to substance names
           (column 'name'). Defaults to './artificial_sebum_subset.tsv'.

   Returns:
       None: The merged table is only written to ``oil_tsv``.
   """
   df = pd.read_csv(oil_vmh_tsv, sep='\t')

   # Remove the literal 'EX_' prefix and the '(e)' / '[e]' compartment suffix.
   # str.lstrip/str.rstrip must not be used here: they strip *character sets*
   # and would also eat leading 'E'/'X'/'_' or trailing 'e' of the metabolite ID.
   df['exchange'] = (
       df['exchange']
       .str.replace(r'^EX_', '', regex=True)
       .str.replace(r'(?:\(e\)|\[e\])$', '', regex=True)
   )

   # Share of every exchange in the summed flux
   overall_flux = df['flux'].sum()
   df['flux_percent'] = df['flux'] / overall_flux

   # Translate VMH identifiers into substance names & keep only the flux information
   artseb_df = pd.read_csv(artseb_tsv, sep='\t')
   df_merged = (
       df.merge(artseb_df, left_on='exchange', right_on='VMH', how='left')
       .rename(columns={'name': 'substance'})
       .drop(columns=['formula', 'source', 'BiGG', 'SEED', 'MetaNetX', 'VMH',
                      'KEGG', 'percent', 'exchange'])
   )

   # oil_tsv is overwritten below, so on a re-run it already contains the
   # derived columns. Drop them first; otherwise the merge would create
   # '_x'/'_y' duplicates and the 'flux_percent' lookup would fail.
   derived = [col for col in df_merged.columns if col not in ('substance', 'gram')]
   derived += ['gram_percent', 'percent_diff', 'percent']
   oil_df = pd.read_csv(oil_tsv, sep='\t').drop(columns=derived, errors='ignore')

   # Right join: only substances present in the VMH table are kept
   oil_final = oil_df.merge(df_merged, on='substance', how='right')
   # presumably 'gram' is given per 100 g of oil, hence /100 yields a fraction - TODO confirm
   oil_final['gram_percent'] = oil_final['gram'] / 100
   # Difference between both fractions, expressed in percentage points
   oil_final['percent_diff'] = (oil_final['flux_percent'] - oil_final['gram_percent']) * 100
   # Median of two values == their mean; falls back to the single available value on NaN
   oil_final['percent'] = oil_final[['flux_percent', 'gram_percent']].median(axis=1)
   oil_final.to_csv(oil_tsv, sep='\t', index=False)
   

## Get the percentages for the oils in the artificial sebum definition
(Olive, coconut & cottonseed oil)

In [7]:
# Olive oil: the computed percentages are written back into './olive_oil.tsv'
get_percentages_for_oil('./olive_oil_100g_vmh.tsv', './olive_oil.tsv')

In [None]:
# Coconut oil: the computed percentages are written back into './Coconut_oil.tsv'
get_percentages_for_oil('./coconut_oil_100g_vmh.tsv', './Coconut_oil.tsv')

In [11]:
# Cottonseed oil: the computed percentages are written back into './Cottonseed_oil.tsv'
get_percentages_for_oil('./cottonseed_oil_100g_vmh.tsv', './Cottonseed_oil.tsv')

## Combine oil content for artificial sebum
10% olive oil and 10% coconut oil mean that the single components of each of these oils need to add up to 0.10.<br>
25% cottonseed oil means that all its single components need to add up to 0.25.

In [4]:
# NOTE: this cell reads './artificial_sebum_subset.tsv' and overwrites it at
# the end after adding the oil shares to the existing 'percent' values, so
# every re-run adds the oil shares once more.

# Load the per-oil tables, keeping only substance name & percentage;
# the percentage column is renamed per oil so the sources stay distinguishable
olive_df = (
    pd.read_csv('./olive_oil.tsv', sep='\t')[['substance', 'percent']]
    .rename(columns={'percent': 'olive_percent'})
)
cnut_df = (
    pd.read_csv('./Coconut_oil.tsv', sep='\t')[['substance', 'percent']]
    .rename(columns={'percent': 'coconut_percent'})
)
cseed_df = (
    pd.read_csv('./Cottonseed_oil.tsv', sep='\t')[['substance', 'percent']]
    .rename(columns={'percent': 'cottonseed_percent'})
)
artseb_df = pd.read_csv('./artificial_sebum_subset.tsv', sep='\t')

# Outer-join the three oils so that every substance of any oil gets one row
oils_df = (
    olive_df
    .merge(cnut_df, on='substance', how='outer')
    .merge(cseed_df, on='substance', how='outer')
)

# Scale each oil according to the amount in 1l medium:
# olive & coconut components sum to 0.1 each, cottonseed components to 0.25
olive_normalise = 0.1 / oils_df['olive_percent'].sum()
cnut_normalise = 0.1 / oils_df['coconut_percent'].sum()
cseed_normalise = 0.25 / oils_df['cottonseed_percent'].sum()
oils_df['olive_percent'] = oils_df['olive_percent'].mul(olive_normalise)
oils_df['coconut_percent'] = oils_df['coconut_percent'].mul(cnut_normalise)
oils_df['cottonseed_percent'] = oils_df['cottonseed_percent'].mul(cseed_normalise)

# Collapse the three per-oil columns into a single 'oils_percent' column;
# a substance missing in an oil contributes 0 for that oil
mask = oils_df.columns[oils_df.columns.str.endswith('percent')]
oils_df = oils_df.fillna(dict.fromkeys(mask, 0))
oils_df['oils_percent'] = (
    oils_df['olive_percent']
    .add(oils_df['coconut_percent'])
    .add(oils_df['cottonseed_percent'])
)
oils_df = oils_df.drop(columns=['olive_percent', 'coconut_percent', 'cottonseed_percent'])

# Attach the oil contributions to the artificial sebum table
artseb_df_merged = pd.merge(
    artseb_df, oils_df, left_on='name', right_on='substance', how='outer'
)

# Add the oil share onto the existing percentage & discard the helper columns
mask = artseb_df_merged.columns[artseb_df_merged.columns.str.endswith('percent')]
artseb_df_merged = artseb_df_merged.fillna(dict.fromkeys(mask, 0))
artseb_df_merged['percent'] = artseb_df_merged['percent'] + artseb_df_merged['oils_percent']
artseb_df_merged = (
    artseb_df_merged
    .drop(columns=['oils_percent', 'substance'])
    .round({'percent': 8})
)
artseb_df_merged.to_csv('./artificial_sebum_subset.tsv', sep='\t', index=False)
artseb_df_merged

Unnamed: 0,name,formula,percent,source,BiGG,SEED,MetaNetX,VMH,KEGG
0,Squalene,C30H50,0.15,Squalene,sql,cpd00559,MNXM1104376,sql,C00751
1,Cholesterol,C27H46O,0.012,Cholesterol,chsterol,cpd00160,MNXM726122,chsterol,C00187
2,Cholesteryl oleate,C45H78O2,0.024,Cholesterol oleate,HC02023,cpd10340,MNXM730830,HC02023,C14641
3,Hexadecyl palmitate,C32H64O2,0.15,Hexadecyl palmitate,,cpd09641,MNXM9612,,C13821
4,Octadecenoate [Oleic acid],C18H34O2,0.2600312,"Oleic acid, Olive oil, Coconut oil, Cottonseed...",ocdcea,cpd00536,MNXM1107708,ocdcea,C00712
5,Tetradecanoate,C14H27O2,0.06488692,"Myristic acid, Coconut oil, Cottonseed oil",ttdca,cpd03847,MNXM314,ttdca,C06424
6,Dodecanoate,C12H23O2,0.1196771,"Lauric acid, Coconut oil",ddca,cpd01741,MNXM402,ddca,C02679
7,Hexadecanoate [Palmitic acid],C16H32O2,0.2148799,"Palmitic acid, Olive oil, Coconut oil, Cottons...",hdca,cpd00214,MNXM108,hdca,C00249
8,Decanoate,C10H19O2,0.01327186,Coconut oil,dca,cpd01107,MNXM1043,dca,C01571
9,Margarate [Heptadecanoate],C17H33O2,6.368e-05,"Olive oil, Coconut oil",hpdca,cpd24916,MNXM11802,hpdca,
