In [1]:
import pathlib

import numpy as np
import pandas as pd

## Create media files for the carbon sources in the carbon source screen

1. First, we downloaded the media and carbon source screen information for the AT leaf models from the AT-LSPHERE [GitHub repository](https://github.com/VorholtLab/i-At-LSPHERE).
2. Next, we parsed the `.mat` files into `.csv` files, obtaining `nutrients.csv`, `vitamins.csv`, `minimal_media.csv`, and `csource_screen.csv`.
3. Finally, we mapped these metabolites (BiGG IDs) to ModelSEED compound (cpd) IDs.


In [2]:
# Every header considered for a media file, including the descriptive `name` and `formula` ones.
all_media_headers = ["compounds", "name", "formula", "minFlux", "maxFlux", "concentration"]
# The same headers minus the descriptive ones
# (presumably the "necessary" headers mentioned in the note below).
media_headers = [
    header for header in all_media_headers if header not in ("name", "formula")
]

We verified that a media file containing only the necessary headers (`R2A_M_necessary.tsv`) works with the KBase media import.


In [3]:
# Folder that receives the generated media `.tsv` files.
media_folder = pathlib.Path("../../data/processed/at_leaf/media/")
# Folder holding the input `.csv` tables read by the cells below.
input_folder = pathlib.Path("../../data/raw/at_leaf/media/")

In [4]:
# Load the base minimal-media table (one row per compound, with its mapped cpd id) and display it.
minimal_media_csv = input_folder.joinpath("minimal_media.csv")
base_minimal_media_df = pd.read_csv(minimal_media_csv)
base_minimal_media_df

Unnamed: 0,compound,cpd_ids
0,ca2[e],cpd29674
1,cl[e],cpd00099
2,cobalt2[e],cpd00149
3,cu2[e],cpd00058
4,fe2[e],cpd10515
5,fe3[e],cpd10516
6,h2o[e],cpd15275
7,h[e],cpd00067
8,k[e],cpd00205
9,mg2[e],cpd00254


In [5]:
# Load the vitamin table (name, ID, type and mapped cpd id per row) and display it.
vitamins_csv = input_folder.joinpath("vitamins.csv")
vitamins_df = pd.read_csv(vitamins_csv)
vitamins_df

Unnamed: 0,Name,ID,type,cpd_ids
0,Thiamin,thm[e],Vitamin,cpd00305
1,Biotin,btn[e],Vitamin,cpd00104
2,Riboflavin,ribflv[e],Vitamin,cpd00220
3,Pyridoxal,pydx[e],Vitamin,cpd00215
4,Cobalamin,cbl1[e],Vitamin,cpd00635
5,Lipoate,lipoate[e],Vitamin,cpd00541
6,Folate,fol[e],Vitamin,cpd00393
7,Pantothenate,pnto__R[e],Vitamin,cpd00644
8,Nicotinate,nac[e],Vitamin,cpd00218
9,4-Aminobenzoate,4abz[e],Vitamin,cpd00443


In [6]:
# Keep only the mapped cpd ids of each table, as plain Python lists.
base_minimal_media = base_minimal_media_df["cpd_ids"].tolist()
vitamins = vitamins_df["cpd_ids"].tolist()

We create the list of media components by combining the base minimal media components with the vitamin list.
This is the list of media components that are common to all media in the carbon source screen.


In [7]:
# Compound ids shared by every medium: the base minimal media followed by the vitamins.
# NOTE(review): the following cell rebinds this name to a DataFrame, so the
# list form only exists until that cell runs.
minimal_media_wo_csource = [*base_minimal_media, *vitamins]

In [8]:
# One record per shared media component (base minimal media + vitamins);
# every component gets the same flux bounds and concentration.
media_data = [
    {
        "compounds": cpd_id,
        "minFlux": -100,
        "maxFlux": 100,
        "concentration": 0.001,
    }
    for cpd_id in base_minimal_media + vitamins
]
minimal_media_wo_csource = pd.DataFrame(media_data)
minimal_media_wo_csource

Unnamed: 0,compounds,minFlux,maxFlux,concentration
0,cpd29674,-100,100,0.001
1,cpd00099,-100,100,0.001
2,cpd00149,-100,100,0.001
3,cpd00058,-100,100,0.001
4,cpd10515,-100,100,0.001
5,cpd10516,-100,100,0.001
6,cpd15275,-100,100,0.001
7,cpd00067,-100,100,0.001
8,cpd00205,-100,100,0.001
9,cpd00254,-100,100,0.001


In [9]:
# Load the nutrient table, leaving out the rows labelled 46, 47 and 48 (xylan 4, 8 and 12).
# NOTE(review): these labels come from the default integer index of `read_csv`,
# so this presumes the row order of nutrients.csv does not change — confirm.
nutrients_df = pd.read_csv(input_folder / "nutrients.csv").drop(index=[46, 47, 48])

In [10]:
# Look-up table keyed by nutrient ID; each entry holds the nutrient's name and its mapped cpd id.
nutrient_info = {
    nutrient_id: {"name": nutrient_name, "cpd_id": cpd_id}
    for nutrient_id, nutrient_name, cpd_id in zip(
        nutrients_df["ID"], nutrients_df["Name"], nutrients_df["cpd_ids"]
    )
}

We read the carbon source screen file and create a media file for each carbon source


In [11]:
# Carbon source screen table: first CSV column becomes the row index, and the
# `xylan4[e]` column is left out.
csource_screen_path = input_folder / "csource_screen.csv"
csource_screen_df = pd.read_csv(csource_screen_path, index_col=0).drop(
    columns="xylan4[e]"
)
# Shorten the row labels by replacing "Leaf" with "L".
csource_screen_df.index = csource_screen_df.index.str.replace("Leaf", "L")
csource_screen_df

Unnamed: 0,h2o[e],malt[e],his__L[e],galur[e],trp__L[e],tyr__L[e],glcn[e],inost[e],arab__L[e],glu__L[e],...,cys__L[e],mma[e],cellb[e],asn__L[e],pro__L[e],oxa[e],sucr[e],gal[e],lys__L[e],phe__L[e]
L1,0,1,1,0,0,0,1,0,0,1,...,0,0,1,1,0,0,1,1,0,0
L2,0,0,0,1,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
L3,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,1,1,0,0
L4,0,1,0,1,0,1,0,0,0,1,...,0,0,1,0,1,0,1,1,0,0
L5,0,1,0,0,0,0,1,0,0,1,...,0,0,1,0,1,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
L159,0,1,0,0,0,1,1,0,0,1,...,0,0,1,0,0,0,1,1,0,0
L88,1,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,1,0,0,0,0
L89,1,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0
L106,1,0,0,0,0,0,1,0,0,1,...,0,0,0,1,0,1,0,0,0,0


In [20]:
# Write one media file per carbon source: the shared minimal medium plus a
# single extra row for that carbon source.

# Make sure the output folder exists; `to_csv` fails when the target directory is missing.
media_folder.mkdir(parents=True, exist_ok=True)

# Built once, outside the loop: compounds already present in the shared medium.
shared_compounds = set(minimal_media_wo_csource.compounds)

for csource in csource_screen_df.columns:
    csource_name = nutrient_info[csource]["name"]
    csource_id = nutrient_info[csource]["cpd_id"]
    # A screen column whose compound is already part of the shared medium
    # gets no media file of its own.
    if csource_id in shared_compounds:
        continue
    # Same flux bounds and concentration as the shared components.
    csource_data = {
        "compounds": csource_id,
        "minFlux": -100,
        "maxFlux": 100,
        "concentration": 0.001,
    }
    csource_row = pd.DataFrame([csource_data])
    minimal_media = pd.concat([minimal_media_wo_csource, csource_row], ignore_index=True)  # type: ignore
    # The file is named after the carbon source and written tab-separated without the index.
    media_file = media_folder / f"{csource_name}.tsv"
    minimal_media.to_csv(media_file, sep="\t", index=False)

cpd15275


## Create mapping between experimental microbes and the carbon sources in which they grow


We identify the subset of microbes for which we have experimental data from Table S5


In [13]:
# Strain ids for which experimental data is available.
# Obtained from at_leaf/data_analysis.ipynb
exp_leafids = [
    f"L{strain_number}"
    for strain_number in (145, 15, 154, 164, 179, 202, 233, 257, 304, 34, 8)
]

In [21]:
# Number of strain ids in the experimental subset.
len(exp_leafids)

11

In [14]:
import numpy as np

In [24]:
# Map each experimental strain to the names of the screen columns with a
# non-zero entry in its row, plus "R2A_M".
exp_leafid_set = set(exp_leafids)  # constant-time membership checks
microbe_csource_map = {}

for leaf_id, growth_flags in csource_screen_df.iterrows():
    # Skip strains outside the experimental subset before doing any look-ups.
    if leaf_id not in exp_leafid_set:
        continue
    # Positions of the non-zero entries in this row -> the matching column ids.
    growth_media_ids = csource_screen_df.columns[np.flatnonzero(growth_flags)]
    growth_media_names = [
        nutrient_info[media_id]["name"] for media_id in growth_media_ids
    ]
    # Add R2A_M to all microbes
    growth_media_names.append("R2A_M")
    microbe_csource_map[leaf_id] = growth_media_names

In [25]:
# Display the strain -> media-name mapping built in the previous cell.
microbe_csource_map

{'L8': ['Maltose',
  'D-gluconate',
  'Myo-inositol',
  'D-xylose',
  'D-mannitol',
  'Fructose',
  'D-mannose',
  'D-glucose',
  'Trehalose',
  'Cellobiose',
  'Sucrose',
  'D-galactose',
  'R2A_M'],
 'L15': ['L-histidine',
  'D-galacturonate',
  'L-tryptophan',
  'L-tyrosine',
  'D-gluconate',
  'Myo-inositol',
  'L-glutamate',
  'L-glutamine',
  'L-arginine',
  'D-xylose',
  'D-mannitol',
  'Glycerol',
  'Succinate',
  'Coniferol',
  'L-leucine',
  'Fructose',
  'D-mannose',
  'L-ornithine',
  'L-valine',
  'L-aspartate',
  'D-glucose',
  'L-serine',
  'L-isoleucine',
  'L-alanine',
  'Acetate',
  'Trehalose',
  'Pyruvate',
  'L-asparagine',
  'L-proline',
  'Sucrose',
  'D-galactose',
  'L-phenylalanine',
  'R2A_M'],
 'L34': ['Maltose',
  'L-glutamate',
  'D-xylose',
  'Succinate',
  'Fructose',
  'D-mannose',
  'D-glucose',
  'L-isoleucine',
  'L-alanine',
  'Acetate',
  'Trehalose',
  'Pyruvate',
  'Cellobiose',
  'L-proline',
  'Sucrose',
  'D-galactose',
  'R2A_M'],
 'L154': ['