# Micom Co-Culture Experiments
Notes from Casey: This Jupyter Notebook uses Micom to perform a series of simulated co-culture experiments for bacterial species in the Agora genome-scale metabolic model database. Micom enables us to remove individual organisms from the community (knockouts) to determine the resulting impact on the growth rates of the remaining community members. For example, suppose a bacterial community $C$ consists of $\mathrm{microbe}_i$ and $\mathrm{microbe}_j$. If knocking out $\mathrm{microbe}_i$ from $C$ results in a decrease in the growth rate of $\mathrm{microbe}_j$, then that would signal a positive influence of $\mathrm{microbe}_i$ on $\mathrm{microbe}_j$.

Unfortunately, the native `micom` workflow has issues with non-terminating jobs, and the results are blocked until all jobs are complete. We must instead convert this into a containerized deployment so we can easily terminate jobs that take too long to run. We also need to implement a queue system that submits simulation conditions (the metabolic database, a set of indices from the database to use as a community, the relative abundances of that community, and a base medium formulation) to a job queue. The results of each job are then stored in a Postgres database for later analysis.

In [None]:
# import necessary packages/tools
from functools import partial
from itertools import combinations
from micom import Community, data
from micom.qiime_formats import load_qiime_model_db, load_qiime_medium
import numpy as np
import pandas as pd
import warnings

In [1]:
def calc_kos(medium_series, agora_df, indices):
    """
    Calculate the effects of single member knockouts on a community.

    A micom ``Community`` is built from the selected rows of ``agora_df``,
    ``medium_series`` is assigned as its medium, and ``knockout_taxa`` is run
    with ``method="relative change"``. The wide knockout table is melted into
    long format (one row per knockout/kept pair). If anything in that pipeline
    raises, the failure is recorded instead of propagated: one NaN row is
    returned for every ordered pair of distinct community members, with the
    error text in the ``error`` column.

    Parameters:
      medium_series (pandas Series) : A pandas series containing the base medium formulation for the simulation.
      agora_df (pandas DataFrame) : The Agora database as a pandas dataframe.
      indices (list of ints) : A list of indices from `agora_df` to use in the community.

    Returns:
      pandas DataFrame : Long-format table with columns ``knockout``, ``kept``,
        ``kept_relative_change``, ``error`` (the string ``'None'`` on success,
        otherwise ``"<ExceptionType>: <message>"``) and ``SampleID`` (the
        sorted member ids joined by ``_``). A failed single-member community
        has no knockout/kept pairs and therefore yields an empty table.
    """
    # Work on an explicit copy so the string cast below never writes into
    # (or warns about) a slice of the caller's dataframe.
    tax = agora_df.loc[indices,].copy()
    tax['id'] = tax['id'].apply(str)
    member_ids = list(tax['id'])

    try:
        com = Community(tax, progress=False)
        com.medium = medium_series
        ko = com.knockout_taxa(fraction=1.0,
                               method="relative change",
                               diag=False,
                               progress=False)

        # convert dataframe from wide to long format
        ko['knockout'] = ko.index
        ko = ko.melt(id_vars=["knockout"],
                     var_name="kept",
                     value_name="kept_relative_change")
        ko = ko.dropna()
        ko['error'] = 'None'

    # log errors for failed knockout (deliberately broad: any failure of the
    # simulation should become a record, not abort the whole batch)
    except Exception as e:
        # One placeholder row per ordered (knockout, kept) pair of distinct
        # members, for any community size. For two members this is
        # (a, b), (b, a).
        pairs = [(knocked, kept)
                 for pos_k, knocked in enumerate(member_ids)
                 for pos_j, kept in enumerate(member_ids)
                 if pos_k != pos_j]
        ko = pd.DataFrame({"knockout": [pair[0] for pair in pairs],
                           "kept": [pair[1] for pair in pairs],
                           "kept_relative_change": [np.nan] * len(pairs)})
        # Store text rather than the exception object so the column has the
        # same type as the success path and can be serialized.
        ko['error'] = f"{type(e).__name__}: {e}"

    ko['SampleID'] = "_".join(sorted(member_ids))

    return ko


In [None]:
#this function is not necessary, but allows us to derive a medium with metabolites/concentrations between two diets (e.g. intermediate of Western Diet and High Fiber Diet)
def interpolate_fluxes(medium_1, medium_2, n, 
                       flux_col_1='flux', flux_col_2='flux'):
    """
    Interpolates fluxes between two chemical environments.

    The result is ``(1 - n) * medium_1 + n * medium_2`` evaluated per row
    label. A label present in only one medium is treated as having flux 0 in
    the other. If a medium contains the same label more than once, only its
    first occurrence is used.
    
    Parameters:
    medium_1 (pd.DataFrame): DataFrame containing the fluxes for the medium_1.
    medium_2 (pd.DataFrame): DataFrame containing the fluxes for the medium_2.
    n (float): Interpolation factor, fraction of medium_2 (0 <= n <= 1) (e.g. 0 is only medium_1, 1 is only medium_2, 0.5 is halfway between medium_1 and medium_2)
    flux_col_1 (str): Name of the flux column in medium_1.
    flux_col_2 (str): Name of the flux column in medium_2.
    
    Returns:
    pd.Series: Interpolated fluxes, indexed by the sorted union of the row
        labels of both media.

    Raises:
    ValueError: If n is outside the interval [0, 1].
    """

    if not (0 <= n <= 1):
        raise ValueError("n must be between 0 and 1")
    
    # Extract the flux series and keep only the first entry per label, so
    # that reindexing below is well defined
    medium_1_series = medium_1[flux_col_1]
    medium_2_series = medium_2[flux_col_2]
    medium_1_series = medium_1_series[~medium_1_series.index.duplicated(keep='first')]
    medium_2_series = medium_2_series[~medium_2_series.index.duplicated(keep='first')]
    
    # Index.union gives a reproducible (sorted) label order; going through a
    # Python set would make the row order depend on string hashing
    all_rxns = medium_1_series.index.union(medium_2_series.index)
    
    combined_df = pd.DataFrame(index=all_rxns)
    combined_df['medium_1_flux'] = medium_1_series.reindex(combined_df.index)
    combined_df['medium_2_flux'] = medium_2_series.reindex(combined_df.index)
    
    # Fill NaN values with 0 (assuming missing flux is 0)
    combined_df = combined_df.fillna(0)
    
    # Linearly interpolate the fluxes
    interpolated_flux = (1 - n) * combined_df['medium_1_flux'] + n * combined_df['medium_2_flux']
    return interpolated_flux

In [None]:
# Suppress FutureWarning messages for the rest of the session
warnings.filterwarnings("ignore", category=FutureWarning)

# Input artifacts: the two diet media, the model database, and the table of
# species pairs ("hits") to simulate
WD_FP = "data/diets/western_diet_gut.qza"
AG_FP = "data/diets/vmh_high_fiber_agora.qza"
MODEL_FP = "data/models/agora103_species.qza"
HITS_INDEX_FP = "data/TLC_sig_interactions_taxonomy/Hits_of_interest_agora_index_values.csv"

# Model database, extracted under ./data/models
agora_df = load_qiime_model_db(MODEL_FP, extract_path = "./data/models")

# Western diet and agrarian (high fiber) diet media
wd = load_qiime_medium(WD_FP)
ag = load_qiime_medium(AG_FP)

# n = 0 puts all of the weight on the first medium, i.e. the western diet
medium_series = interpolate_fluxes(wd, ag, 0)

# Each row of the CSV carries the agora_df indices of one j/i species pair
hits_indices_df = pd.read_csv(HITS_INDEX_FP)

# Run the knockout simulation for every pair that has both indices present
index_list_wd = []
for index, row in hits_indices_df.iterrows():
    index_1 = row['j_AGORA_df_index']
    index_2 = row['i_AGORA_df_index']

    # Rows with a missing index cannot form a community and are skipped
    if pd.notna(index_1) and pd.notna(index_2):
        ko_results_wd = calc_kos(medium_series, agora_df, [index_1, index_2])
        index_list_wd.append(ko_results_wd)

# Stack the per-pair tables into a single dataframe
hits_of_interest_ko_results_wd = pd.concat(index_list_wd, ignore_index=True)


In [None]:
# Suppress FutureWarning messages for the rest of the session
# (fixed: the call was spelled `xwarnings`, which raised NameError)
warnings.filterwarnings("ignore", category=FutureWarning)

WD_FP = "data/diets/western_diet_gut.qza"
AG_FP = "data/diets/vmh_high_fiber_agora.qza"
MODEL_FP = "data/models/agora103_species.qza"
HITS_INDEX_FP = "data/TLC_sig_interactions_taxonomy/Hits_of_interest_agora_index_values.csv"


# Model database, extracted under ./data/models
agora_df = load_qiime_model_db(MODEL_FP, extract_path = "./data/models")


# western diet
wd = load_qiime_medium(WD_FP)
# agrarian diet
ag = load_qiime_medium(AG_FP)

# interpolate between the two diets
# The third argument is the interpolation factor n (0 <= n <= 1). The medium
# is computed as (1 - n) * wd + n * ag, so 0 gives the western diet only,
# 1 gives the agrarian diet only, and e.g. 0.5 gives the halfway medium.
# Change this number to simulate a different diet mix.
medium_series = interpolate_fluxes(wd, ag, 0)

# read in csv containing two rows with indices corresponding to agora_df for each bacterial species of interest
hits_indices_df = pd.read_csv(HITS_INDEX_FP)

# iterate through list of indices assigned to each hit of interest
index_list = []
for index, row in hits_indices_df.iterrows():
    index_1 = row['j_AGORA_df_index']
    index_2 = row['i_AGORA_df_index']

    #check if either index_1 or index_2 is missing and skip if it is
    if pd.isna(index_1) or pd.isna(index_2):
        continue

    # call calc_kos to calculate the values for each j/i pair
    ko_results = calc_kos(medium_series, agora_df, [index_1, index_2])

    # append the results to the list
    index_list.append(ko_results)

# concatenate all results into a single df 
hits_of_interest_ko_results = pd.concat(index_list, ignore_index=True)

In [None]:
# Expose the agora_df row labels as a regular column so they can be carried
# through the merges below
agora_df["idx_lab"] = agora_df.index

# Annotate the knocked-out member, then chain the kept member onto that
# result. Previously both merges started from hits_of_interest_ko_results_wd,
# so the knockout annotation was overwritten by the second assignment.
# NOTE(review): this matches the knockout/kept ids against agora_df['species'];
# confirm that the 'id' values used by calc_kos equal the species names.
hits_of_interest_ko_results = hits_of_interest_ko_results_wd.merge(
    agora_df[["species", "idx_lab"]], left_on='knockout', right_on='species')
# The second merge brings in "species" and "idx_lab" again; the suffixes keep
# the two annotations apart as *_knockout and *_kept columns.
hits_of_interest_ko_results = hits_of_interest_ko_results.merge(
    agora_df[["species", "idx_lab"]], left_on='kept', right_on='species',
    suffixes=("_knockout", "_kept"))

