In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import norm

import os
import time
import itertools

from glove.model import *

# Import data

In [2]:
# import file names
# os.listdir returns directory entries in arbitrary order, so sort the result to
# make every run process (and report) the data files in the same order.
# Only files whose name contains "processed" and not "passage" are kept.
files = sorted(
    f for f in os.listdir("data/")
    if "processed" in f and "passage" not in f
)

# Design experiments for each strain

In [3]:
def gen_exp_name(species_names):
    """Build an experiment name by joining species names with "-".

    Parameters
    ----------
    species_names : iterable of str
        Species labels, used in the order given.

    Returns
    -------
    str
        The labels separated by "-" (an empty string for an empty input).
    """
    return "-".join(species_names)

In [4]:
# time points at which each designed community is evaluated: 0 and 24
# (units are not stated in this notebook)
t_eval = np.array((0, 24))

In [5]:
# Total initial OD of every designed community, split evenly among its members.
total_OD = .01
# Number of measurements requested from model.design (its N argument); per the
# original note this corresponds to the number of wells in a 96 well plate.
n_measurements = 31

# Designs are written to "designs/"; create it up front so saving cannot fail
# just because the directory is missing.
os.makedirs("designs", exist_ok=True)

# NOTE: exp_names is re-assigned for every file inside the loop, so after the
# loop it only describes the last file processed (its treatments plus the
# experiments picked for it).
exp_names = []
for file in files:
    # import data
    df = pd.read_csv(f"data/{file}")

    # determine name of strain (second "_"-separated token of the file name)
    strain = file.split("_")[1]
    fname = f"designs/{strain}_design.csv"

    # make sure that experiment names are sorted alphabetically;
    # the strain label inside each treatment name is replaced by "CD"
    exp_names = [
        gen_exp_name(np.sort(name.split("_")[0].replace(strain, "CD").split("-")))
        for name in df.Treatments.values
    ]
    df['Treatments'] = exp_names

    # determine species names (every column after the first two)
    species = df.columns.values[2:]

    # instantiate gLV fit
    model = gLV(species, df)

    # fit to data
    model.fit()

    ### design experiment ###

    # create matrix of all possible presence/absence communities, drop the
    # all-zeros community (first row of the product) and reverse the row order
    dim = len(species)
    X = np.array(list(itertools.product([0, 1], repeat=dim)), float)[1:][::-1]
    # exclude mono cultures
    non_mono_inds = np.sum(X, 1) > 1
    X = X[non_mono_inds]

    # scale initial conditions so that every community sums to total_OD
    X = total_OD * np.einsum("ij,i->ij", X, 1/np.sum(X, 1))

    # generate design matrix: one block of len(t_eval) rows per community that
    # has not been measured yet
    measured = set(exp_names)  # set lookup instead of scanning the list each time
    n_times = len(t_eval)
    design_frames = []
    for x in X:
        exp_name = gen_exp_name(np.sort(species[x > 0]))
        if exp_name in measured:
            continue
        # only the initial condition (first row) is known; later rows stay NaN
        x_mat = np.full([n_times, dim], np.nan)
        x_mat[0] = x
        df_exp = pd.DataFrame({'Treatments': n_times*[exp_name], 'Time': t_eval})
        df_exp[species] = x_mat
        design_frames.append(df_exp)

    # concatenate once instead of growing the frame inside the loop; fall back to
    # an empty frame with the expected columns when nothing is left to design
    if design_frames:
        design_df = pd.concat(design_frames)
    else:
        design_df = pd.DataFrame(columns=['Treatments', 'Time', *species])

    # remove samples that have already been collected
    # (second safeguard on top of the name check above)
    prev_exp = np.unique(df.Treatments.values)
    dup_inds = np.isin(design_df.Treatments.values, prev_exp)
    design_df = design_df.iloc[~dup_inds].copy()

    # nothing left to measure for this strain: save a header-only design and
    # skip the optimisation, which has no candidates to choose from
    if design_df.empty:
        print(f"{strain}: no unmeasured communities left to design")
        design_df.to_csv(fname, index=False)
        continue

    # determine best set of new experiments to collect
    new_exp = model.design(design_df, N=n_measurements)
    exp_names += new_exp

    # new data to collect
    inds = np.isin(design_df.Treatments.values, new_exp)
    new_df = design_df.iloc[inds].copy()

    # save design
    new_df.to_csv(fname, index=False)

Total samples: 38, Initial regularization: 1.00e-03


No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


Loss: 9.201, Residuals: -0.311
Loss: 5.629, Residuals: 0.134
Loss: 5.099, Residuals: 0.124
Loss: 4.127, Residuals: 0.090
Loss: 3.579, Residuals: 0.072
Loss: 2.831, Residuals: 0.007
Loss: 2.577, Residuals: 0.028
Loss: 2.387, Residuals: 0.021
Loss: 2.194, Residuals: -0.021
Loss: 2.160, Residuals: -0.013
Loss: 2.108, Residuals: -0.019
Loss: 2.107, Residuals: -0.009
Loss: 2.066, Residuals: -0.021
Loss: 2.065, Residuals: -0.016
Loss: 2.054, Residuals: -0.019
Loss: 2.034, Residuals: -0.025
Loss: 2.026, Residuals: -0.022
Loss: 2.013, Residuals: -0.027
Loss: 1.992, Residuals: -0.037
Loss: 1.992, Residuals: -0.038
Loss: 1.991, Residuals: -0.038
Loss: 1.990, Residuals: -0.038
Loss: 1.989, Residuals: -0.038
Loss: 1.986, Residuals: -0.038
Loss: 1.981, Residuals: -0.040
Loss: 1.979, Residuals: -0.041
Loss: 1.979, Residuals: -0.041
Loss: 1.974, Residuals: -0.043
Loss: 1.974, Residuals: -0.043
Optimization terminated successfully.
Evidence -40.170
Updating hyper-parameters...
Total samples: 38, Updat

Evidence 305.363
Updating hyper-parameters...
Total samples: 31, Updated regularization: 2.70e+00
Loss: 70.743, Residuals: 0.042
Loss: 70.223, Residuals: 0.038
Loss: 69.790, Residuals: 0.034
Loss: 69.725, Residuals: 0.033
Loss: 69.694, Residuals: 0.035
Loss: 69.658, Residuals: 0.034
Loss: 69.652, Residuals: 0.033
Loss: 69.651, Residuals: 0.034
Loss: 69.649, Residuals: 0.034
Optimization terminated successfully.
Evidence 377.055
Updating hyper-parameters...
Total samples: 31, Updated regularization: 2.88e+00
Loss: 98.476, Residuals: 0.042
Optimization terminated successfully.
Evidence 400.956
Updating hyper-parameters...
Total samples: 31, Updated regularization: 3.15e+00
Loss: 112.562, Residuals: 0.018
Loss: 112.100, Residuals: 0.019
Loss: 111.629, Residuals: 0.017
Loss: 111.548, Residuals: 0.016
Loss: 111.521, Residuals: 0.016
Loss: 111.498, Residuals: 0.016
Loss: 111.480, Residuals: 0.016
Loss: 111.478, Residuals: 0.016
Loss: 111.476, Residuals: 0.016
Optimization terminated successf

Processing design dataframe...
Computing sensitivies...
Picked BU-BV-CA-CH-CS
Picked BV-CA-CS-DP
Picked BT-BU-BV-CA-CH-CS
Picked BT-BU-CA-CH-CS-DP
Picked BV-CA-CH-CS
Picked BV-CA-CH-CS-DP
Picked BU-CA-CD-CH-CS
Picked BV-CA-CD-CH-CS
Picked BU-BV-CH-CS
Picked duplicate!
Picked BU-BV-CA-CS
Picked BT-BU-BV-CA-CH
Picked BU-BV-CA-CS-DP
Picked duplicate!
Picked duplicate!
Picked BU-BV-CA-CH
Picked duplicate!
Picked duplicate!
Picked BU-CA-CH-CS
Picked duplicate!
Picked BT-BU-CA-CH-CS
Picked duplicate!
Picked duplicate!
Picked BV-CA-DP
Picked duplicate!
Picked duplicate!
Picked BU-CA-CD-CH-CS-DP
Picked duplicate!
Picked duplicate!
Picked duplicate!
Picked BV-CH-CS
Picked duplicate!
Picked duplicate!
Picked duplicate!
Picked BT-BV-CA-CS-DP
Picked duplicate!
Picked duplicate!
Picked duplicate!
Picked CA-CS-DP
Picked duplicate!
Picked duplicate!
Picked duplicate!
Picked duplicate!
Picked duplicate!
Picked duplicate!
Picked BV-CA-CH
Picked duplicate!
Picked duplicate!
Picked duplicate!
Picked dupl

In [6]:
# Sanity check: for every data file, load the saved design of its strain and
# print how many of its rows have a Time value greater than 0.
for file in files:
    strain_id = file.split("_")[1]
    fname = f"designs/{strain_id}_design.csv"
    eval_times = pd.read_csv(fname)['Time'].values
    print(fname, (eval_times > 0).sum())

designs/MS001_design.csv 31
designs/MS014_design.csv 31
designs/MS008_design.csv 31
designs/DSM_design.csv 31
