In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import norm

import os
import time
import itertools

from glove.model import *

# Import data

In [2]:
# import file names
files = os.listdir("data/")
files = [f for f in files if "processed" in f and "passage" not in f]


# Design experiments for each strain

In [3]:
def gen_exp_name(species_names):
    exp_name = ""
    for s in species_names:
        exp_name += s
        exp_name += "-"
    return exp_name[:-1]

In [4]:
# define possible evaluation times 
t_eval = np.array([0, 24])

In [5]:
exp_names = []
for file in files:
    # import data
    df = pd.read_csv(f"data/{file}")

    # determine species names 
    species = df.columns.values[2:]

    # instantiate gLV fit 
    model = gLV(species, df)

    # fit to data 
    model.fit()

    ### design experiment ###

    # create matrix of all possible communities
    dim = len(species)
    Xlist = [np.reshape(np.array(i), (1, dim)) for i in itertools.product([0, 1], repeat = dim)]
    # remove all zeros community
    X = np.array(np.concatenate(Xlist)[1:, :][::-1], float)
    # exclude mono cultures
    non_mono_inds = np.sum(X, 1) > 1
    X = X[non_mono_inds]

    # scale initial conditions 
    total_OD = .01 
    X = total_OD * np.einsum("ij,i->ij", X, 1/np.sum(X, 1))

    # generate design matrix 
    design_df = pd.DataFrame()
    for i,x in enumerate(X):
        exp_name = gen_exp_name(species[x>0])
        if exp_name not in exp_names:
            # eval time [0, 24]
            x_mat = np.empty([2, dim])
            x_mat[:] = np.nan
            x_mat[0] = x
            df_exp = pd.DataFrame()
            df_exp['Treatments'] = 2*[exp_name]
            df_exp['Time'] = t_eval
            df_exp[species] = x_mat
            design_df = pd.concat((design_df, df_exp))
            
    # remove samples that have already been collected 
    strain = file.split("_")[1]
    prev_exp = np.array([exp.split("_")[0].replace(strain, "CD") for exp in np.unique(df.Treatments.values) if "Mono" not in exp])
    dup_inds = np.in1d(design_df.Treatments.values, prev_exp)
    design_df = design_df.iloc[~dup_inds].copy()

    # determine best set of new experiments to collect 
    # N is the total number of measurements (corresponds to number of wells in 96 well plate)
    new_exp = model.design(design_df, N=31)
    exp_names += new_exp

    # new data to collect
    inds = np.in1d(design_df.Treatments.values, new_exp)
    new_df = design_df.iloc[inds].copy()

    # save design
    fname = f"designs/" + strain + "_design.csv"
    new_df.to_csv(fname, index=False)

No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


Total samples: 38, Initial regularization: 1.00e-03
Loss: 9.201, Residuals: -0.311
Loss: 5.629, Residuals: 0.134
Loss: 5.099, Residuals: 0.124
Loss: 4.127, Residuals: 0.090
Loss: 3.578, Residuals: 0.072
Loss: 2.824, Residuals: 0.015
Loss: 2.538, Residuals: 0.000
Loss: 2.257, Residuals: -0.017
Loss: 2.222, Residuals: 0.010
Loss: 2.164, Residuals: -0.002
Loss: 2.151, Residuals: 0.001
Loss: 2.127, Residuals: -0.006
Loss: 2.088, Residuals: -0.018
Loss: 2.087, Residuals: -0.011
Loss: 2.055, Residuals: -0.023
Loss: 2.044, Residuals: -0.020
Loss: 2.025, Residuals: -0.029
Loss: 2.024, Residuals: -0.020
Loss: 2.013, Residuals: -0.024
Loss: 2.013, Residuals: -0.025
Loss: 2.003, Residuals: -0.030
Loss: 2.002, Residuals: -0.029
Loss: 1.996, Residuals: -0.033
Loss: 1.987, Residuals: -0.037
Loss: 1.985, Residuals: -0.037
Loss: 1.985, Residuals: -0.037
Loss: 1.979, Residuals: -0.040
Loss: 1.979, Residuals: -0.040
Optimization terminated successfully.
Evidence -40.159
Updating hyper-parameters...
Tota

Loss: 12.963, Residuals: 0.043
Optimization terminated successfully.
Evidence 171.313
Updating hyper-parameters...
Total samples: 31, Updated regularization: 1.35e+00
Loss: 36.292, Residuals: 0.041
Loss: 35.975, Residuals: 0.042
Loss: 35.766, Residuals: 0.045
Loss: 35.676, Residuals: 0.046
Loss: 35.622, Residuals: 0.044
Loss: 35.590, Residuals: 0.044
Loss: 35.581, Residuals: 0.044
Loss: 35.511, Residuals: 0.045
Loss: 35.471, Residuals: 0.048
Loss: 35.470, Residuals: 0.047
Optimization terminated successfully.
Evidence 306.194
Updating hyper-parameters...
Total samples: 31, Updated regularization: 2.40e+00
Loss: 70.490, Residuals: 0.039
Loss: 70.022, Residuals: 0.037
Loss: 69.702, Residuals: 0.031
Loss: 69.648, Residuals: 0.033
Loss: 69.625, Residuals: 0.033
Loss: 69.592, Residuals: 0.032
Loss: 69.590, Residuals: 0.032
Optimization terminated successfully.
Evidence 377.961
Updating hyper-parameters...
Total samples: 31, Updated regularization: 2.74e+00
Loss: 98.628, Residuals: 0.040
Opt

Loss: 149.319, Residuals: 0.010
Loss: 149.249, Residuals: 0.010
Loss: 149.241, Residuals: 0.010
Loss: 149.231, Residuals: 0.010
Loss: 149.230, Residuals: 0.010
Optimization terminated successfully.
Evidence 499.961
Updating hyper-parameters...
Total samples: 38, Updated regularization: 2.25e+00
Loss: 149.860, Residuals: 0.010
Loss: 149.823, Residuals: 0.010
Loss: 149.817, Residuals: 0.010
Loss: 149.808, Residuals: 0.010
Loss: 149.808, Residuals: 0.010
Optimization terminated successfully.
Evidence 500.502
Updating hyper-parameters...
Total samples: 38, Updated regularization: 2.22e+00
Loss: 150.169, Residuals: 0.009
Loss: 150.147, Residuals: 0.010
Loss: 150.133, Residuals: 0.009
Loss: 150.133, Residuals: 0.010
Optimization terminated successfully.
Evidence 500.882
Pass count  1
Processing design dataframe...
Computing sensitivies...
Picked BT-BV-CH-CS-CA-DP
Picked BV-CH-DP
Picked BT-CH-CS-CA
Picked CH-BU-CA
Picked BT-BV-CH-BU-DP
Picked CH-BU-CS
Picked BT-BV-BU-CS-CA
Picked BT-BV-CH-CA-

Total samples: 38, Updated regularization: 1.29e+00
Loss: 148.442, Residuals: 0.008
Loss: 148.034, Residuals: 0.010
Loss: 147.993, Residuals: 0.009
Loss: 147.651, Residuals: 0.011
Loss: 147.641, Residuals: 0.011
Optimization terminated successfully.
Evidence 486.618
Updating hyper-parameters...
Total samples: 38, Updated regularization: 1.37e+00
Loss: 148.940, Residuals: 0.016
Loss: 148.838, Residuals: 0.011
Loss: 148.655, Residuals: 0.012
Loss: 148.394, Residuals: 0.014
Loss: 148.391, Residuals: 0.015
Optimization terminated successfully.
Evidence 488.530
Updating hyper-parameters...
Total samples: 38, Updated regularization: 1.42e+00
Loss: 149.293, Residuals: 0.016
Loss: 149.205, Residuals: 0.014
Loss: 149.073, Residuals: 0.016
Loss: 149.071, Residuals: 0.017
Optimization terminated successfully.
Evidence 489.540
Updating hyper-parameters...
Total samples: 38, Updated regularization: 1.43e+00
Loss: 149.701, Residuals: 0.017
Loss: 149.642, Residuals: 0.015
Loss: 149.637, Residuals: 0.

In [6]:
for file in files:
    fname = f"designs/" + file.split("_")[1] + "_design.csv"
    eval_times = pd.read_csv(fname)['Time'].values
    print(fname, sum(eval_times>0))

designs/MS001_design.csv 31
designs/MS014_design.csv 31
designs/MS008_design.csv 31
designs/DSM_design.csv 31
