In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import norm

import os
import time
import itertools

from glove.model import *

In [2]:
# Names of the data files available for fitting. os.listdir returns entries
# in arbitrary order, so sort them to keep the listing (and anything derived
# from it) reproducible across machines and runs.
files = sorted(os.listdir("data/"))

# Strain identifiers; the fitting loop matches these as substrings of the
# file names above.
strains = ["DSM", "MS008", "MS014", "Strain292", "Strain296"]

# define names of species
species = ['BA', 'BO', 'BU', 'PJ', 'ER', 'AC', 
           'CC','CD', 'DP', 'EL', 'BY', 'CG', 
           'CH', 'BL', 'BF', 'BT', 'BV', 'DL',
           'BH', 'RI', 'BP', 'PC', 'DF', 'CA', 
           'BC', 'CS']

In [3]:
# Show the file names found in data/ (bare last expression -> cell output).
files

['DSM_processed_withmono.csv',
 'MS008_processed_withmono.csv',
 'MS014_processed_withmono.csv',
 'Strain292_processed_withmono.csv',
 'Strain296_processed_withmono.csv',
 'Universal_processed_withmono.csv']

# fit gLV models

In [4]:
# Opt-in switch for the per-strain figure of interaction-parameter
# distributions. It draws one panel per ordered species pair, and was disabled
# in the original notebook, so it stays off by default.
SAVE_PARAM_PLOTS = False


def _build_param_names(species):
    """Return one label per fitted parameter.

    The labels are the species names followed by "s1*s2" for every ordered
    pair of species (s1 varying slowest).
    NOTE(review): this is assumed to match the layout of `model.params`
    (per-species terms first, then pairwise terms) -- confirm against gLV.
    """
    pair_names = [s1 + "*" + s2 for s1, s2 in itertools.product(species, repeat=2)]
    return list(species) + pair_names


def _plot_interaction_distributions(strain, species, interactions, interaction_std):
    """Save a grid of normal densities, one per pairwise parameter.

    Parameters
    ----------
    strain : str
        Used as the figure title and as the output file name.
    species : list of str
        Species labels; row i / column j of the grid is labelled with them.
    interactions : array-like
        Pairwise parameter estimates, one per panel, in row-major order.
    interaction_std : array-like
        Standard deviation paired with each entry of `interactions`.
    """
    n_species = len(species)
    # Every panel spans +/- one standard deviation of the whole parameter
    # vector around its own estimate.
    spread = np.std(interactions)

    plt.figure(figsize=(18, 18))
    for k, (i, j) in enumerate(itertools.product(range(n_species), repeat=2)):
        plt.subplot(n_species, n_species, k + 1)
        grid = np.linspace(interactions[k] - spread, interactions[k] + spread)
        plt.plot(grid, norm.pdf(grid, interactions[k], interaction_std[k]))
        plt.axvline(x=0, c='k', alpha=.5)
        if j == 0:
            plt.ylabel(species[i], fontsize=18)
        if i == n_species - 1:
            plt.xlabel(species[j], fontsize=18)

    plt.suptitle(strain, fontsize=24)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    os.makedirs("figures", exist_ok=True)
    plt.savefig("figures/"+strain+".pdf")
    plt.close()


exp_names = []

# Loop-invariant setup: the universal data set and the parameter labels do not
# depend on the strain, so build them once instead of on every iteration.
df_universal = pd.read_csv("data/Universal_processed_withmono.csv")
param_names = _build_param_names(species)
n_species = len(species)

# The CSV export below fails if the target directory is missing.
os.makedirs("params", exist_ok=True)

# NOTE(review): strains[1:] skips the first entry ("DSM") -- confirm that this
# is intentional.
for strain in strains[1:]:
    # import data: universal measurements plus every file matching this strain
    df_strain = pd.concat([pd.read_csv(f"data/{fname}") for fname in files if strain in fname])
    df_data = pd.concat((df_universal, df_strain))

    # NOTE(review): these per-replicate labels are built but never written
    # back to the frame, so the model receives the plain treatment names.
    # Confirm whether replicates were meant to be distinguished.
    treatment_names = [t+"_"+str(rep) for t, rep in zip(df_data["Treatment"].values, df_data["Replicate"].values)]

    # order the rows and rename the treatment column to "Treatments" before
    # handing the frame to gLV
    df_data = (
        df_data
        .sort_values(by=["Treatment", "Time"])
        .rename(columns={"Treatment": "Treatments"})
    )

    # instantiate gLV fit
    model = gLV(species, df_data)

    # fit to data
    model.fit()

    # Standard deviation of each parameter, taken from the diagonal of
    # model.Ainv; computed once and reused below.
    std_errors = np.sqrt(np.diag(model.Ainv))

    if SAVE_PARAM_PLOTS:
        _plot_interaction_distributions(
            strain, species, model.params[n_species:], std_errors[n_species:]
        )

    # compute Wald test for each parameter (two-sided, normal approximation)
    walds = model.params/std_errors
    wald_p_vals = 2*norm.cdf(-np.abs(walds))

    # Parameter summary followed by the full model.Ainv matrix, one column per
    # parameter. `df` is kept as the name of this table for later cells.
    df = pd.DataFrame({
        "Param name": param_names,
        "Param value": np.asarray(model.params),
        "Param stdv": std_errors,
        "Param p-value": wald_p_vals,
    })
    df = pd.concat([df, pd.DataFrame(columns=param_names, data=model.Ainv)], axis=1)
    df.to_csv("params/"+strain+".csv", index=False)

No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


Total samples: 99, Initial regularization: 1.00e-03
Loss: 91.057, Residuals: -0.922
Loss: 72.280, Residuals: -0.948
Loss: 64.886, Residuals: -1.413
Loss: 63.090, Residuals: -1.464
Loss: 60.330, Residuals: -1.476
Loss: 59.670, Residuals: -1.371
Loss: 58.496, Residuals: -1.399
Loss: 58.273, Residuals: -1.373
Loss: 57.871, Residuals: -1.385
Loss: 57.250, Residuals: -1.396
Loss: 57.222, Residuals: -1.393
Loss: 57.003, Residuals: -1.391
Loss: 56.965, Residuals: -1.366
Loss: 56.892, Residuals: -1.371
Loss: 56.761, Residuals: -1.380
Loss: 56.757, Residuals: -1.382
Loss: 56.612, Residuals: -1.392
Loss: 56.609, Residuals: -1.386
Loss: 56.495, Residuals: -1.394
Loss: 56.494, Residuals: -1.394
Optimization terminated successfully.
Evidence -167.567
Updating hyper-parameters...
Total samples: 99, Updated regularization: 1.02e+00
Loss: 362.669, Residuals: -1.395
Optimization terminated successfully.
Evidence 3652.242
Updating hyper-parameters...
Total samples: 99, Updated regularization: 1.06e+00
L

Evidence 5093.263
Updating hyper-parameters...
Total samples: 99, Updated regularization: 1.06e+00
Loss: 1234.540, Residuals: -1.163
Optimization terminated successfully.
Evidence 5220.902
Updating hyper-parameters...
Total samples: 99, Updated regularization: 1.09e+00
Loss: 1255.214, Residuals: -1.165
Loss: 1254.332, Residuals: -1.162
Optimization terminated successfully.
Evidence 5233.010
Updating hyper-parameters...
Total samples: 99, Updated regularization: 1.11e+00
Loss: 1262.642, Residuals: -1.162
Optimization terminated successfully.
Evidence 5237.994
Pass count  1


In [5]:
# Inspect the parameter table left in `df` by the fitting loop. `df` is
# reassigned on every iteration, so this shows the last strain that was fitted.
df

Unnamed: 0,Param name,Param value,Param stdv,Param p-value,BA,BO,BU,PJ,ER,AC,...,CS*BV,CS*DL,CS*BH,CS*RI,CS*BP,CS*PC,CS*DF,CS*CA,CS*BC,CS*CS
0,BA,0.487680,0.015165,6.650098e-227,2.299691e-04,-7.493688e-07,3.126438e-06,5.223333e-09,-1.128923e-06,2.058439e-06,...,4.442064e-06,-7.147605e-07,1.089745e-07,7.436448e-08,2.130356e-07,6.514188e-08,-3.885328e-07,3.653031e-08,5.625532e-07,-2.370606e-07
1,BO,0.605301,0.012791,0.000000e+00,-7.493688e-07,1.636199e-04,-2.420940e-07,-6.694026e-10,1.873122e-07,6.703133e-06,...,1.002237e-06,-1.859628e-06,8.267831e-09,6.976543e-08,4.693362e-07,1.484938e-07,2.133828e-06,2.460335e-08,-7.765987e-07,3.086324e-08
2,BU,0.485129,0.022274,3.611067e-105,3.126438e-06,-2.420940e-07,4.961454e-04,3.609088e-09,-5.784074e-07,1.156719e-06,...,6.164378e-06,-3.767978e-06,1.362139e-07,2.168609e-07,1.282700e-06,3.762640e-07,3.965208e-06,7.548419e-08,1.138431e-06,-1.450912e-07
3,PJ,0.154275,0.003202,0.000000e+00,5.223333e-09,-6.694026e-10,3.609088e-09,1.025305e-05,-1.348516e-09,-3.025613e-10,...,1.590569e-08,7.057094e-09,7.169798e-10,3.152432e-10,1.710952e-09,3.458197e-10,3.665895e-09,1.315768e-10,1.251958e-08,-4.653870e-10
4,ER,0.042354,0.004213,8.895512e-24,-1.128923e-06,1.873122e-07,-5.784074e-07,-1.348516e-09,1.774963e-05,7.446175e-07,...,5.306784e-07,1.095036e-06,3.523031e-08,-1.131264e-08,-9.642536e-08,-3.087016e-08,7.308251e-08,3.863703e-09,-5.884100e-07,4.984901e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
697,CS*PC,-0.097120,0.999882,9.226216e-01,6.514188e-08,1.484938e-07,3.762640e-07,3.458197e-10,-3.087016e-08,-1.746747e-06,...,1.200751e-03,2.430799e-03,4.162147e-05,-1.024598e-04,-1.077812e-03,9.997635e-01,-5.069405e-03,-9.337306e-06,-1.494726e-03,3.333052e-06
698,CS*DF,-0.093351,0.707846,8.950789e-01,-3.885328e-07,2.133828e-06,3.965208e-06,3.665895e-09,7.308251e-08,-3.554975e-05,...,1.749735e-02,1.992934e-02,4.983331e-05,-1.724041e-03,-1.939326e-02,-5.069405e-03,5.010464e-01,-1.350965e-04,-4.263864e-02,6.636873e-05
699,CS*CA,-0.100026,1.000493,9.203632e-01,3.653031e-08,2.460335e-08,7.548419e-08,1.315768e-10,3.863703e-09,3.111897e-08,...,-4.244515e-04,-1.083510e-04,-1.585264e-05,-7.715620e-06,-3.983369e-05,-9.337306e-06,-1.350965e-04,1.000987e+00,-1.250326e-04,1.154874e-06
700,CS*BC,-0.003646,0.321769,9.909589e-01,5.625532e-07,-7.765987e-07,1.138431e-06,1.251958e-08,-5.884100e-07,5.355931e-06,...,-1.601278e-02,-3.526258e-02,-1.664964e-03,-6.476417e-04,-7.710071e-03,-1.494726e-03,-4.263864e-02,-1.250326e-04,1.035350e-01,7.290659e-05
