In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import norm, pearsonr

import os
import time
import itertools

from glove.model3 import *

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
Matplotlib created a temporary cache directory at /tmp/matplotlib-lasqfr_8 because the default path (/home/jaron/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:
# import file names
files = os.listdir("data/SET3_Thirdtrial/")
files

['CDanc_CDmono12h_PROVIDET0.csv', 'CDevo_CDmono12h_PROVIDET0.csv']

# fit gLV models

In [3]:
exp_names = []
for file in files:
    # define strain name
    strain = file.split("_")[0]
    
    # import data
    df = pd.read_csv(f"data/SET3_Thirdtrial/{file}")
    df.sort_values(by=["Treatments", "Time"], inplace=True)
    
    # make sure that conditions have at least one measurement
    dfs = []
    for treatment, df_t in df.groupby("Treatments"):
        if df_t.shape[0] > 1:
            dfs.append(df_t)
    df = pd.concat(dfs)

    # determine species names 
    species = df.columns.values[2:]

    # instantiate gLV fit 
    model = gLV(species, df)

    # fit to data 
    model.fit()
    
    # plot parameter distribution
    n_species = len(species)
    Avec = model.params[n_species:n_species + n_species**2]
    Aij_std = np.sqrt(np.diag(model.Ainv))[n_species:n_species + n_species**2]

    plt.figure(figsize=(18,18))
    # set counter for parameter std. 
    k = 0

    for i in range(n_species):
        for j in range(n_species):
            plt.subplot(n_species, n_species, k+1)
            a = np.linspace(Avec[k]-np.std(Avec), Avec[k]+np.std(Avec))
            plt.plot(a, norm.pdf(a,Avec[k],Aij_std[k]))
            plt.axvline(x=0, c='k', alpha=.5)
            k += 1
            if j == 0:
                plt.ylabel(species[i], fontsize=18)
            if i == n_species-1:
                plt.xlabel(species[j], fontsize=18)
            #plt.xlim([-2,2])
    
    plt.suptitle(strain, fontsize=24)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.savefig("params/SET3/"+strain+"_3.pdf")
    plt.close()
    
    # list of parameter names 
    param_names2 = []
    for s1 in species:
        for s2 in species:
            param_names2 += [s1+"<-"+s2]

    param_names3 = []
    for i,s1 in enumerate(species):
        for j,s2 in enumerate(species):
            if j != i:
                for k,s3 in enumerate(species):
                    if k > j and k !=i:
                        param_names3 += [s1+"<-["+s2+"*"+s3+"]"]
            
    param_names = list(species) + param_names2 + param_names3
    
    # compute Wald test for each parameter
    std_errors = np.sqrt(np.diag(model.Ainv))
    walds = model.params/std_errors
    wald_p_vals = 2*norm.cdf(-np.abs(walds))

    # save to df 
    df = pd.DataFrame()
    df["Param name"] = param_names
    df["Param value"] = model.params
    df["Param stdv"]  = np.sqrt(np.diag(model.Ainv))
    df["Param p-value"] = wald_p_vals
    for j, param_name in enumerate(param_names):
        df[param_name]  = model.Ainv[:, j]
    df.to_csv("params/SET3/"+strain+"_3.csv", index=False)

Total samples: 271, Initial regularization: 1.00e-03
Loss: 31.115, Residuals: -0.315
Loss: 16.901, Residuals: 0.175
Loss: 14.077, Residuals: 0.123
Loss: 10.086, Residuals: 0.076
Loss: 7.696, Residuals: 0.053
Loss: 7.149, Residuals: 0.037
Loss: 6.795, Residuals: 0.019
Loss: 6.367, Residuals: 0.031
Loss: 6.302, Residuals: 0.037
Loss: 6.179, Residuals: 0.035
Loss: 5.960, Residuals: 0.030
Loss: 5.750, Residuals: 0.029
Loss: 5.528, Residuals: 0.038
Loss: 5.465, Residuals: 0.037
Loss: 5.446, Residuals: 0.039
Loss: 5.279, Residuals: 0.031
Loss: 5.273, Residuals: 0.037
Loss: 5.081, Residuals: 0.025
Loss: 5.076, Residuals: 0.026
Loss: 5.067, Residuals: 0.028
Loss: 4.986, Residuals: 0.023
Loss: 4.978, Residuals: 0.027
Loss: 4.904, Residuals: 0.023
Loss: 4.903, Residuals: 0.026
Loss: 4.855, Residuals: 0.024
Loss: 4.838, Residuals: 0.024
Loss: 4.709, Residuals: 0.017
Loss: 4.704, Residuals: 0.017
Loss: 4.701, Residuals: 0.021
Loss: 4.697, Residuals: 0.021
Loss: 4.660, Residuals: 0.020
Loss: 4.660,

  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
 

Total samples: 225, Initial regularization: 1.00e-03
Loss: 28.595, Residuals: -0.313
Loss: 12.015, Residuals: 0.221
Loss: 8.490, Residuals: 0.010
Loss: 6.073, Residuals: 0.009
Loss: 4.310, Residuals: 0.011
Loss: 4.200, Residuals: 0.014
Loss: 3.992, Residuals: 0.016
Loss: 3.622, Residuals: 0.018
Loss: 3.105, Residuals: 0.016
Loss: 2.614, Residuals: 0.003
Loss: 2.598, Residuals: 0.009
Loss: 2.458, Residuals: 0.010
Loss: 2.246, Residuals: 0.012
Loss: 2.240, Residuals: 0.017
Loss: 2.031, Residuals: 0.010
Loss: 2.024, Residuals: 0.017
Loss: 2.011, Residuals: 0.019
Loss: 1.898, Residuals: 0.013
Loss: 1.876, Residuals: 0.023
Loss: 1.872, Residuals: 0.023
Loss: 1.837, Residuals: 0.019
Loss: 1.776, Residuals: 0.013
Loss: 1.775, Residuals: 0.014
Loss: 1.772, Residuals: 0.015
Loss: 1.747, Residuals: 0.012
Loss: 1.744, Residuals: 0.014
Loss: 1.717, Residuals: 0.011
Loss: 1.717, Residuals: 0.013
Loss: 1.682, Residuals: 0.008
Loss: 1.681, Residuals: 0.008
Optimization terminated successfully.
Eviden

  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
  df[param_name]  = model.Ainv[:, j]
 

In [4]:
species = ['s1', 's2', 's3', 's4']

In [5]:
param_names3 = []
for i,s1 in enumerate(species):
    for j,s2 in enumerate(species):
        if j != i:
            for k,s3 in enumerate(species):
                if k > j and k !=i:
                    param_names3 += [s1+"<-["+s2+"*"+s3+"]"]

In [6]:
param_names3 = np.array(param_names3)
param_names3

array(['s1<-[s2*s3]', 's1<-[s2*s4]', 's1<-[s3*s4]', 's2<-[s1*s3]',
       's2<-[s1*s4]', 's2<-[s3*s4]', 's3<-[s1*s2]', 's3<-[s1*s4]',
       's3<-[s2*s4]', 's4<-[s1*s2]', 's4<-[s1*s3]', 's4<-[s2*s3]'],
      dtype='<U11')

In [7]:
dim1 = len(species)
dim2 = int(comb(dim1-1, 2))

In [8]:
np.reshape(param_names3, [dim1, dim2])

array([['s1<-[s2*s3]', 's1<-[s2*s4]', 's1<-[s3*s4]'],
       ['s2<-[s1*s3]', 's2<-[s1*s4]', 's2<-[s3*s4]'],
       ['s3<-[s1*s2]', 's3<-[s1*s4]', 's3<-[s2*s4]'],
       ['s4<-[s1*s2]', 's4<-[s1*s3]', 's4<-[s2*s3]']], dtype='<U11')

In [9]:
v = np.arange(1, 5)

dim1 = len(v)
dim2 = int(comb(dim1-1, 2))

def basis3(v):
    
    basis_mat = jnp.zeros([dim1, dim2])
    for i, vi in enumerate(v):
        l = 0
        for j, vj in enumerate(v):
            if j != i:
                for k, vk in enumerate(v):
                    if k > j and k != i:
                        basis_mat = basis_mat.at[i,l].set(vj * vk)
                        l += 1
                        
    return basis_mat

basis3(v)

Array([[ 6.,  8., 12.],
       [ 3.,  4., 12.],
       [ 2.,  4.,  8.],
       [ 2.,  3.,  6.]], dtype=float32)

In [11]:
model.get_params()

Unnamed: 0,Param name,Param value,Param stdv,Param p-value
0,CA,0.287265,0.014278,5.000093e-90
1,BT,0.880132,0.013317,0.000000e+00
2,BU,0.952500,0.012914,0.000000e+00
3,CD,0.653271,0.010054,0.000000e+00
4,BV,0.781893,0.011183,0.000000e+00
...,...,...,...,...
235,CH<-[CD*CS],-0.186234,1.000960,8.524017e-01
236,CH<-[CD*DP],0.534182,1.170947,6.482489e-01
237,CH<-[BV*CS],0.851035,1.654507,6.069905e-01
238,CH<-[BV*DP],-1.644912,1.253445,1.894144e-01
