In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import norm

import os
import time
import itertools

from glove.model import *

params = {
    'figure.figsize': (18, 18),       # Figure size in inches
    'figure.dpi': 300,               # Resolution in dots per inch
    'axes.labelsize': 24,            # Font size of the axes labels
    'axes.titlesize': 24,            # Font size of the subplot titles
    'axes.titlepad': 20,             # Spacing between the subplot title and the plot
    'axes.linewidth': 2.,           # Width of the axis lines
    'xtick.labelsize': 16,           # Font size of the x-axis tick labels
    'ytick.labelsize': 16,           # Font size of the y-axis tick labels
    'legend.fontsize': 16,           # Font size of the legend
    'legend.frameon': True,          # Whether to draw a frame around the legend
    'legend.facecolor': 'inherit',   # Background color of the legend
    'legend.edgecolor': '0.8',       # Edge color of the legend
    'legend.framealpha': 0.8,        # Transparency of the legend frame
    'legend.loc': 'best',            # Location of the legend ('best', 'upper right', 'upper left', 'lower left', etc.)
    'legend.title_fontsize': 20,     # Font size of the legend title
    'pdf.fonttype': 42               # Output font type for PDF files
}

plt.rcParams.update(params)

In [2]:
# import file names
files = os.listdir("data/")
files = [f for f in files if "processed" in f and "passage" not in f]
files

['EXP0019_MS001_processed.csv',
 'EXP0019_MS014_processed.csv',
 'EXP0019_MS008_processed.csv',
 'EXP0019_DSM27147_processed.csv']

# fit gLV models

In [3]:
exp_names = []
for file in files:
    # import data
    df = pd.read_csv(f"data/{file}")

    # determine species names 
    species = df.columns.values[2:]

    # instantiate gLV fit 
    model = gLV(species, df)

    # fit to data 
    model.fit()
    
    # list of parameter names 
    param_names = []
    for s1 in species:
        for s2 in species:
            param_names += [s1+"*"+s2]
    param_names = list(species) + param_names
    
    # plot parameter distribution
    n_species = len(species)
    Avec = model.params[n_species:]
    Aij_std = np.sqrt(np.diag(model.Ainv))[n_species:]

    # plot
    k = 0
    for i in range(n_species):
        for j in range(n_species):
            plt.subplot(n_species, n_species, k+1)
            a = np.linspace(Avec[k]-np.std(Avec), Avec[k]+np.std(Avec))
            plt.plot(a, norm.pdf(a,Avec[k],Aij_std[k]), linewidth=3)
            plt.axvline(x=0, c='k', alpha=.5)
            k += 1
            if j == 0:
                plt.ylabel(species[i])
            if i == n_species-1:
                plt.xlabel(species[j])

    plt.suptitle(file.split("_")[0], fontsize=24)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.savefig("figures/"+file.split("_")[0]+".pdf")
    plt.show()
    plt.close()
    
    # compute Wald test for each parameter
    std_errors = np.sqrt(np.diag(model.Ainv))
    walds = model.params/std_errors
    wald_p_vals = 2*norm.cdf(-np.abs(walds))

    # save to df 
    df = pd.DataFrame()
    df["Param name"] = param_names
    df["Param value"] = model.params
    df["Param stdv"]  = np.sqrt(np.diag(model.Ainv))
    df["Param p-value"] = wald_p_vals
    for j, param_name in enumerate(param_names):
        df[param_name]  = model.Ainv[:, j]
    df.to_csv("params/"+file.split("_")[1]+".csv", index=False)

No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


Total samples: 38, Initial regularization: 1.00e-03
Loss: 9.201, Residuals: -0.311
Loss: 5.629, Residuals: 0.134
Loss: 5.099, Residuals: 0.124
Loss: 4.127, Residuals: 0.090
Loss: 3.578, Residuals: 0.072
Loss: 2.824, Residuals: 0.015
Loss: 2.538, Residuals: 0.000
Loss: 2.257, Residuals: -0.017
Loss: 2.222, Residuals: 0.010
Loss: 2.164, Residuals: -0.002
Loss: 2.151, Residuals: 0.001
Loss: 2.127, Residuals: -0.006
Loss: 2.088, Residuals: -0.018
Loss: 2.087, Residuals: -0.011
Loss: 2.055, Residuals: -0.023
Loss: 2.044, Residuals: -0.020
Loss: 2.025, Residuals: -0.029
Loss: 2.024, Residuals: -0.020
Loss: 2.013, Residuals: -0.024
Loss: 2.013, Residuals: -0.025
Loss: 2.003, Residuals: -0.030
Loss: 2.002, Residuals: -0.029
Loss: 1.996, Residuals: -0.033
Loss: 1.987, Residuals: -0.037
Loss: 1.985, Residuals: -0.037
Loss: 1.985, Residuals: -0.037
Loss: 1.979, Residuals: -0.040
Loss: 1.979, Residuals: -0.040
Optimization terminated successfully.
Evidence -40.159
Updating hyper-parameters...
Tota

Loss: 121.664, Residuals: 0.000
Loss: 121.644, Residuals: -0.002
Loss: 121.637, Residuals: -0.002
Loss: 121.635, Residuals: -0.002
Loss: 121.634, Residuals: -0.002
Optimization terminated successfully.
Evidence 420.335
Updating hyper-parameters...
Total samples: 31, Updated regularization: 2.16e+00
Loss: 121.959, Residuals: -0.002
Loss: 121.951, Residuals: -0.003
Optimization terminated successfully.
Evidence 420.602
Pass count  1
Total samples: 38, Initial regularization: 1.00e-03
Loss: 9.346, Residuals: -0.313
Loss: 5.660, Residuals: 0.133
Loss: 5.093, Residuals: 0.127
Loss: 4.053, Residuals: 0.088
Loss: 3.248, Residuals: 0.055
Loss: 2.696, Residuals: 0.006
Loss: 2.485, Residuals: 0.005
Loss: 2.229, Residuals: -0.008
Loss: 2.225, Residuals: -0.013
Loss: 2.192, Residuals: -0.015
Loss: 2.135, Residuals: -0.017
Loss: 2.128, Residuals: -0.001
Loss: 2.115, Residuals: -0.005
Loss: 2.091, Residuals: -0.012
Loss: 2.053, Residuals: -0.022
Loss: 2.052, Residuals: -0.015
Loss: 2.050, Residuals:

Loss: 149.071, Residuals: 0.017
Optimization terminated successfully.
Evidence 489.540
Updating hyper-parameters...
Total samples: 38, Updated regularization: 1.43e+00
Loss: 149.701, Residuals: 0.017
Loss: 149.642, Residuals: 0.015
Loss: 149.637, Residuals: 0.016
Optimization terminated successfully.
Evidence 490.148
Updating hyper-parameters...
Total samples: 38, Updated regularization: 1.44e+00
Loss: 149.991, Residuals: 0.016
Loss: 149.982, Residuals: 0.015
Optimization terminated successfully.
Evidence 490.509
Pass count  1
