In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import norm, pearsonr

import os
import time
import itertools

from glove_L1.glv3 import *

  from pandas.core import (


In [2]:
# how many random trials to run
n_trials = 10

# grid of L1 penalties to scan: no penalty first, then one value per
# decade from 1e-8 up to 1
lmbdas = [
    0.0,
    1e-8, 1e-7, 1e-6,
    1e-5, 1e-4, 1e-3,
    1e-2, 1e-1, 1.0,
]

# show how many penalties are in the grid
len(lmbdas)

10

In [3]:
# import file names
# os.listdir returns every directory entry in arbitrary order, so keep only
# the CSV files (skipping hidden files, checkpoint folders, etc.) and sort
# them to get the same processing order on every run and every machine
files = sorted(
    name
    for name in os.listdir("data/SET3_Thirdtrial/")
    if name.lower().endswith(".csv")
)
files

['CDanc_CDmono12h_PROVIDET0.csv', 'CDevo_CDmono12h_PROVIDET0.csv']

In [None]:
def _load_timeseries(path):
    """Read one dataset and drop treatments that have only a single row.

    Parameters
    ----------
    path : str
        CSV file containing a "Treatments" column and a "Time" column.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by treatment and then time, restricted to treatments
        that have more than one row.

    Raises
    ------
    ValueError
        If no treatment in the file has more than one row.
    """
    data = pd.read_csv(path).sort_values(by=["Treatments", "Time"])

    # keep only treatments with more than one row, so every condition has
    # at least one measurement beyond its first row
    kept = [group for _, group in data.groupby("Treatments") if group.shape[0] > 1]
    if not kept:
        # pd.concat([]) would raise a ValueError that does not name the file
        raise ValueError(f"{path}: no treatment has more than one row")
    return pd.concat(kept)


def _build_param_names(species):
    """Build the list of parameter labels for the given species names.

    The order is: one entry per species, then every ordered pair written
    "s1<-s2" (including s1 == s2), then, for each s1, every unordered pair
    of the *other* species written "s1<-[s2*s3]".

    Parameters
    ----------
    species : sequence of str
        Species names, in column order.

    Returns
    -------
    list of str
        One label per parameter.
    """
    names = list(species)
    names += [f"{s1}<-{s2}" for s1 in species for s2 in species]
    for i, s1 in enumerate(species):
        # pairs (s2, s3) with s2 before s3 in column order, neither equal to s1
        others = [s for j, s in enumerate(species) if j != i]
        names += [f"{s1}<-[{s2}*{s3}]" for s2, s3 in itertools.combinations(others, 2)]
    return names


# create the output folder up front so the first save cannot fail after a
# long fit just because the folder is missing
os.makedirs("L1scan", exist_ok=True)

# loop over trials
for trial in range(n_trials):

    # NOTE(review): never filled in this cell; kept in case other cells read it
    exp_names = []
    for file in files:

        # define strain name (text before the first underscore)
        strain = file.split("_")[0]

        # import data
        df = _load_timeseries(f"data/SET3_Thirdtrial/{file}")

        # determine species names: every column after the first two
        species = df.columns.values[2:]

        # list of parameter names
        param_names = _build_param_names(species)

        # init dataframe: one row per parameter, one column per L1 penalty
        param_df = pd.DataFrame()
        param_df['parameter'] = param_names

        # scan range of L1 penalties
        for lmbda in lmbdas:

            # init model
            model = gLV(dataframe=df,
                        species=species,
                        lmbda=lmbda)
            print(f"Number of parameters: {model.n_params}")

            # fit to data
            f = model.fit_rmse(epochs=200)

            # save parameters
            # assumes model.z is ordered like param_names — TODO confirm against glv3
            param_df[lmbda] = model.z

            # rewrite the file after every penalty so partial results
            # survive an interrupted run
            param_df.to_csv(f"L1scan/{strain}_params_{trial}.csv", index=False)

Number of parameters: 240
Epoch 0, RMSE: 3.61655
Epoch 10, RMSE: 0.18387
Epoch 20, RMSE: 0.04749
Epoch 30, RMSE: 0.04179
Epoch 40, RMSE: 0.03916
Epoch 50, RMSE: 0.03819
Epoch 60, RMSE: 0.03582
Epoch 70, RMSE: 0.03488
Epoch 80, RMSE: 0.03403
Epoch 90, RMSE: 0.03260
Epoch 100, RMSE: 0.03190
Epoch 110, RMSE: 0.03136
Epoch 120, RMSE: 0.03127
Epoch 130, RMSE: 0.03182
Epoch 140, RMSE: 0.03113
Epoch 150, RMSE: 0.03039
Epoch 160, RMSE: 0.03114
Epoch 170, RMSE: 0.02983
Epoch 180, RMSE: 0.03016
Epoch 190, RMSE: 0.03028
Epoch 200, RMSE: 0.03079
Number of parameters: 240
Epoch 0, RMSE: 3.19665
Epoch 10, RMSE: 0.12387
Epoch 20, RMSE: 0.11248
Epoch 30, RMSE: 0.10603
