In [16]:
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
from scipy.special import comb

from armored.newmodels import *
from armored.preprocessing import *

from sklearn.model_selection import KFold

import itertools

from tqdm import tqdm

In [2]:
# Column names for the three kinds of variables in the data set.
species = [
    'ACabs', 'BAabs', 'BHabs', 'BLabs', 'BUabs',
    'CAabs', 'CCabs', 'CHabs', 'DFabs', 'ELabs',
    'ERabs', 'FPabs', 'PCabs', 'PJabs', 'RIabs',
]
controls = ['AcGum', 'ArGal', 'Inulin', 'Pectin', 'Starch', 'Xylan']
metabolites = ['pH', 'Lactate', 'Butyrate', 'Acetate']

# Observed variables are species followed by metabolites; the full list of
# system variables appends the controls after them.
observed = np.array(species + metabolites)
system_variables = np.array(species + metabolites + controls)
system_variables

array(['ACabs', 'BAabs', 'BHabs', 'BLabs', 'BUabs', 'CAabs', 'CCabs',
       'CHabs', 'DFabs', 'ELabs', 'ERabs', 'FPabs', 'PCabs', 'PJabs',
       'RIabs', 'pH', 'Lactate', 'Butyrate', 'Acetate', 'AcGum', 'ArGal',
       'Inulin', 'Pectin', 'Starch', 'Xylan'], dtype='<U8')

In [3]:
# Load the simulated matrix and display it.
# NOTE(review): the name `df` is reassigned further down when the experimental
# tables are concatenated, so the lookups on the simulated matrix must run first.
simulated_matrix_csv = "data/simulated_matrix.csv"
df = pd.read_csv(simulated_matrix_csv)
df

Unnamed: 0,Experiments,ACabs,BAabs,BHabs,BLabs,BUabs,CAabs,CCabs,CHabs,DFabs,...,PCabs,PJabs,RIabs,AcGum,ArGal,Inulin,Pectin,Starch,Xylan,Butyrate
0,RI-AcGum,0,0,0,0,0,0,0,0,0,...,0,0,1,1,0,0,0,0,0,0.035571
1,RI-ArGal,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0.013569
2,RI-Inulin,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0.204378
3,RI-Pectin,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,0.262526
4,RI-Starch,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0.057020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196597,AC-BA-BH-BL-BU-CA-CC-CH-DF-EL-ER-FP-PC-PJ-RI-A...,1,1,1,1,1,1,1,1,1,...,1,1,1,0,1,0,0,0,0,0.069957
196598,AC-BA-BH-BL-BU-CA-CC-CH-DF-EL-ER-FP-PC-PJ-RI-I...,1,1,1,1,1,1,1,1,1,...,1,1,1,0,0,1,0,0,0,0.739902
196599,AC-BA-BH-BL-BU-CA-CC-CH-DF-EL-ER-FP-PC-PJ-RI-P...,1,1,1,1,1,1,1,1,1,...,1,1,1,0,0,0,1,0,0,0.317867
196600,AC-BA-BH-BL-BU-CA-CC-CH-DF-EL-ER-FP-PC-PJ-RI-S...,1,1,1,1,1,1,1,1,1,...,1,1,1,0,0,0,0,1,0,0.560113


In [4]:
# Butyrate value for the AC-Inulin condition.
# A single label-based selection replaces the deprecated np.in1d mask and the
# chained iloc[...][...] indexing; the returned Series is the same.
df.loc[df["Experiments"] == "AC-Inulin", "Butyrate"]

98300    0.245662
Name: Butyrate, dtype: float64

In [5]:
# Butyrate value for the AC-BU-Inulin condition.
# A single label-based selection replaces the deprecated np.in1d mask and the
# chained iloc[...][...] indexing; the returned Series is the same.
df.loc[df["Experiments"] == "AC-BU-Inulin", "Butyrate"]

104444    0.619136
Name: Butyrate, dtype: float64

In [6]:
# Butyrate value for the AC-PC-Inulin condition.
# A single label-based selection replaces the deprecated np.in1d mask and the
# chained iloc[...][...] indexing; the returned Series is the same.
df.loc[df["Experiments"] == "AC-PC-Inulin", "Butyrate"]

98324    0.544133
Name: Butyrate, dtype: float64

In [7]:
# Butyrate value for the AC-BU-PC-Inulin condition.
# A single label-based selection replaces the deprecated np.in1d mask and the
# chained iloc[...][...] indexing; the returned Series is the same.
df.loc[df["Experiments"] == "AC-BU-PC-Inulin", "Butyrate"]

104468    0.675937
Name: Butyrate, dtype: float64

In [8]:
# data with initial and end point measurements
df_mono = pd.read_csv("data/exp0/exp0_mono_reps.csv")
df_exp0 = pd.read_csv("data/exp0/exp0_comm.csv")
df_exp1 = pd.read_csv("data/exp1/exp1_metabolites.csv")
df_exp2 = pd.read_csv("data/exp2/exp2_metabolites.csv")
df_exp3 = pd.read_csv("data/exp3/exp3_metabolites.csv")

# import validation data
df_val = pd.read_csv("data/exp4/exp4_metabolites_best_reps.csv")
df_bad = pd.read_csv("data/exp4/exp4_metabolites_new_worst.csv")
df_bst = pd.read_csv("data/exp4/exp4_metabolites_new_best.csv")

# make metabolite initial condition 0 instead of NaN:
# every row at Time == 0 gets Lactate, Butyrate and Acetate set to 0.
# (pH is left as loaded). Each frame is modified in place.
zeroed_at_t0 = ['Lactate', 'Butyrate', 'Acetate']
for frame in (df_mono, df_exp0, df_exp1, df_exp2, df_exp3, df_val, df_bad, df_bst):
    t0_inds = frame.Time.values == 0
    frame.loc[t0_inds, zeroed_at_t0] = 0.

In [9]:
# bin the last measurement time: every Time value above the cutoff is replaced
# by a single nominal value, for the monoculture and exp0 community tables only.
#
# The assignment goes through .loc instead of writing into the array returned
# by `.values`: with pandas Copy-on-Write enabled that array is read-only and
# an in-place write raises ValueError. The vectorised form also avoids a
# Python-level loop over every row.
# NOTE(review): 40 and 1. are taken verbatim from the original loop; their
# units/meaning are not visible here -- confirm.
last_time_cutoff = 40
binned_time = 1.
for frame in (df_mono, df_exp0):
    frame.loc[frame['Time'] > last_time_cutoff, 'Time'] = binned_time

In [10]:
# Stack the community and validation tables row-wise (original row labels are
# kept). df_mono is not part of this stack.
stacked_frames = [df_exp0, df_exp1, df_exp2, df_exp3, df_val, df_bad, df_bst]
df = pd.concat(stacked_frames)

In [11]:
# take mean of replicates
def _average_replicates(exp_name, df_exp):
    """Average the system variables of one experiment at each time point.

    Returns one row per distinct Time value, with an "Experiments" column
    holding `exp_name` inserted as the first column.
    """
    df_avg = df_exp.groupby("Time")[system_variables].mean().reset_index()
    df_avg.insert(0, "Experiments", [exp_name] * df_avg.shape[0])
    return df_avg


# one averaged table per experiment, in groupby order
df_mean = [_average_replicates(name, group) for name, group in df.groupby("Experiments")]

# NOTE: `df` is overwritten with the replicate-averaged table
df = pd.concat(df_mean)

In [12]:
# Experiment label of every row, and the sorted set of distinct labels.
all_treatments = df["Experiments"].values
unique_treatments = np.unique(all_treatments)

In [13]:
# Monoculture rows are added to the training table only; the test table is a
# copy of the averaged table `df`, which is also part of the training table.
test_df = df.copy()
train_df = pd.concat((df_mono, df))

# Fit the scaler on the training table, then transform copies of both tables.
scaler = MinMaxScaler(observed, system_variables)
scaler.fit(train_df)
train_df_scaled = scaler.transform(train_df.copy())
test_df_scaled = scaler.transform(test_df.copy())


def _format(frame):
    """Run format_data on `frame` with this notebook's variable lists."""
    return format_data(frame, species, metabolites, controls, observed=observed)


# format data into matrix [n_samples, n_timepoints, dt+n_outputs+n_controls]
train_data = _format(train_df)
train_data_scaled = _format(train_df_scaled)
test_data = _format(test_df)
test_data_scaled = _format(test_df_scaled)

# instantiate model
brnn_dims = dict(
    n_species=len(species),
    n_metabolites=len(metabolites),
    n_controls=len(controls),
    n_hidden=32,
)
brnn = miRNN(**brnn_dims)

# fit model on the scaled training data
brnn.fit(train_data_scaled, alpha_0=0, alpha_1=1., evd_tol=1e-3)

Total measurements: 27827, Number of parameters: 2515, Initial regularization: 0.00e+00
Loss: 1407.377, Residuals: 0.00337
Loss: 1317.528, Residuals: 0.00479
Loss: 1267.409, Residuals: -0.00294
Loss: 1254.292, Residuals: -0.00121
Loss: 1144.826, Residuals: 0.00133
Loss: 1115.580, Residuals: -0.00157
Loss: 1059.931, Residuals: -0.00072
Loss: 959.464, Residuals: 0.00195
Loss: 901.888, Residuals: 0.00283
Loss: 860.463, Residuals: 0.00035
Loss: 800.567, Residuals: -0.00012
Loss: 768.055, Residuals: 0.00060
Loss: 760.357, Residuals: 0.00094
Loss: 699.928, Residuals: -0.00024
Loss: 694.737, Residuals: 0.00061
Loss: 648.879, Residuals: 0.00056
Loss: 606.708, Residuals: 0.00030
Loss: 603.728, Residuals: 0.00002
Loss: 577.472, Residuals: 0.00016
Loss: 551.501, Residuals: 0.00018
Loss: 547.901, Residuals: -0.00076
Loss: 541.278, Residuals: -0.00068
Loss: 529.664, Residuals: -0.00058
Loss: 513.145, Residuals: -0.00056
Loss: 512.386, Residuals: -0.00058
Loss: 505.164, Residuals: -0.00062
Loss: 495

In [18]:
# Instantiate a second miRNN with the same dimensions as `brnn`.
# This instance is only constructed here; no fit call is made in this cell.
rnn_dims = dict(
    n_species=len(species),
    n_metabolites=len(metabolites),
    n_controls=len(controls),
    n_hidden=32,
)
rnn = miRNN(**rnn_dims)

In [19]:
# Save the object to a file
# NOTE(review): `rnn` is the instance created in the previous cell and, as far
# as this notebook shows, never fitted; the fitted model here is `brnn`.
# Confirm which object is meant to be saved.
# NOTE(review): in the recorded run this dump raised PicklingError on
# `miRNN.forward` ("it's not the same object as armored.newmodels.miRNN.forward").
# Because the file is opened in "wb" mode before the dump runs, a failed dump
# still creates/truncates the target file.
# Presumably the method is wrapped or the module was re-imported, so the whole
# object cannot be pickled by reference; persisting only the learned
# parameters may be required -- TODO confirm against armored.newmodels.
with open("models/mirnn_dtl01234.pkl", "wb") as file:
    pickle.dump(rnn, file)

# Template for loading a saved model back ("path" is a placeholder).
# Only unpickle files from a trusted source: pickle.load can execute arbitrary code.
# # Reload the object from the file
# with open("path", "rb") as file:
#     model = pickle.load(file)

PicklingError: Can't pickle <function miRNN.forward at 0x7ff53832fec0>: it's not the same object as armored.newmodels.miRNN.forward