# Slope-based Machine Learning for Syngas Fermentation
In this notebook we will use raw data and polynomial smoothed data to train machine learning models to predict slopes. 

## Set up
import neccessary packages and set current directory to lib. This will help with importing the data files

In [1]:
import pandas as pd
import numpy as np
import os
import warnings
warnings.filterwarnings('ignore')

cur_dir = os.getcwd()
cur_dir


'/scratch/garrettroell/machine_learning_clostridium/lib'

## Get Starting Data
This data was generated from the data processing notebook

In [2]:
raw_data = pd.read_csv(f'{cur_dir}/processed_data/raw_data.csv')
raw_data.set_index(['composition','trial','time'],drop=True,inplace=True)

smooth_data = pd.read_csv(f'{cur_dir}/processed_data/smooth_data.csv')
smooth_data.set_index(['composition','trial','time'],drop=True,inplace=True)

Check that imports worked correctly

In [3]:
display(raw_data.head())
display(smooth_data.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acetate,biomass,butanol,butyrate,ethanol,flow rate,H2,CO,CO2,acetate_0,biomass_0,butanol_0,butyrate_0,ethanol_0,acetate_Δ,biomass_Δ,butanol_Δ,butyrate_Δ,ethanol_Δ
composition,trial,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,0.58,21.61,0.41,0.04,0.06,10.94,20,0.125,0.5,0.375,16.905029,0.429546,0.029818,0.075529,15.89524,0.0,0.0,0.0,0.0,0.0
1,1,0.65,44.31,0.39,0.05,0.08,15.89,20,0.125,0.5,0.375,16.905029,0.429546,0.029818,0.075529,15.89524,324.285714,-0.285714,0.142857,0.285714,70.714286
1,1,1.02,46.19,0.46,0.21,0.64,8.14,20,0.125,0.5,0.375,16.905029,0.429546,0.029818,0.075529,15.89524,5.081081,0.189189,0.432432,1.513514,-20.945946
1,1,1.67,46.16,0.49,1.18,3.64,10.81,20,0.125,0.5,0.375,16.905029,0.429546,0.029818,0.075529,15.89524,-0.046154,0.046154,1.492308,4.615385,4.107692
1,1,3.7,34.39,0.64,8.44,9.76,20.34,20,0.125,0.5,0.375,16.905029,0.429546,0.029818,0.075529,15.89524,-5.79803,0.073892,3.576355,3.014778,4.694581


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,CO,CO2,H2,acetate,biomass,butanol,butyrate,ethanol,flow rate,acetate_0,biomass_0,butanol_0,butyrate_0,ethanol_0,acetate_Δ,biomass_Δ,butanol_Δ,butyrate_Δ,ethanol_Δ
composition,trial,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,0.0,0.5,0.375,0.125,16.905029,0.429546,0.029818,0.075529,15.89524,20.0,16.905029,0.429546,0.029818,0.075529,15.89524,0.0,0.0,0.0,0.0,0.0
1,1,0.1,0.5,0.375,0.125,18.001195,0.421076,0.02826,0.061033,15.607023,20.0,16.905029,0.429546,0.029818,0.075529,15.89524,10.961665,-0.084697,-0.015584,-0.14495,-2.882172
1,1,0.2,0.5,0.375,0.125,19.831073,0.410803,0.028316,0.051013,15.144135,20.0,16.905029,0.429546,0.029818,0.075529,15.89524,18.298778,-0.102733,0.000561,-0.100208,-4.628878
1,1,0.3,0.5,0.375,0.125,22.196249,0.403602,0.031357,0.050367,14.522308,20.0,16.905029,0.429546,0.029818,0.075529,15.89524,23.651756,-0.072009,0.030416,-0.006462,-6.218271
1,1,0.4,0.5,0.375,0.125,25.293925,0.403709,0.0287,0.035514,13.956015,20.0,16.905029,0.429546,0.029818,0.075529,15.89524,30.97676,0.001068,-0.026579,-0.148525,-5.662923


First, we'll define a function that generates the X array and y array for ML model training from the imported data. 

The parameter 'input_data' is used to specify whether raw data or the polynomial smoothed data will be used to train the model <br>
The parameter 'conditions_to_include' is a list of the conditions to include in the returned arrays

In [4]:
def get_X_y_arrays_slope(imported_data, conditions_to_include):


    imported_data_copy = imported_data.copy()
    imported_data_copy = imported_data_copy.loc[conditions_to_include]
    imported_data_copy.reset_index(inplace=True)
    X  = imported_data_copy [['time','acetate', 'biomass', 'butanol', 'butyrate', 'ethanol', 'CO', 'CO2', 'H2', 'flow rate']]
    y = imported_data_copy  [['acetate_Δ', 'biomass_Δ', 'butanol_Δ', 'butyrate_Δ', 'ethanol_Δ']]
 
    return np.array(X), np.array(y)

Next we make a dictionary of models that predict slopes.

The models differ in three dimensions: training data style (raw vs smoothed), machine learning algorithm, and test set to be excluded.

The models are generated by looping over these three dimensions.

In [5]:
from machine_learning.model_selector import model_selector

trained_models = {}

for training_data in ['raw', 'smooth']:
    for regressor in ['gradient boosting', 'random forest', 'support vector', 'neural net', 'lasso']:
        for test_comp in ['none', 1, 2, 3, 4, 5, 6, 7]:
            model_name = regressor + ', ' + training_data + ', test comp = ' + str(test_comp)
            print(model_name)

            # set up training set
            if training_data == 'raw':
                data = raw_data
            else:
                data = smooth_data

            # set up training comps
            training_comps = [1, 2, 3, 4, 5, 6, 7]
            if test_comp != 'none':
                training_comps.remove(test_comp)

            # get input and output arrays
            X, y = get_X_y_arrays_slope(data, training_comps)

            # get ML model to use, and fit it
            model = model_selector(regressor)
            trained_models[model_name] = model.fit(X, y)

gradient boosting, raw, test comp = none




gradient boosting, raw, test comp = 1
gradient boosting, raw, test comp = 2
gradient boosting, raw, test comp = 3




gradient boosting, raw, test comp = 4




gradient boosting, raw, test comp = 5




gradient boosting, raw, test comp = 6




gradient boosting, raw, test comp = 7




random forest, raw, test comp = none




random forest, raw, test comp = 1
random forest, raw, test comp = 2
random forest, raw, test comp = 3




random forest, raw, test comp = 4




random forest, raw, test comp = 5




random forest, raw, test comp = 6




random forest, raw, test comp = 7




support vector, raw, test comp = none




support vector, raw, test comp = 1
support vector, raw, test comp = 2
support vector, raw, test comp = 3




support vector, raw, test comp = 4




support vector, raw, test comp = 5




support vector, raw, test comp = 6




support vector, raw, test comp = 7




neural net, raw, test comp = none




neural net, raw, test comp = 1
neural net, raw, test comp = 2
neural net, raw, test comp = 3




neural net, raw, test comp = 4




neural net, raw, test comp = 5




neural net, raw, test comp = 6




neural net, raw, test comp = 7




lasso, raw, test comp = none
lasso, raw, test comp = 1
lasso, raw, test comp = 2
lasso, raw, test comp = 3
lasso, raw, test comp = 4
lasso, raw, test comp = 5
lasso, raw, test comp = 6
lasso, raw, test comp = 7
gradient boosting, smooth, test comp = none




gradient boosting, smooth, test comp = 1




gradient boosting, smooth, test comp = 2




gradient boosting, smooth, test comp = 3
gradient boosting, smooth, test comp = 4
gradient boosting, smooth, test comp = 5




gradient boosting, smooth, test comp = 6
gradient boosting, smooth, test comp = 7




random forest, smooth, test comp = none




random forest, smooth, test comp = 1




random forest, smooth, test comp = 2




random forest, smooth, test comp = 3
random forest, smooth, test comp = 4
random forest, smooth, test comp = 5




random forest, smooth, test comp = 6
random forest, smooth, test comp = 7




support vector, smooth, test comp = none




support vector, smooth, test comp = 1




support vector, smooth, test comp = 2




support vector, smooth, test comp = 3
support vector, smooth, test comp = 4
support vector, smooth, test comp = 5




support vector, smooth, test comp = 6
support vector, smooth, test comp = 7




neural net, smooth, test comp = none




neural net, smooth, test comp = 1




neural net, smooth, test comp = 2




neural net, smooth, test comp = 3
neural net, smooth, test comp = 4
neural net, smooth, test comp = 5




neural net, smooth, test comp = 6
neural net, smooth, test comp = 7




lasso, smooth, test comp = none
lasso, smooth, test comp = 1
lasso, smooth, test comp = 2
lasso, smooth, test comp = 3
lasso, smooth, test comp = 4
lasso, smooth, test comp = 5
lasso, smooth, test comp = 6
lasso, smooth, test comp = 7


Check how many models are in the trained model array. We expect 80 models (5 algoritms \* 8 training sets \* 2 imported data sets)

In [6]:
len(trained_models)

80

Define a function to predict slopes for all rows of an input dataframe.:

This function takes in the model dictionary and the data set to predict (raw times or smooth times). 

It first gets the ml input to put into all the models using the get_X_y_arrays_slope function defined above. 

Then it loops over the models, and saves the predictions in a new dictionary using the same name as the model from the model dictionary.

In [7]:
def get_model_predictions_slope(model_dict, data_set_to_predict):
    model_predictions = {}
    ml_input, _ = get_X_y_arrays_slope(data_set_to_predict, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    for model_name in model_dict.keys():
        model = model_dict[model_name]
        prediction = model.predict(ml_input)
        prediction_df = pd.DataFrame(data=prediction, index=data_set_to_predict.index, columns=['acetate_Δ', 'biomass_Δ', 'butanol_Δ', 'butyrate_Δ', 'ethanol_Δ'])
        model_predictions[model_name] = prediction_df

    return model_predictions

Run function for both measured times and smoothed times

In [8]:
measured_time_slope_predictions = get_model_predictions_slope(trained_models, raw_data)
smoothed_time_slope_predictions = get_model_predictions_slope(trained_models, smooth_data)

Define evaluation metrics

In [9]:
from scipy.stats import linregress
from sklearn import metrics

def get_pearson_r2 (measured_list, predicted_list):
    # slope, intercept, r_value, p_value, std_err
    _, _, r_value, _, _ = linregress(measured_list, predicted_list)
    r2 = (r_value**2)
    return r2

def get_rmse (measured_list, predicted_list):
    mse = metrics.mean_squared_error(measured_list, predicted_list)
    rmse = (mse**0.5)
    return rmse

def get_norm_rmse (measured_list, predicted_list):
    mse = metrics.mean_squared_error(measured_list, predicted_list)
    rmse = (mse**0.5)
    avg_meas = sum(measured_list) / len(measured_list) 
    return rmse/avg_meas

Define a function to evaluate slope predictions of test set for condition 1-7:

This function takes in the prediction dictionary, the ground truth data frame, and the metric to evaluate.

It loops over the species, test compositions, and models used to generate the data in the prediction dataframe.

For each species it outputs a dataframe that contains the metric for each model for each test condition.

In [10]:
def evaluate_models(pred_df_dict, ground_truth_df, metric):
    species_set = ['acetate_Δ', 'biomass_Δ', 'butanol_Δ', 'butyrate_Δ', 'ethanol_Δ']
    test_comp_set=[1,2,3,4,5,6,7]
    index_set = ['gradient boosting, raw', 'random forest, raw', 'support vector, raw', 'neural net, raw', 'lasso, raw', 'gradient boosting, smooth', 'random forest, smooth', 'support vector, smooth', 'neural net, smooth', 'lasso, smooth']
    
    for species in species_set:
        data = {}
        for test_comp in test_comp_set:
            data[test_comp] = []
            for model_name in pred_df_dict.keys():
                # print(model_name)
                if str(test_comp) in model_name:
                    predicted_species_values = list(pred_df_dict[model_name].loc[test_comp][species])
                    measured_species_values = list(ground_truth_df.loc[test_comp][species])

                    r2 = get_pearson_r2(measured_species_values, predicted_species_values)
                    rmse = get_rmse (measured_species_values, predicted_species_values)
                    norm_rmse = get_norm_rmse (measured_species_values, predicted_species_values)
                    
                    if metric == 'r2':
                        data[test_comp].append(r2)
                    elif metric == 'rmse':
                        data[test_comp].append(rmse)
                    elif metric == 'norm_rmse':
                        data[test_comp].append(norm_rmse)
                    else:
                        print('unknown metric')
        species_data = pd.DataFrame.from_dict(data)
        species_data[f'model for {species}'] = index_set
        species_data.set_index(f'model for {species}', inplace=True, drop=True)
        display(species_data)

In [11]:
# smoothed_time_predictions

In [12]:
evaluate_models(smoothed_time_slope_predictions, smooth_data, 'r2')

Unnamed: 0_level_0,1,2,3,4,5,6,7
model for acetate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",0.475537,0.426291,0.3217802,0.471678,0.542766,0.090041,0.4173125
"random forest, raw",0.529545,0.575727,0.4881891,0.182101,0.213805,0.117386,0.7230897
"support vector, raw",0.231647,0.353718,0.2320805,0.245466,0.855429,0.676839,0.2889825
"neural net, raw",0.434027,0.474147,0.2656959,0.186995,0.770212,0.67878,0.3996625
"lasso, raw",0.192064,0.062111,1.267071e-32,0.208621,0.789081,0.556465,7.018235e-33
"gradient boosting, smooth",0.479071,0.802158,0.5569903,0.625297,0.639581,0.481888,0.7983849
"random forest, smooth",0.569463,0.824173,0.6549933,0.578539,0.828853,0.597859,0.7204292
"support vector, smooth",0.352511,0.645141,0.4391808,0.386255,0.730527,0.519169,0.6143512
"neural net, smooth",0.495493,0.727149,0.6327979,0.350228,0.820926,0.54253,0.6377657
"lasso, smooth",0.339419,0.643496,0.3899394,0.293371,0.790751,0.551674,0.5749356


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for biomass_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",0.112222,0.07893256,0.201187,0.001933512,0.000458,0.006811,0.320213
"random forest, raw",0.028802,0.242784,0.072639,0.04582054,0.266134,0.011332,0.360284
"support vector, raw",0.62952,0.006814793,0.100266,0.6612762,0.138399,0.125767,0.051194
"neural net, raw",0.035546,0.009431669,0.021151,0.2477814,0.067421,0.042627,0.002118
"lasso, raw",0.277537,1.02535e-32,0.015552,3.94963e-33,0.073045,0.110634,0.006841
"gradient boosting, smooth",0.169839,0.5109482,0.435032,0.3803816,0.757209,0.815868,0.16502
"random forest, smooth",0.201504,0.532519,0.225927,0.3674503,0.480138,0.45918,0.092755
"support vector, smooth",0.372019,0.007997951,0.140707,0.7056972,0.146659,0.264343,0.058054
"neural net, smooth",0.028207,0.003581697,0.014972,0.2330855,0.000123,0.031949,0.009065
"lasso, smooth",0.422873,0.004219499,0.094894,0.6918539,0.150858,0.14114,0.014952


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for butanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",0.531948,0.853819,0.873629,0.323321,0.715717,0.787281,0.014683
"random forest, raw",0.521081,0.903046,0.841976,0.157349,0.820882,0.805616,0.10254
"support vector, raw",0.696164,0.916931,0.823524,0.101575,0.953439,0.893121,0.169715
"neural net, raw",0.688222,0.926487,0.85746,0.135017,0.197174,0.840208,0.047524
"lasso, raw",0.769566,0.913559,0.922754,0.165163,0.647089,0.803932,0.047742
"gradient boosting, smooth",0.007851,0.787377,0.752815,0.12883,0.930856,0.315634,0.045261
"random forest, smooth",0.128418,0.834268,0.849788,0.17004,0.970391,0.39418,0.125587
"support vector, smooth",0.637279,0.833844,0.907383,0.160737,0.940583,0.93762,0.248061
"neural net, smooth",0.610647,0.741375,0.717094,0.220281,0.946078,0.921776,0.256482
"lasso, smooth",0.711167,0.865957,0.904524,0.218024,0.941014,0.937428,0.348345


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for butyrate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",0.03398511,0.057478,0.333426,0.393535,0.030826,0.028328,0.404888
"random forest, raw",0.1479262,0.231113,0.469364,0.550792,0.09349,0.205939,0.530472
"support vector, raw",0.1308409,0.139084,0.688523,0.738174,0.003546,0.039342,0.138108
"neural net, raw",0.1014396,0.509428,0.657486,0.079771,0.017704,0.029806,0.341603
"lasso, raw",0.02817718,0.771908,0.7602,0.591833,0.137938,0.113058,0.203958
"gradient boosting, smooth",0.0005741253,0.390018,0.317109,0.452572,0.52104,0.794117,0.216589
"random forest, smooth",0.002049191,0.225048,0.436713,0.599054,0.291806,0.829956,0.316254
"support vector, smooth",0.7779322,0.178558,0.470536,0.702642,0.352959,0.029166,0.619266
"neural net, smooth",0.4712262,0.010174,0.324224,0.722675,4.3e-05,0.000288,0.087834
"lasso, smooth",1.767219e-35,0.833399,0.001413,0.0,0.157958,0.493097,0.547535


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for ethanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",0.354188,0.007035,0.18801,0.113687,0.314922,0.731467,0.471937
"random forest, raw",0.310095,0.032309,0.034068,0.020431,0.637395,0.831367,0.403735
"support vector, raw",0.693912,0.595948,0.546413,0.006881,0.426212,0.716987,0.798555
"neural net, raw",0.672137,0.555327,0.502028,0.071883,0.430116,0.731868,0.574489
"lasso, raw",0.498676,0.405013,0.357613,4.6e-05,0.371744,0.39042,0.598797
"gradient boosting, smooth",0.360439,0.00792,0.742259,0.048851,0.724754,0.140457,0.631915
"random forest, smooth",0.375879,0.05229,0.717701,0.059149,0.784641,0.720416,0.489838
"support vector, smooth",0.691145,0.6125,0.571568,0.013314,0.442617,0.735025,0.825654
"neural net, smooth",0.698769,0.302846,0.530383,1e-05,0.430809,0.770008,0.827678
"lasso, smooth",0.504757,0.616834,0.694187,0.00941,0.401545,0.721179,0.742372


In [13]:
evaluate_models(smoothed_time_slope_predictions, smooth_data, 'rmse')

Unnamed: 0_level_0,1,2,3,4,5,6,7
model for acetate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",15.533772,20.194126,26.328697,12.597879,6.213273,13.772285,33.224895
"random forest, raw",14.867001,17.581196,22.402514,18.649524,19.909953,20.773358,15.869519
"support vector, raw",18.583611,24.558953,25.706182,13.829184,8.622325,4.030923,16.569899
"neural net, raw",15.590032,24.266793,24.314968,15.391824,4.590911,8.726855,15.559435
"lasso, raw",18.133557,26.473865,30.278,20.086319,8.711265,4.946963,19.290239
"gradient boosting, smooth",16.887379,15.140932,20.776617,10.407707,4.865192,6.823248,12.18729
"random forest, smooth",13.586476,13.378912,16.812499,12.381758,6.791098,5.469075,12.234057
"support vector, smooth",16.893001,22.972515,22.548765,13.603664,4.978103,6.425983,16.382357
"neural net, smooth",14.601859,17.437628,25.619078,13.279007,3.792433,9.388641,18.45411
"lasso, smooth",15.912721,19.737765,23.376407,14.048118,4.457262,6.964043,15.379498


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for biomass_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",0.053447,0.060077,0.050019,0.067489,0.057976,0.039289,0.042844
"random forest, raw",0.05684,0.06435,0.0765,0.067074,0.058313,0.050932,0.045504
"support vector, raw",0.059555,0.059477,0.053752,0.050283,0.032184,0.038439,0.054827
"neural net, raw",0.053304,0.099393,0.065131,0.059865,0.031914,0.040132,0.057059
"lasso, raw",0.061665,0.076541,0.057445,0.060649,0.030022,0.036961,0.088768
"gradient boosting, smooth",0.043719,0.060509,0.041321,0.046555,0.018876,0.019057,0.056604
"random forest, smooth",0.044939,0.049623,0.051728,0.047979,0.037307,0.031522,0.056194
"support vector, smooth",0.042459,0.061846,0.052868,0.042511,0.035658,0.047195,0.070249
"neural net, smooth",0.048311,0.067276,0.055073,0.061291,0.042769,0.042789,0.051407
"lasso, smooth",0.041581,0.061815,0.053526,0.044551,0.029773,0.038277,0.05908


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for butanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",1.964349,2.683131,0.702985,1.863338,0.823447,1.019716,2.18612
"random forest, raw",2.072259,2.773748,0.916206,2.609437,0.572003,0.328362,2.356598
"support vector, raw",1.551925,2.297144,1.306581,2.252405,0.314782,0.236329,2.810094
"neural net, raw",2.123943,1.741294,2.370016,2.113765,0.489047,0.404438,2.042874
"lasso, raw",1.298023,2.723505,1.434449,2.495622,0.312863,0.482338,1.94
"gradient boosting, smooth",2.930056,2.498689,1.004849,2.157604,1.206127,1.254558,1.590201
"random forest, smooth",3.067468,2.929609,0.848276,2.934757,0.471546,1.036427,2.104755
"support vector, smooth",2.76267,2.229869,0.997246,2.04414,0.562078,1.020306,3.567839
"neural net, smooth",2.165343,3.368178,1.704527,2.031629,1.214017,0.404072,2.766427
"lasso, smooth",2.165803,2.634558,0.834049,2.382208,0.353705,0.645161,4.003879


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for butyrate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",3.871983,2.334736,1.005991,1.419861,3.458298,2.697505,2.598986
"random forest, raw",3.485203,1.860167,1.527988,0.735519,3.059089,2.259542,3.300811
"support vector, raw",3.548774,1.86981,0.510442,0.496885,3.324703,2.874825,3.63244
"neural net, raw",3.814966,1.464103,1.258628,0.897244,3.422857,3.484538,3.614067
"lasso, raw",3.937652,2.913156,0.885464,1.293891,3.603167,2.992704,8.575508
"gradient boosting, smooth",4.252716,2.777382,1.12387,1.002379,2.69409,1.888909,2.822909
"random forest, smooth",4.456104,3.221962,0.713237,1.218171,2.881159,1.612452,2.712499
"support vector, smooth",3.1815,2.672081,0.885002,1.481192,3.127246,2.902908,2.475616
"neural net, smooth",3.710444,2.422465,2.673964,1.703794,3.181856,2.58693,3.07709
"lasso, smooth",3.736577,3.068498,1.078871,1.704994,3.16079,2.795782,2.750178


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for ethanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",5.04167,11.858435,9.577108,12.466413,3.600427,4.613626,4.409907
"random forest, raw",4.794171,12.290522,10.46095,12.536438,2.055974,2.115476,6.873843
"support vector, raw",6.537329,8.652421,10.975658,11.100671,2.096761,3.492083,4.472798
"neural net, raw",8.891137,8.705557,7.676302,10.150694,5.765033,4.680454,4.245341
"lasso, raw",6.932172,8.465609,7.778798,10.315429,4.232619,5.552603,13.124992
"gradient boosting, smooth",6.467885,11.666571,6.471915,11.415082,2.204849,7.024454,3.707076
"random forest, smooth",5.344936,11.153095,6.012367,13.47309,1.46232,1.89528,4.972392
"support vector, smooth",7.06702,7.750786,10.765242,12.739545,2.220373,2.275773,5.761849
"neural net, smooth",7.916431,8.974248,9.574384,13.916989,5.130312,2.813813,4.18043
"lasso, smooth",7.511677,8.047749,13.039112,11.680264,4.029639,3.72955,4.283689


In [14]:
evaluate_models(smoothed_time_slope_predictions, smooth_data, 'norm_rmse')

Unnamed: 0_level_0,1,2,3,4,5,6,7
model for acetate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",15.127988,3.288633,5.278399,5.951571,0.818569,1.40819,5.836442
"random forest, raw",14.478635,2.863115,4.491274,8.810528,2.62304,2.124037,2.787715
"support vector, raw",18.098157,3.999449,5.153596,6.533272,1.135949,0.412154,2.910747
"neural net, raw",15.182779,3.951871,4.874685,7.271505,0.60483,0.892305,2.733244
"lasso, raw",17.65986,4.311294,6.070158,9.489308,1.147667,0.505818,3.388615
"gradient boosting, smooth",16.446235,2.465715,4.165313,4.916876,0.640966,0.697664,2.140877
"random forest, smooth",13.231561,2.178769,3.370584,5.84947,0.894694,0.559203,2.149092
"support vector, smooth",16.451711,3.741096,4.520595,6.42673,0.655841,0.657045,2.877802
"neural net, smooth",14.22042,2.839735,5.136134,6.273354,0.499635,0.959971,3.241736
"lasso, smooth",15.497038,3.214314,4.686521,6.636702,0.587223,0.71206,2.701635


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for biomass_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",1.688382,1.053882,1.792927,5.440164,-7.839678,14.995893,1.842654
"random forest, raw",1.795583,1.128834,2.742142,5.406733,-7.885285,19.439624,1.957058
"support vector, raw",1.881334,1.043345,1.926734,4.053265,-4.351977,14.671213,2.358023
"neural net, raw",1.683853,1.743563,2.334619,4.825652,-4.315554,15.317448,2.454025
"lasso, raw",1.947992,1.342696,2.059113,4.888784,-4.059675,14.107105,3.817748
"gradient boosting, smooth",1.381079,1.061463,1.481145,3.752724,-2.552451,7.273563,2.434427
"random forest, smooth",1.419615,0.8705,1.854196,3.867512,-5.044761,12.031107,2.416817
"support vector, smooth",1.341288,1.084913,1.895068,3.426749,-4.821806,18.013175,3.021286
"neural net, smooth",1.526142,1.180158,1.974083,4.940568,-5.783407,16.331704,2.210944
"lasso, smooth",1.313528,1.08436,1.918653,3.591163,-4.025998,14.609678,2.54094


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for butanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",0.537673,0.778583,0.322142,0.931283,1.093726,1.058248,1.007927
"random forest, raw",0.56721,0.804878,0.41985,1.304177,0.759751,0.340769,1.086527
"support vector, raw",0.424786,0.666579,0.598739,1.125736,0.418103,0.245259,1.295614
"neural net, raw",0.581356,0.505284,1.086056,1.056444,0.649567,0.419721,0.941882
"lasso, raw",0.355289,0.790299,0.657334,1.247293,0.415554,0.500564,0.894451
"gradient boosting, smooth",0.802002,0.725062,0.460471,1.078355,1.602013,1.301964,0.733174
"random forest, smooth",0.839614,0.850106,0.388721,1.46677,0.626321,1.07559,0.970412
"support vector, smooth",0.756186,0.647057,0.456986,1.021646,0.746569,1.05886,1.644978
"neural net, smooth",0.592688,0.977368,0.781097,1.015393,1.612492,0.41934,1.275481
"lasso, smooth",0.592814,0.764488,0.382202,1.19061,0.469801,0.66954,1.846018


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for butyrate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",4.110904,0.906039,1.214659,12.18239,1.564612,1.006476,1.956032
"random forest, raw",3.700258,0.721873,1.844931,6.310745,1.384001,0.843066,2.484235
"support vector, raw",3.767751,0.725615,0.61632,4.263271,1.504171,1.072636,2.733824
"neural net, raw",4.050368,0.568173,1.519699,7.698348,1.548578,1.300128,2.719996
"lasso, raw",4.180625,1.130506,1.069131,11.101568,1.630154,1.116619,6.454044
"gradient boosting, smooth",4.51513,1.077816,1.356989,8.6004,1.218867,0.704778,2.12456
"random forest, smooth",4.731068,1.250344,0.86118,10.451895,1.303501,0.601628,2.041464
"support vector, smooth",3.377814,1.036952,1.068573,12.708609,1.414837,1.083114,1.863182
"neural net, smooth",3.939397,0.940084,3.22861,14.618532,1.439543,0.965218,2.31586
"lasso, smooth",3.967142,1.190789,1.302656,14.628835,1.430013,1.043144,2.069821


Unnamed: 0_level_0,1,2,3,4,5,6,7
model for ethanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"gradient boosting, raw",1.899395,1.252208,0.757284,1.121131,4.292412,2.830276,0.677649
"random forest, raw",1.806153,1.297835,0.827172,1.127429,2.451122,1.29776,1.05627
"support vector, raw",2.462869,0.913664,0.867871,0.998307,2.499748,2.142254,0.687313
"neural net, raw",3.349641,0.919275,0.606983,0.912874,6.873044,2.871272,0.652361
"lasso, raw",2.611622,0.893938,0.615088,0.927689,5.046108,3.406301,2.016854
"gradient boosting, smooth",2.436707,1.231948,0.511749,1.026583,2.62861,4.309223,0.569649
"random forest, smooth",2.013648,1.177726,0.475412,1.211664,1.743371,1.162679,0.764083
"support vector, smooth",2.662425,0.818455,0.851233,1.145695,2.647118,1.396096,0.885396
"neural net, smooth",2.982431,0.947648,0.757069,1.251585,6.116333,1.726162,0.642386
"lasso, smooth",2.829944,0.849813,1.031033,1.050431,4.804116,2.28793,0.658254


In [15]:
def validate_models(pred_df_dict, ground_truth_df, metric):
    species_set =  ['acetate_Δ', 'biomass_Δ', 'butanol_Δ', 'butyrate_Δ', 'ethanol_Δ']
    test_comp_set=[8, 9, 10]
    index_set = ['gradient boosting, raw', 'random forest, raw', 'support vector, raw', 'neural net, raw', 'lasso, raw', 'gradient boosting, smooth', 'random forest, smooth', 'support vector, smooth', 'neural net, smooth', 'lasso, smooth']
    
    for species in species_set:
        data = {}
        for test_comp in test_comp_set:
            data[test_comp] = []
            for model_name in pred_df_dict.keys():
                # print(model_name)
                if 'none' in model_name:
                    predicted_species_values = list(pred_df_dict[model_name].loc[test_comp][species])
                    measured_species_values = list(ground_truth_df.loc[test_comp][species])

                    r2 = get_pearson_r2(measured_species_values, predicted_species_values)
                    rmse = get_rmse (measured_species_values, predicted_species_values)
                    norm_rmse = get_norm_rmse (measured_species_values, predicted_species_values)
                    
                    if metric == 'r2':
                        data[test_comp].append(r2)
                    elif metric == 'rmse':
                        data[test_comp].append(rmse)
                    elif metric == 'norm_rmse':
                        data[test_comp].append(norm_rmse)
                    else:
                        print('unknown metric')
        species_data = pd.DataFrame.from_dict(data)
        species_data[f'model for {species}'] = index_set
        species_data.set_index(f'model for {species}', inplace=True, drop=True)
        display(species_data)

In [16]:
validate_models(smoothed_time_slope_predictions, smooth_data, 'r2')

Unnamed: 0_level_0,8,9,10
model for acetate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",0.695251,0.708648,0.000445
"random forest, raw",0.721632,0.633395,0.181526
"support vector, raw",0.36306,0.713025,0.243179
"neural net, raw",0.686394,0.751332,0.383157
"lasso, raw",0.372231,0.660566,0.259625
"gradient boosting, smooth",0.767657,0.858289,0.474537
"random forest, smooth",0.804404,0.84285,0.049263
"support vector, smooth",0.270464,0.146586,0.339739
"neural net, smooth",0.429027,0.685609,0.356117
"lasso, smooth",0.430116,0.705787,0.307735


Unnamed: 0_level_0,8,9,10
model for biomass_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",0.003275,0.085362,0.016089
"random forest, raw",0.051139,0.257112,0.146336
"support vector, raw",0.316864,0.201779,0.141968
"neural net, raw",0.031837,0.04897,0.02894
"lasso, raw",0.035892,0.05088,0.090307
"gradient boosting, smooth",0.542142,0.614601,0.044809
"random forest, smooth",0.365453,0.485336,0.000291
"support vector, smooth",0.018332,0.037543,0.018058
"neural net, smooth",0.025637,0.029825,0.01363
"lasso, smooth",0.393666,0.241786,0.158358


Unnamed: 0_level_0,8,9,10
model for butanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",0.319005,0.745806,0.158613
"random forest, raw",0.227973,0.697631,0.264641
"support vector, raw",0.368041,0.82478,0.263369
"neural net, raw",0.402286,0.921607,0.259474
"lasso, raw",0.218129,0.782198,0.217995
"gradient boosting, smooth",0.073723,0.561313,0.392139
"random forest, smooth",0.042544,0.616304,0.352418
"support vector, smooth",0.148892,0.0076,0.309014
"neural net, smooth",0.246876,0.509851,0.176611
"lasso, smooth",0.408112,0.930375,0.24747


Unnamed: 0_level_0,8,9,10
model for butyrate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",0.01657614,0.02087628,0.07710156
"random forest, raw",0.101482,0.0008602205,0.1593137
"support vector, raw",0.03142307,0.3101749,0.2345679
"neural net, raw",0.113188,0.002502899,0.1582667
"lasso, raw",0.002536334,0.320915,0.07488922
"gradient boosting, smooth",0.0005382683,0.0007525883,0.08896176
"random forest, smooth",0.001889632,0.06521785,0.107514
"support vector, smooth",0.05219715,0.08231679,0.1180374
"neural net, smooth",0.02001413,0.08077968,0.06347363
"lasso, smooth",1.283687e-32,1.905273e-33,8.114036e-34


Unnamed: 0_level_0,8,9,10
model for ethanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",0.03042,0.421418,0.141704
"random forest, raw",0.07992,0.466753,0.148555
"support vector, raw",0.001005,0.359859,0.192543
"neural net, raw",0.01089,0.432147,0.13367
"lasso, raw",0.009201,0.327195,0.139779
"gradient boosting, smooth",0.076464,0.452119,0.076178
"random forest, smooth",0.076369,0.577691,0.347931
"support vector, smooth",0.046665,0.346754,0.31227
"neural net, smooth",0.001531,0.495182,0.40761
"lasso, smooth",0.038421,0.502265,0.297872


In [17]:
validate_models(smoothed_time_slope_predictions, smooth_data, 'rmse')

Unnamed: 0_level_0,8,9,10
model for acetate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",8.654742,11.834665,13.115404
"random forest, raw",6.552701,8.781728,11.627483
"support vector, raw",19.233369,19.746178,15.735269
"neural net, raw",20.413365,21.916809,16.984171
"lasso, raw",14.632897,16.08242,14.045106
"gradient boosting, smooth",10.572703,6.645935,10.869752
"random forest, smooth",8.276958,5.849904,13.088815
"support vector, smooth",9.717637,14.005021,12.532432
"neural net, smooth",15.098219,17.152881,15.571038
"lasso, smooth",24.349038,25.256436,30.371057


Unnamed: 0_level_0,8,9,10
model for biomass_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",0.194344,0.233497,0.130087
"random forest, raw",0.19453,0.219298,0.129871
"support vector, raw",0.233498,0.189661,0.251193
"neural net, raw",0.207742,0.243774,0.12804
"lasso, raw",0.203507,0.176616,0.198805
"gradient boosting, smooth",0.162831,0.18477,0.127766
"random forest, smooth",0.177852,0.206761,0.127516
"support vector, smooth",0.197605,0.235763,0.125688
"neural net, smooth",0.197024,0.225099,0.124625
"lasso, smooth",0.185156,0.17855,0.142441


Unnamed: 0_level_0,8,9,10
model for butanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",1.370665,0.93658,2.016449
"random forest, raw",2.004503,1.225821,2.66921
"support vector, raw",3.312765,2.671572,3.945932
"neural net, raw",8.630696,8.366137,9.417103
"lasso, raw",3.531241,2.807239,4.376548
"gradient boosting, smooth",2.399982,1.449705,2.33431
"random forest, smooth",3.485592,2.43571,4.333463
"support vector, smooth",2.696543,4.029084,1.931204
"neural net, smooth",2.558064,2.053102,2.870866
"lasso, smooth",8.774744,8.218876,9.551562


Unnamed: 0_level_0,8,9,10
model for butyrate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",0.989678,1.778727,2.248571
"random forest, raw",0.814531,1.42581,2.242288
"support vector, raw",1.218,1.778893,2.724475
"neural net, raw",2.722307,3.379482,3.584216
"lasso, raw",1.151016,1.848233,2.761423
"gradient boosting, smooth",0.836383,1.704847,2.308495
"random forest, smooth",1.576266,2.184537,3.220226
"support vector, smooth",5.08617,4.962346,4.258935
"neural net, smooth",1.588342,3.384464,5.565261
"lasso, smooth",0.723742,1.57911,2.343697


Unnamed: 0_level_0,8,9,10
model for ethanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",14.54576,7.406742,7.507447
"random forest, raw",14.866616,6.615571,6.941652
"support vector, raw",15.176726,12.042278,5.059071
"neural net, raw",16.659967,7.550533,10.128783
"lasso, raw",14.540479,8.858667,5.630326
"gradient boosting, smooth",14.014263,7.587955,7.40232
"random forest, smooth",15.986867,6.479254,5.39544
"support vector, smooth",14.758085,8.927652,9.849678
"neural net, smooth",14.435292,9.427256,9.730308
"lasso, smooth",14.570995,6.88097,9.468837


In [18]:
validate_models(smoothed_time_slope_predictions, smooth_data, 'norm_rmse')

Unnamed: 0_level_0,8,9,10
model for acetate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",3.529819,4.008567,19.129185
"random forest, raw",2.672506,2.974494,16.95901
"support vector, raw",7.84429,6.688307,22.950331
"neural net, raw",8.325549,7.42353,24.77189
"lasso, raw",5.967997,5.447341,20.485181
"gradient boosting, smooth",4.312055,2.251071,15.853838
"random forest, smooth",3.375741,1.981444,19.090404
"support vector, smooth",3.963319,4.743697,18.278904
"neural net, smooth",6.157778,5.809921,22.710795
"lasso, smooth",9.930706,8.554708,44.29704


Unnamed: 0_level_0,8,9,10
model for biomass_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",2.117868,1.409905,4.644482
"random forest, raw",2.119905,1.324172,4.636763
"support vector, raw",2.544557,1.145212,8.968305
"neural net, raw",2.263879,1.47196,4.571404
"lasso, raw",2.217732,1.066447,7.097905
"gradient boosting, smooth",1.774459,1.115683,4.561588
"random forest, smooth",1.938151,1.248465,4.552688
"support vector, smooth",2.153414,1.423586,4.487417
"neural net, smooth",2.147078,1.359197,4.449471
"lasso, smooth",2.017747,1.078121,5.085539


Unnamed: 0_level_0,8,9,10
model for butanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",0.997823,0.426891,1.783418
"random forest, raw",1.459248,0.558726,2.360742
"support vector, raw",2.411643,1.217697,3.48992
"neural net, raw",6.283017,3.813268,8.328815
"lasso, raw",2.57069,1.279534,3.870772
"gradient boosting, smooth",1.747151,0.660772,2.064545
"random forest, smooth",2.537459,1.110192,3.832666
"support vector, smooth",1.963042,1.836448,1.708024
"neural net, smooth",1.862232,0.9358,2.539094
"lasso, smooth",6.387881,3.746146,8.447735


Unnamed: 0_level_0,8,9,10
model for butyrate_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",1.165584,4.161147,2.539274
"random forest, raw",0.959307,3.335534,2.532178
"support vector, raw",1.434489,4.161537,3.076704
"neural net, raw",3.206172,7.905947,4.047596
"lasso, raw",1.355599,4.323749,3.118429
"gradient boosting, smooth",0.985042,3.988314,2.606945
"random forest, smooth",1.856433,5.110499,3.636548
"support vector, smooth",5.99019,11.608895,4.809545
"neural net, smooth",1.870655,7.917602,6.284757
"lasso, smooth",0.852381,3.694163,2.646698


Unnamed: 0_level_0,8,9,10
model for ethanol_Δ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"gradient boosting, raw",1.917731,0.577069,5.042026
"random forest, raw",1.960034,0.515428,4.662036
"support vector, raw",2.000919,0.93823,3.397689
"neural net, raw",2.196471,0.588272,6.802524
"lasso, raw",1.917035,0.690191,3.781345
"gradient boosting, smooth",1.847658,0.591188,4.971422
"random forest, smooth",2.107729,0.504807,3.623595
"support vector, smooth",1.945725,0.695565,6.615076
"neural net, smooth",1.903167,0.73449,6.534906
"lasso, smooth",1.921058,0.536106,6.359302


In [19]:
def get_feature_importances_slope(model):
    compounds = ['Δ acetate', 'Δ biomass', 'Δ butanol', 'Δ butyrate', 'Δ ethanol']
    features = ['time','acetate', 'biomass', 'butanol', 'butyrate', 'ethanol', 'CO', 'CO2', 'H2', 'flow rate']

    array_list = []

    for i in range(5):
        feature_importance_array = model.estimators_[i].steps[1][1].best_estimator_.feature_importances_
        array_list.append(list(feature_importance_array))
    df = pd.DataFrame(array_list, columns = features, index = compounds)  
    return df

In [20]:
print('gradient boosting, smooth, test comp = none 7')
display(get_feature_importances_slope(trained_models['gradient boosting, smooth, test comp = none']))
print('random forest, smooth, test comp = none 7')
display(get_feature_importances_slope(trained_models['random forest, smooth, test comp = none']))

gradient boosting, smooth, test comp = none 7


Unnamed: 0,time,acetate,biomass,butanol,butyrate,ethanol,CO,CO2,H2,flow rate
Δ acetate,0.031234,0.210618,0.050929,0.484389,0.173268,0.037531,0.003288,0.001658,0.007084,0.0
Δ biomass,0.309585,0.3345,0.074891,0.067898,0.132752,0.044317,0.002137,0.006888,0.026866,0.000165
Δ butanol,0.005838,0.036855,0.026657,0.747261,0.002289,0.028885,0.002031,8.9e-05,0.149846,0.000248
Δ butyrate,0.107786,0.247724,0.131797,0.184469,0.047701,0.255685,0.012812,0.000609,0.011412,4e-06
Δ ethanol,0.014842,0.057495,0.200669,0.126635,0.03684,0.541995,0.001172,0.018409,0.001922,2e-05


random forest, smooth, test comp = none 7


Unnamed: 0,time,acetate,biomass,butanol,butyrate,ethanol,CO,CO2,H2,flow rate
Δ acetate,0.030923,0.121911,0.056506,0.649889,0.081526,0.035074,0.003749,0.01126,0.005789,0.003372266
Δ biomass,0.400425,0.564511,0.0,0.010213,0.024851,0.0,0.0,0.0,0.0,0.0
Δ butanol,0.012904,0.039293,0.026376,0.734226,0.016492,0.013682,0.000522,0.0,0.156505,0.0
Δ butyrate,0.081806,0.205956,0.148799,0.146159,0.057887,0.326842,0.001918,0.004567,0.026066,2.684906e-07
Δ ethanol,0.058845,0.0511,0.165482,0.092359,0.030632,0.56154,0.005146,0.027542,0.007336,1.74364e-05
