# Cross-Validation

##### Author: Hrönn Kjartansdóttir
##### Cross-Validation of parameters of interest using CO2 model

In [None]:
# Import all packages needed for the model
import numpy as np
import scipy
from scipy.integrate import solve_ivp
from scipy.optimize import least_squares

import matplotlib.pyplot as plt
import pandas as pd

import os
import statistics as st

In [None]:
# Physical constants and fixed settings shared by every pressure simulation.

# Molar masses in kg/mol (g/mol values scaled by 1e-3):
M_H2 = 2.016*1e-3
M_CO2 = 44.009*1e-3
M_CH4 = 16.043*1e-3
M_H2O = 18.015*1e-3

# Density of the liquid phase:
density_l = 992.2  # kg/m³

# Porosity; rk45() multiplies by it directly, so 0.2 is a fraction
# (the earlier "%" label did not match that usage):
pore = 0.2

# Rate constants for dissolution (H2, CO2, CH4) ...
K_diss_H2 = 1.05e-2
K_diss_CO2 = 1.87e-3
K_diss_CH4 = 9.32e-5

# ... and for CH4 evaporation, used when the liquid holds more CH4 than
# its saturation value:
K_evap_CH4 = 1.81e-2

# Maximum biomass growth rate.
# NOTE(review): previously labelled 1/s, but rk45() divides (mu - d) * m by
# 3600 as an hour -> second conversion, which suggests 1/h — confirm.
mu_max = 0.0427

# Temperature in Kelvin (40 added to 273):
temperature = 273 + 40

# Maximum uptake rates (noted as mmol/g*h by the author); the CO2 limit is a
# quarter of the H2 limit. r_CH4_min is not read by the code in this section.
r_H2_max = 1
r_CH4_min = 0
r_CO2_max = r_H2_max / 4

# Henry solubility constants:
H_H2 = 7.13e-9*1e3
H_CO2 = 2.31e-7*1e3
H_CH4 = 1.08e-8*1e3

# Mole fractions of H2 and CO2 in the injected gas (10 % H2, 2.5 % CO2);
# change these to match the experiment:
x_g_H2_th = 0.10
x_g_CO2_th = 0.025

# Pressure in the reactor at the end of the evaporation phase:
pressureCH4resid = 2.06e5

#########################################################################################################################################


############################### RK45 ###############################################
# RK45() to track changes in parameters over time
def rk45(t, y, U_CO2, K_Monod_H2, K_Monod_CO2, d):
    """Return the time derivatives of the reactor state for solve_ivp.

    Despite its name this function does not integrate anything: it is the
    right-hand side that the fill functions hand to ``solve_ivp``.

    Parameters
    ----------
    t : current time; not used by any expression below.
    y : the nine state values, in order: gas mass fractions of H2, CO2 and
        CH4, liquid mass fractions of H2, CO2 and CH4, biomass ``m``, gas
        density and liquid saturation.
    U_CO2 : factor multiplying the dissolved CO2 mole fraction to give the
        CO2 uptake rate (capped at ``r_CO2_max``).
    K_Monod_H2, K_Monod_CO2 : half-saturation constants of the two Monod terms.
    d : rate subtracted from the growth rate ``mu`` in the biomass balance.

    Returns
    -------
    list
        Derivatives in the same order as ``y``.
    """
    w_g_H2, w_g_CO2, w_g_CH4, w_l_H2, w_l_CO2, w_l_CH4, m, density_g, S_liq = y
    
    # Mean molar mass of the gas mixture, from the gas mass fractions:
    M_g = ((w_g_H2/M_H2) + (w_g_CO2/M_CO2) + (w_g_CH4/M_CH4))**-1
    
    # Gas pressure from density, the mixture's specific gas constant and temperature:
    R_gas = 8.314 / M_g
    pressure_g = density_g * R_gas * temperature

    # Mole fractions in the gas phase:
    x_g_H2 = (w_g_H2/M_H2)*M_g
    x_g_CO2 = (w_g_CO2/M_CO2)*M_g
    x_g_CH4 = (w_g_CH4/M_CH4)*M_g

    # Water is whatever part of the liquid is not a dissolved gas:
    w_l_H2O = 1 - (w_l_H2 + w_l_CO2 + w_l_CH4)

    # Mean molar mass of the liquid mixture:
    M_l = ((w_l_H2/M_H2) + (w_l_CO2/M_CO2) + (w_l_CH4/M_CH4) + (w_l_H2O/M_H2O))**-1

    # Mole fractions in the liquid phase:
    x_l_H2 = (w_l_H2/M_H2)*M_l
    x_l_CO2 = (w_l_CO2/M_CO2)*M_l
    x_l_CH4 = (w_l_CH4/M_CH4)*M_l

    # Saturated liquid mole fractions, from the Henry constants and the
    # partial pressure (gas mole fraction times gas pressure):
    x_l_sat_H2 = (M_l/density_l) * H_H2 * x_g_H2 * pressure_g
    x_l_sat_CO2 = (M_l/density_l) * H_CO2 * x_g_CO2 * pressure_g
    x_l_sat_CH4 = (M_l/density_l) * H_CH4 * x_g_CH4 * pressure_g
             
    # Source terms due to dissolution/evaporation, driven by the distance
    # from saturation:
    R_diss_H2  = K_diss_H2*(x_l_sat_H2 - x_l_H2)
    R_diss_CO2 = K_diss_CO2*(x_l_sat_CO2 - x_l_CO2)

    # CH4 uses the dissolution constant while the liquid is below saturation
    # and the (different) evaporation constant once it is at or above it.
    if x_l_CH4 < x_l_sat_CH4:
        R_diss_CH4 = K_diss_CH4*(x_l_sat_CH4 - x_l_CH4)
    else:
        R_diss_CH4 = K_evap_CH4*(x_l_sat_CH4 - x_l_CH4)


    R_diss_tot = R_diss_H2 + R_diss_CO2 + R_diss_CH4

    # Rate of change of the liquid saturation:
    source_S_liq = (1/(pore*density_l))*R_diss_tot
       
    # Gas saturation is the complement of the liquid saturation:
    S_gas = 1 - S_liq
       
    # Uptake/production rates of H2, CO2 and CH4:

    # CO2 uptake grows with dissolved CO2 but is capped at r_CO2_max.
    r_CO2  = min(U_CO2 * x_l_CO2 , r_CO2_max) #mol of CO2 / kg of dry biomass / hour
    # Growth rate: double Monod term, additionally limited by the CO2 uptake
    # relative to its maximum.
    mu_monod = mu_max * (x_l_H2 / (K_Monod_H2 + x_l_H2)) * (x_l_CO2 / (K_Monod_CO2 + x_l_CO2))
    mu = min(mu_monod, (r_CO2 * mu_max / r_CO2_max))
        

    # Four H2 are taken up per CO2; CH4 production is the CO2 uptake minus a
    # share proportional to growth.
    r_H2 = r_CO2 * 4
    r_CH4 = r_CO2 - (5.855 * mu)

    # Biomass change over time:
    source_m = ((mu - d) * m)/3600 # divide by 3600 to get all units in si units (from hour to seconds)

    # Source terms for uptake (negative sign) and production, also divided by 3600:
    R_uptake_H2 = (-r_H2*m*M_H2*S_liq*pore)/3600
    R_uptake_CO2 = (-r_CO2*m*M_CO2*S_liq*pore)/3600
    R_uptake_CH4 = (r_CH4*m*M_CH4*S_liq*pore)/3600


    # Liquid mass fractions change over time:
    source_w_l_H2 = 1/(pore*density_l*S_liq) * (R_uptake_H2 + R_diss_H2 - (w_l_H2 * R_diss_tot))
    source_w_l_CO2 = 1/(pore*density_l*S_liq) * (R_uptake_CO2 + R_diss_CO2 - (w_l_CO2 * R_diss_tot))
    source_w_l_CH4 = 1/(pore*density_l*S_liq) * (R_uptake_CH4 + R_diss_CH4 - (w_l_CH4 * R_diss_tot))

    # Gas density change over time:
    source_density_g = -(R_diss_tot / (pore * S_gas)) * (1 - (density_g / density_l))
        
    # Gas mass fractions change over time (opposite sign to the liquid side):
    source_w_g_H2 = -(1/(pore*density_g*S_gas)) * (R_diss_H2 - w_g_H2 * R_diss_tot) 
    source_w_g_CO2 = -(1/(pore*density_g*S_gas)) * (R_diss_CO2 - w_g_CO2 * R_diss_tot) 
    source_w_g_CH4 = -(1/(pore*density_g*S_gas)) * (R_diss_CH4 - w_g_CH4 * R_diss_tot) 
 
    # Derivatives, in the same order as the state vector y:
    return [source_w_g_H2, source_w_g_CO2, source_w_g_CH4, source_w_l_H2, source_w_l_CO2, source_w_l_CH4, source_m, source_density_g, source_S_liq]


#########################################################################################################################################



############################### Simulated pressure fill 1 ###############################################
# Pressure generation function for fill 1
def pressure_fill1(data, time, in_pressure, theta):
    """Simulate the gas pressure over the first fill.

    Parameters
    ----------
    data : accepted but never read inside this function.
    time : sample times of the fill; shifted to start at zero and multiplied
        by 24*60*60 (days to seconds) before being given to the solver.
    in_pressure : initial gas pressure; multiplied by 1e5 to obtain Pa.
    theta : five exponents; ``10**theta[k]`` gives, in order, the initial
        biomass, U_CO2, K_Monod_H2, K_Monod_CO2 and d.

    Returns
    -------
    tuple
        ``(w_l_CH4_in_new, pressure_g_pascal, S_liq_in_new, BM_in_new,
        biomass)``: the dissolved-CH4 mass fraction, liquid saturation and
        biomass to start the next fill with, plus the simulated pressure (Pa)
        and biomass at every sample time.
    """
    # Share of the dissolved CH4 that is carried over into the next fill.
    carry_over = 0.557

    # Initial gas pressure in Pa.
    p_start = in_pressure*1e5

    # Only the part of the pressure above 1e5 Pa has the feed composition;
    # the rest of the gas is counted as CH4.
    fed_share = (p_start - 1e5) / p_start
    x_H2 = fed_share * x_g_H2_th
    x_CO2 = fed_share * x_g_CO2_th
    x_CH4 = 1 - x_H2 - x_CO2

    # Mean molar mass of the initial gas and, from it, the initial density.
    molar_mass = (x_H2*M_H2) + (x_CO2*M_CO2) + (x_CH4*M_CH4)
    specific_R = 8.314/molar_mass
    rho_start = p_start/specific_R/temperature

    # Initial state: gas mass fractions, a liquid without dissolved gases,
    # the fitted initial biomass, gas density and a liquid saturation of 0.35.
    y_start = [
        (x_H2 * M_H2) / molar_mass,
        (x_CO2 * M_CO2) / molar_mass,
        (x_CH4 * M_CH4) / molar_mass,
        0,
        0,
        0,
        10**theta[0],
        rho_start,
        0.35,
    ]
    kinetics = (10**theta[1], 10**theta[2], 10**theta[3], 10**theta[4])

    # Sample times in seconds, counted from the first sample.
    seconds = np.array(time) - np.array(time[0])
    seconds = seconds*24*60*60

    sol = solve_ivp(rk45, [0, seconds[-1]], y0=y_start, args=kinetics,
                    t_eval=seconds, atol=1e-8, rtol=1e-6)
    (w_g_H2, w_g_CO2, w_g_CH4,
     w_l_H2, w_l_CO2, w_l_CH4,
     biomass, density_g, S_liq) = sol.y

    # Pressure at every sample time, from the simulated density and composition.
    molar_mass_t = ((w_g_H2/M_H2) + (w_g_CO2/M_CO2) + (w_g_CH4/M_CH4))**-1
    pressure_g_pascal = density_g * (8.314 / molar_mass_t) * temperature

    # Fraction of the liquid that remains once the dissolved H2 and CO2 and
    # the non-carried-over part of the CH4 are taken out.
    remaining = 1 - (1 - carry_over)*w_l_CH4[-1] - w_l_H2[-1] - w_l_CO2[-1]

    S_liq_in_new = S_liq[-1] * remaining
    w_l_CH4_in_new = w_l_CH4[-1]*carry_over
    BM_in_new = biomass[-1] / remaining

    return w_l_CH4_in_new, pressure_g_pascal, S_liq_in_new, BM_in_new, biomass


#########################################################################################################################################


############################### Simulated pressure all other fills ###############################################
# Pressure generation function for all fills except 1  
def pressure_otherfills(data, time, in_pressure, theta, w_l_CH4_in_new, S_liq_in, BM_in_new):
    """Simulate the gas pressure over any fill after the first one.

    Parameters
    ----------
    data : accepted but never read inside this function.
    time : sample times of the fill; shifted to start at zero and multiplied
        by 24*60*60 (days to seconds) before being given to the solver.
    in_pressure : initial gas pressure; multiplied by 1e5 to obtain Pa.
    theta : exponents of the fitted parameters; ``theta[0]`` is ignored here
        and ``10**theta[1..4]`` give U_CO2, K_Monod_H2, K_Monod_CO2 and d.
    w_l_CH4_in_new, S_liq_in, BM_in_new : dissolved-CH4 mass fraction, liquid
        saturation and biomass the fill starts with.

    Returns
    -------
    tuple
        ``(w_l_CH4_in_new, pressure_g_pascal, S_liq_in_new, BM_in_new,
        biomass)``: the starting values for the following fill, plus the
        simulated pressure (Pa) and biomass at every sample time.
    """
    # Share of the dissolved CH4 that is carried over into the next fill.
    carry_over = 0.557

    # Initial gas pressure in Pa.
    p_start = in_pressure*1e5

    # Only the pressure above the residual pressureCH4resid has the feed
    # composition; the rest of the gas is counted as CH4.
    fed_share = (p_start - pressureCH4resid) / p_start
    x_H2 = fed_share * x_g_H2_th
    x_CO2 = fed_share * x_g_CO2_th
    x_CH4 = 1 - x_H2 - x_CO2

    # Mean molar mass of the initial gas and, from it, the initial density.
    molar_mass = (x_H2*M_H2) + (x_CO2*M_CO2) + (x_CH4*M_CH4)
    specific_R = 8.314/molar_mass
    rho_start = p_start/specific_R/temperature

    # Initial state: gas mass fractions, a liquid without dissolved H2/CO2
    # but with the carried-over CH4, then biomass, gas density and saturation.
    y_start = [
        (x_H2 * M_H2) / molar_mass,
        (x_CO2 * M_CO2) / molar_mass,
        (x_CH4 * M_CH4) / molar_mass,
        0,
        0,
        w_l_CH4_in_new,
        BM_in_new,
        rho_start,
        S_liq_in,
    ]
    kinetics = (10**theta[1], 10**theta[2], 10**theta[3], 10**theta[4])

    # Sample times in seconds, counted from the first sample.
    seconds = np.array(time) - np.array(time[0])
    seconds = seconds*24*60*60

    sol = solve_ivp(rk45, [0, seconds[-1]], y0=y_start, args=kinetics,
                    t_eval=seconds, atol=1e-8, rtol=1e-6)
    (w_g_H2, w_g_CO2, w_g_CH4,
     w_l_H2, w_l_CO2, w_l_CH4,
     biomass, density_g, S_liq) = sol.y

    # Pressure at every sample time, from the simulated density and composition.
    molar_mass_t = ((w_g_H2/M_H2) + (w_g_CO2/M_CO2) + (w_g_CH4/M_CH4))**-1
    pressure_g_pascal = density_g * (8.314 / molar_mass_t) * temperature

    # Fraction of the liquid that remains once the dissolved H2 and CO2 and
    # the non-carried-over part of the CH4 are taken out.
    remaining = 1 - (1 - carry_over)*w_l_CH4[-1] - w_l_H2[-1] - w_l_CO2[-1]

    S_liq_in_new = S_liq[-1] * remaining
    w_l_CH4_in_new = w_l_CH4[-1]*carry_over
    BM_in_new = biomass[-1] / remaining

    return w_l_CH4_in_new, pressure_g_pascal, S_liq_in_new, BM_in_new, biomass


#########################################################################################################################################


############################### Generate simulated pressure for all fills together ###############################################
#Generate pressure_g_sim -> simulated pressure
def simulated_pressure(df_time, in_pressure, theta0):
    """Simulate every fill of one reactor in sequence.

    The first fill is simulated with ``pressure_fill1``; each later fill is
    simulated with ``pressure_otherfills`` and starts from the dissolved-CH4
    mass fraction, liquid saturation and biomass left by the fill before it.

    Parameters
    ----------
    df_time : one sequence of sample times per fill.
    in_pressure : initial pressure of each fill, aligned with ``df_time``.
    theta0 : parameter exponents, passed unchanged to the fill functions.

    Returns
    -------
    tuple
        ``(pressure, biomass, biomass_endvalue, biomass_concatenated,
        w_l_CH4_endvalue, S_liq_endvalue)`` where ``pressure`` and ``biomass``
        are lists with one array per fill, ``biomass_concatenated`` joins all
        biomass arrays, and the three ``*_endvalue`` entries are the last
        biomass sample and the CH4 fraction / saturation produced by the
        final fill.
    """
    # Both fill functions take an unused `data` argument first; it has to be
    # supplied explicitly or every following argument is shifted by one.
    w_l_CH4_in, fill_pressure, S_liq_in, BM_in, fill_biomass = pressure_fill1(
        None, df_time[0], in_pressure[0], theta0)
    pressure = [fill_pressure]
    biomass = [fill_biomass]

    # The number of fills comes from the data handed in, not from a global.
    for i in range(1, len(df_time)):
        w_l_CH4_in, fill_pressure, S_liq_in, BM_in, fill_biomass = pressure_otherfills(
            None, df_time[i], in_pressure[i], theta0, w_l_CH4_in, S_liq_in, BM_in)
        pressure.append(fill_pressure)
        biomass.append(fill_biomass)

    biomass_concatenated = np.concatenate(biomass)
    biomass_endvalue = biomass_concatenated[-1]

    # After the loop these hold the values produced by the last fill.
    w_l_CH4_endvalue = w_l_CH4_in
    S_liq_endvalue = S_liq_in

    return pressure, biomass, biomass_endvalue, biomass_concatenated, w_l_CH4_endvalue, S_liq_endvalue
    
    
# NOTE(review): df_time, in_pressure and theta0 are not assigned anywhere in
# the visible code above this call — confirm an earlier cell defines them.
[pressure_g_sim, biomass, biomass_endvalue, biomass_concatenated, w_l_CH4_endvalue, S_liq_endvalue] = simulated_pressure(df_time, in_pressure, theta0)


#########################################################################################################################################



############################### Generate experimental pressure in Pa ###############################################
# Generate experimental pressure in pascals

# One empty list per entry of df_pressure.
# NOTE(review): df_pressure is not assigned at module level in the visible
# code above this line — confirm an earlier cell defines it.
exp_pressure = [[] for i in range(len(df_pressure))]

def experimental_pressure(df_pressure):
    """Convert per-fill pressure readings from bar to pascal.

    Parameters
    ----------
    df_pressure : sequence of fills, each a sequence of pressures in bar.

    Returns
    -------
    list
        A new list with one list per fill holding every reading times 1e5.
        The result is built from scratch on each call, so converting one
        reactor never adds to or aliases the result of another.
    """
    return [[reading * 1e5 for reading in fill] for fill in df_pressure]

# Experimental pressures of df_pressure in Pa.
# NOTE(review): df_pressure is not assigned at module level in the visible
# code above this line — confirm an earlier cell defines it.
exp_pressure = experimental_pressure(df_pressure)

In [None]:
# Read in the experimental data for all reactors.
# Each frame is bound to the name of the reactor whose file it holds, because
# the cells below look the frames up as df_reactor1 ... df_reactor4.
# NOTE(review): the reactor 2 file name has no underscore before
# "exppressure", unlike the other three — confirm it matches the file on disk.

df_reactor1 = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\reactor1\\U_CO2\\reactor1_exppressure.csv", sep=";")
df_reactor2 = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\reactor2\\U_CO2\\reactor2exppressure.csv", sep=";")
df_reactor3 = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\reactor3\\U_CO2\\reactor3_exppressure.csv", sep=";")
df_reactor4 = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\reactor4\\U_CO2\\reactor4_exppressure.csv", sep=";")


############################### Generate the dataframes for each reactor which hold indexes of each fill and pressure without gap between fills ###############################################

def _consecutive_runs(positions):
    """Collapse integer positions into runs: [start, end], or [start] for a lone position."""
    runs = []
    for pos in sorted(positions):
        if runs and pos == runs[-1][-1] + 1:
            # Extend the current run: overwrite its end, or add one if the
            # run so far consists of the start position only.
            if len(runs[-1]) > 1:
                runs[-1][-1] = pos
            else:
                runs[-1].append(pos)
        else:
            runs.append([pos])
    return runs


def dataframe(dataframe, time):
    """Split a pressure column into fills, using missing values as separators.

    Parameters
    ----------
    dataframe : pandas Series of pressure readings; runs of missing values
        mark the gaps between fills.
    time : pandas Series of sample times aligned row by row with ``dataframe``.

    Returns
    -------
    tuple
        ``(df_indices, nan_indices, df_pressure, in_pres, end_pres, df_time)``:
        the row-position runs of present and of missing readings (each run is
        ``[start, end]``, or ``[start]`` when it is one row long), the
        readings of every fill, the first and last reading of every fill, and
        the sample times of every fill.
    """
    # Row positions of present and of missing readings.
    present = np.flatnonzero(dataframe.notnull().to_numpy()).tolist()
    missing = np.flatnonzero(dataframe.isnull().to_numpy()).tolist()

    df_indices = _consecutive_runs(present)
    nan_indices = _consecutive_runs(missing)

    # run[-1] is the end of the run whether it has one entry or two, so a
    # fill that is a single row long is sliced correctly as well.
    df_pressure = [dataframe.iloc[run[0]:run[-1] + 1].tolist() for run in df_indices]
    df_time = [time.iloc[run[0]:run[-1] + 1].tolist() for run in df_indices]

    in_pres = [fill[0] for fill in df_pressure]
    end_pres = [fill[-1] for fill in df_pressure]

    return df_indices, nan_indices, df_pressure, in_pres, end_pres, df_time

# Split each reactor's "pressure_bar" column into fills, together with the
# matching "days" values.
(reactor1_df_indices, reactor1_nan_indices, reactor1_df_pressure,
 reactor1_in_pressure, reactor1_end_pressure, reactor1_df_time) = dataframe(
    df_reactor1["pressure_bar"], df_reactor1["days"])
(reactor2_df_indices, reactor2_nan_indices, reactor2_df_pressure,
 reactor2_in_pressure, reactor2_end_pressure, reactor2_df_time) = dataframe(
    df_reactor2["pressure_bar"], df_reactor2["days"])
(reactor3_df_indices, reactor3_nan_indices, reactor3_df_pressure,
 reactor3_in_pressure, reactor3_end_pressure, reactor3_df_time) = dataframe(
    df_reactor3["pressure_bar"], df_reactor3["days"])
(reactor4_df_indices, reactor4_nan_indices, reactor4_df_pressure,
 reactor4_in_pressure, reactor4_end_pressure, reactor4_df_time) = dataframe(
    df_reactor4["pressure_bar"], df_reactor4["days"])


############################### Generate experimental pressure and time in days for each reactor ###############################################

# Experimental pressures of every reactor, passed through
# experimental_pressure() (each reading multiplied by 1e5).
reactor1_exp_pressure = experimental_pressure(reactor1_df_pressure)
reactor2_exp_pressure = experimental_pressure(reactor2_df_pressure)
reactor3_exp_pressure = experimental_pressure(reactor3_df_pressure)
reactor4_exp_pressure = experimental_pressure(reactor4_df_pressure)

# NOTE(review): no callable named `time` is defined or imported in the visible
# code (only function parameters called `time` exist) — confirm where it
# comes from before running this cell.
reactor1_days = time(reactor1_df_time)
reactor2_days = time(reactor2_df_time)
reactor3_days = time(reactor3_df_time)
reactor4_days = time(reactor4_df_time)


In [None]:
############################### Read in the optimal values and theta0 generated previously for each reactor  ###############################################

# Previously saved fit results per reactor: the table itself plus its
# "optimal_values" and "theta0" columns.
reactor1_df_new = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\cross_validation\\optimal_values_reactor1_10.csv", sep=",")
reactor1_optimal_values, reactor1_theta0 = reactor1_df_new["optimal_values"], reactor1_df_new["theta0"]

reactor2_df_new = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\cross_validation\\optimal_values_reactor2_10.csv", sep=",")
reactor2_optimal_values, reactor2_theta0 = reactor2_df_new["optimal_values"], reactor2_df_new["theta0"]

reactor3_df_new = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\cross_validation\\optimal_values_reactor3_10.csv", sep=",")
reactor3_optimal_values, reactor3_theta0 = reactor3_df_new["optimal_values"], reactor3_df_new["theta0"]

reactor4_df_new = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\cross_validation\\optimal_values_reactor4_10.csv", sep=",")
reactor4_optimal_values, reactor4_theta0 = reactor4_df_new["optimal_values"], reactor4_df_new["theta0"]


############################### Convert the optimal values to arrays so they can be used for the cross validation  ###############################################

# Empty accumulators that format_dataframe() fills in place: per reactor a
# token list and an array list, for the optimal values and for theta0.
reactor1_optimal_list, reactor1_optimal_array = [], []
reactor1_theta0_list, reactor1_theta0_array = [], []

reactor2_optimal_list, reactor2_optimal_array = [], []
reactor2_theta0_list, reactor2_theta0_array = [], []

reactor3_optimal_list, reactor3_optimal_array = [], []
reactor3_theta0_list, reactor3_theta0_array = [], []

reactor4_optimal_list, reactor4_optimal_array = [], []
reactor4_theta0_list, reactor4_theta0_array = [], []

def format_dataframe(theta0, theta0_list, theta0_array, optimal_values, optimal_list, optimal_array):
    """Parse stringified parameter vectors back into float arrays.

    Parameters
    ----------
    theta0, optimal_values : iterables of strings such as ``"[1, 2, 3]"`` or
        ``"[ 1.5 -2.   3.25]"``; values may be separated by commas and/or any
        amount of whitespace.
    theta0_list, optimal_list : lists that receive one list of string tokens
        per parsed entry (appended in place).
    theta0_array, optimal_array : lists that receive one float array per
        entry of the corresponding token list (appended in place).

    Returns
    -------
    tuple
        ``(theta0_array, optimal_array)`` — the same list objects passed in.
    """
    def _tokens(text):
        # Drop the surrounding brackets, then split on commas and whitespace;
        # splitting this way never produces empty tokens.
        return text.strip().strip('][').replace(',', ' ').split()

    # Iterate over the values themselves: indexing a pandas Series with
    # 0..n-1 is a label lookup and fails when the index is not 0..n-1.
    theta0_list.extend(_tokens(text) for text in theta0)
    theta0_array.extend(np.array(tokens, dtype=float) for tokens in theta0_list)

    optimal_list.extend(_tokens(text) for text in optimal_values)
    optimal_array.extend(np.array(tokens, dtype=float) for tokens in optimal_list)

    return theta0_array, optimal_array


# Parse the stored theta0 / optimal-value strings of every reactor into arrays.
reactor1_theta0_array, reactor1_opt_array = format_dataframe(
    reactor1_theta0, reactor1_theta0_list, reactor1_theta0_array,
    reactor1_optimal_values, reactor1_optimal_list, reactor1_optimal_array)
reactor2_theta0_array, reactor2_opt_array = format_dataframe(
    reactor2_theta0, reactor2_theta0_list, reactor2_theta0_array,
    reactor2_optimal_values, reactor2_optimal_list, reactor2_optimal_array)
reactor3_theta0_array, reactor3_opt_array = format_dataframe(
    reactor3_theta0, reactor3_theta0_list, reactor3_theta0_array,
    reactor3_optimal_values, reactor3_optimal_list, reactor3_optimal_array)
reactor4_theta0_array, reactor4_opt_array = format_dataframe(
    reactor4_theta0, reactor4_theta0_list, reactor4_theta0_array,
    reactor4_optimal_values, reactor4_optimal_list, reactor4_optimal_array)


def add_arrays_to_dataframe(reactor_df_new, reactor_theta0_array, reactor_opt_array):
    """Swap the string columns of a results table for their parsed arrays.

    Adds the columns "theta0_array" and "optimal_array", removes
    "optimal_values" and "theta0", and returns the same (modified) frame.
    """
    for column, arrays in (("theta0_array", reactor_theta0_array),
                           ("optimal_array", reactor_opt_array)):
        reactor_df_new[column] = arrays
    reactor_df_new.drop(columns=["optimal_values", "theta0"], inplace=True)
    return reactor_df_new


# Replace the string columns of every results table with the parsed arrays.
# add_arrays_to_dataframe() edits the frame it is given and returns that same
# frame, so each name is simply rebound to the object it already refers to.
reactor1_df_new = add_arrays_to_dataframe(reactor1_df_new, reactor1_theta0_array, reactor1_opt_array)
reactor2_df_new = add_arrays_to_dataframe(reactor2_df_new, reactor2_theta0_array, reactor2_opt_array)
reactor3_df_new = add_arrays_to_dataframe(reactor3_df_new, reactor3_theta0_array, reactor3_opt_array)
reactor4_df_new = add_arrays_to_dataframe(reactor4_df_new, reactor4_theta0_array, reactor4_opt_array)

############################### generate final sorted dataframes and find the optimal_values that have the lowest norm_res  ###############################################

# Per reactor: sort the results by norm_res, then pick the row label, the
# norm_res value and the optimal parameter array of the best (smallest) fit.
reactor1_df_final = reactor1_df_new.sort_values(by=['norm_res'])
reactor1_min_idnormres = reactor1_df_final['norm_res'].idxmin()
reactor1_min_normres = reactor1_df_final.at[reactor1_min_idnormres, 'norm_res']
reactor1_min_opt = reactor1_df_final.at[reactor1_min_idnormres, "optimal_array"]

reactor2_df_final = reactor2_df_new.sort_values(by=['norm_res'])
reactor2_min_idnormres = reactor2_df_final['norm_res'].idxmin()
reactor2_min_normres = reactor2_df_final.at[reactor2_min_idnormres, 'norm_res']
reactor2_min_opt = reactor2_df_final.at[reactor2_min_idnormres, "optimal_array"]

reactor3_df_final = reactor3_df_new.sort_values(by=['norm_res'])
reactor3_min_idnormres = reactor3_df_final['norm_res'].idxmin()
reactor3_min_normres = reactor3_df_final.at[reactor3_min_idnormres, 'norm_res']
reactor3_min_opt = reactor3_df_final.at[reactor3_min_idnormres, "optimal_array"]

reactor4_df_final = reactor4_df_new.sort_values(by=['norm_res'])
reactor4_min_idnormres = reactor4_df_final['norm_res'].idxmin()
reactor4_min_normres = reactor4_df_final.at[reactor4_min_idnormres, 'norm_res']
reactor4_min_opt = reactor4_df_final.at[reactor4_min_idnormres, "optimal_array"]

In [None]:
############################### pressure_diff function for each reactor ###############################################

def reactor1_pressure_diff(theta0):
    """Residuals for reactor 1: experimental minus simulated pressure.

    ``simulated_pressure`` takes ``(df_time, in_pressure, theta0)`` and returns
    the per-fill pressure arrays as the first of its six results. Both the
    simulated and the experimental pressures are lists with one entry per
    fill, so they are flattened before subtracting; the result is the 1-D
    array that ``least_squares`` expects from a residual function.
    """
    pressure_g_sim = simulated_pressure(reactor1_df_time, reactor1_in_pressure, theta0)[0]

    diff = np.concatenate(reactor1_exp_pressure) - np.concatenate(pressure_g_sim)
    return diff


def reactor2_pressure_diff(theta0):
    """Residuals for reactor 2: experimental minus simulated pressure.

    ``simulated_pressure`` takes ``(df_time, in_pressure, theta0)`` and returns
    the per-fill pressure arrays as the first of its six results. Both the
    simulated and the experimental pressures are lists with one entry per
    fill, so they are flattened before subtracting; the result is the 1-D
    array that ``least_squares`` expects from a residual function.
    """
    pressure_g_sim = simulated_pressure(reactor2_df_time, reactor2_in_pressure, theta0)[0]

    diff = np.concatenate(reactor2_exp_pressure) - np.concatenate(pressure_g_sim)
    return diff


def reactor3_pressure_diff(theta0):
    """Residuals for reactor 3: experimental minus simulated pressure.

    ``simulated_pressure`` takes ``(df_time, in_pressure, theta0)`` and returns
    the per-fill pressure arrays as the first of its six results. Both the
    simulated and the experimental pressures are lists with one entry per
    fill, so they are flattened before subtracting; the result is the 1-D
    array that ``least_squares`` expects from a residual function.
    """
    pressure_g_sim = simulated_pressure(reactor3_df_time, reactor3_in_pressure, theta0)[0]

    diff = np.concatenate(reactor3_exp_pressure) - np.concatenate(pressure_g_sim)
    return diff


def reactor4_pressure_diff(theta0):
    """Residuals for reactor 4: experimental minus simulated pressure.

    ``simulated_pressure`` takes ``(df_time, in_pressure, theta0)`` and returns
    the per-fill pressure arrays as the first of its six results. Both the
    simulated and the experimental pressures are lists with one entry per
    fill, so they are flattened before subtracting; the result is the 1-D
    array that ``least_squares`` expects from a residual function.
    """
    pressure_g_sim = simulated_pressure(reactor4_df_time, reactor4_in_pressure, theta0)[0]

    diff = np.concatenate(reactor4_exp_pressure) - np.concatenate(pressure_g_sim)
    return diff

# Testing Optimal Values from Reactors 2, 3, and 4 as Theta0 for Reactor 1

In [None]:
############################### Test optimal values from other reactors as theta0 for reactor 1  ###############################################

# Search bounds for the five fitted exponents, in the order in which the fill
# functions read theta: initial biomass, U_CO2, K_Monod_H2, K_Monod_CO2, d.
l_bound = [-6, 3, -8, -8, -5]
u_bound = [2, 9, -2, -2, -2]

# Result accumulators for reactor 1, one pair per reactor supplying theta0.
R1_norm_res_theta0fromR2, R1_optimal_values_theta0fromR2 = [], []
R1_norm_res_theta0fromR3, R1_optimal_values_theta0fromR3 = [], []
R1_norm_res_theta0fromR4, R1_optimal_values_theta0fromR4 = [], []

def optimal_values(reactor_pressure_diff, normres_array, optval_array, theta0):
    """Run one bounded least-squares fit and record its outcome.

    Starts ``least_squares`` from ``theta0`` within the module-level bounds
    ``l_bound`` / ``u_bound``, appends the fitted parameters to
    ``optval_array`` and the Euclidean norm of the final residuals to
    ``normres_array``, and returns ``(optval_array, normres_array)``.
    """
    fit = least_squares(reactor_pressure_diff, theta0, bounds=(l_bound, u_bound))
    optval_array.append(fit.x)
    normres_array.append(np.linalg.norm(fit.fun))
    return optval_array, normres_array

# Refit reactor 1 three times, starting from the best parameters of reactors
# 2, 3 and 4. Every run is given the accumulator pair that belongs to its
# theta0 source, so no two runs share a list and every name used is one that
# the setup above actually defines.
[opt_R1_CV_R2, normres_R1_CV_R2] = optimal_values(reactor1_pressure_diff, R1_norm_res_theta0fromR2, R1_optimal_values_theta0fromR2, reactor2_min_opt)
[opt_R1_CV_R3, normres_R1_CV_R3] = optimal_values(reactor1_pressure_diff, R1_norm_res_theta0fromR3, R1_optimal_values_theta0fromR3, reactor3_min_opt)
[opt_R1_CV_R4, normres_R1_CV_R4] = optimal_values(reactor1_pressure_diff, R1_norm_res_theta0fromR4, R1_optimal_values_theta0fromR4, reactor4_min_opt)

In [None]:
#Fix the formatting of newly generated norm_res and optimal_values to fit the dataframe already existing for least_squares tries

# Each result list holds the outcome of one fit: turn the list of parameter
# arrays into a flat array and the one-element norm list into a float.
opt_R1_CV_R2 = np.concatenate(opt_R1_CV_R2, axis=0)
opt_R1_CV_R3 = np.concatenate(opt_R1_CV_R3, axis=0)
opt_R1_CV_R4 = np.concatenate(opt_R1_CV_R4, axis=0)

# Take the number straight from the list. Going through str() depends on how
# NumPy prints scalars inside a list and stops working once that text is
# "np.float64(...)" rather than the bare number.
normres_R1_CV_R2 = float(normres_R1_CV_R2[0])
normres_R1_CV_R3 = float(normres_R1_CV_R3[0])
normres_R1_CV_R4 = float(normres_R1_CV_R4[0])

In [None]:
# Create a table with the three cross-validation fits and add it to the
# existing results of reactor 1.
data = {'Unnamed: 0': ["optimal values from reactor 2", "optimal values from reactor 3", "optimal values from reactor 4"], 'theta0_array': [reactor2_min_opt, reactor3_min_opt, reactor4_min_opt],'norm_res': [normres_R1_CV_R2, normres_R1_CV_R3, normres_R1_CV_R4], 'optimal_array': [opt_R1_CV_R2, opt_R1_CV_R3, opt_R1_CV_R4]}
# Bound to its own name so that the dataframe() helper defined earlier in
# this file is not overwritten.
reactor1_cv_rows = pd.DataFrame(data)

# pd.concat is used because DataFrame.append no longer exists in pandas 2.x.
reactor1_dataframe = pd.concat([reactor1_df_final, reactor1_cv_rows], ignore_index=True)
reactor1_dataframe = reactor1_dataframe.rename(columns={'Unnamed: 0': 'ID'})
reactor1_dataframe = reactor1_dataframe.sort_values(by=['norm_res'])

# Best fit for reactor 1 after adding the cross-validation runs.
newindex_reactor1_min_normres = reactor1_dataframe['norm_res'].idxmin()
new_reactor1_min_normres = reactor1_dataframe['norm_res'][newindex_reactor1_min_normres]
new_reactor1_min_opt = reactor1_dataframe["optimal_array"][newindex_reactor1_min_normres]

reactor1_dataframe

In [None]:
# Save the combined results of reactor 1; the relative path means the file
# lands in the current working directory.
reactor1_dataframe.to_csv('crossvalidation_optimal_values_reactor1.csv')

# Testing Optimal Values from Reactors 1, 3, and 4 as Theta0 for Reactor 2

In [None]:
############################### generate norm_res for the new theta0 values (optimal_values for reactors used as cross validation)  ###############################################

# Search bounds for the five fitted exponents, in the order in which the fill
# functions read theta: initial biomass, U_CO2, K_Monod_H2, K_Monod_CO2, d.
l_bound = [-6, 3, -8, -8, -5]
u_bound = [2, 9, -2, -2, -2]

# Result accumulators for reactor 2, one pair per reactor supplying theta0.
R2_norm_res_theta0fromR1, R2_optimal_values_theta0fromR1 = [], []
R2_norm_res_theta0fromR3, R2_optimal_values_theta0fromR3 = [], []
R2_norm_res_theta0fromR4, R2_optimal_values_theta0fromR4 = [], []

def norm_residuals(reactor_pressure_diff, normres_array, optval_array, theta0):
    """Fit one reactor from ``theta0`` and store parameters and residual norm.

    ``least_squares`` is run within the module-level bounds ``l_bound`` /
    ``u_bound``. The fitted parameters are appended to ``optval_array`` and
    the Euclidean norm of the final residuals to ``normres_array``; both
    lists are returned as ``(optval_array, normres_array)``.
    """
    result = least_squares(reactor_pressure_diff, theta0, bounds=(l_bound, u_bound))
    optval_array.append(result.x)
    normres_array.append(np.linalg.norm(result.fun))
    return optval_array, normres_array

# Refit reactor 2 starting from the best parameters of reactors 1, 3 and 4
# (for reactor 1 the best set found after its own cross-validation).
opt_R2_CV_R1, normres_R2_CV_R1 = norm_residuals(
    reactor2_pressure_diff, R2_norm_res_theta0fromR1, R2_optimal_values_theta0fromR1, new_reactor1_min_opt)
opt_R2_CV_R3, normres_R2_CV_R3 = norm_residuals(
    reactor2_pressure_diff, R2_norm_res_theta0fromR3, R2_optimal_values_theta0fromR3, reactor3_min_opt)
opt_R2_CV_R4, normres_R2_CV_R4 = norm_residuals(
    reactor2_pressure_diff, R2_norm_res_theta0fromR4, R2_optimal_values_theta0fromR4, reactor4_min_opt)

In [None]:
#Fix the formatting of newly generated norm_res and optimal_values to fit the dataframe already existing for least_squares tries

# Each result list holds the outcome of one fit: turn the list of parameter
# arrays into a flat array and the one-element norm list into a float.
opt_R2_CV_R1 = np.concatenate(opt_R2_CV_R1, axis=0)
opt_R2_CV_R3 = np.concatenate(opt_R2_CV_R3, axis=0)
opt_R2_CV_R4 = np.concatenate(opt_R2_CV_R4, axis=0)

# Take the number straight from the list. Going through str() depends on how
# NumPy prints scalars inside a list and stops working once that text is
# "np.float64(...)" rather than the bare number.
normres_R2_CV_R1 = float(normres_R2_CV_R1[0])
normres_R2_CV_R3 = float(normres_R2_CV_R3[0])
normres_R2_CV_R4 = float(normres_R2_CV_R4[0])

In [None]:
# Create a table with the three cross-validation fits and add it to the
# existing results of reactor 2.
data = {'Unnamed: 0': ["optimal values from reactor 1", "optimal values from reactor 3", "optimal values from reactor 4"], 'theta0_array': [new_reactor1_min_opt, reactor3_min_opt, reactor4_min_opt],'norm_res': [normres_R2_CV_R1, normres_R2_CV_R3, normres_R2_CV_R4], 'optimal_array': [opt_R2_CV_R1, opt_R2_CV_R3, opt_R2_CV_R4]}
# Bound to its own name so that the dataframe() helper defined earlier in
# this file is not overwritten.
reactor2_cv_rows = pd.DataFrame(data)

# pd.concat is used because DataFrame.append no longer exists in pandas 2.x.
reactor2_dataframe = pd.concat([reactor2_df_final, reactor2_cv_rows], ignore_index=True)
reactor2_dataframe = reactor2_dataframe.rename(columns={'Unnamed: 0': 'ID'})
reactor2_dataframe = reactor2_dataframe.sort_values(by=['norm_res'])

# Best fit for reactor 2 after adding the cross-validation runs.
newindex_reactor2_min_normres = reactor2_dataframe['norm_res'].idxmin()
new_reactor2_min_normres = reactor2_dataframe['norm_res'][newindex_reactor2_min_normres]
new_reactor2_min_opt = reactor2_dataframe["optimal_array"][newindex_reactor2_min_normres]

reactor2_dataframe

In [None]:
# Save the combined results of reactor 2; the relative path means the file
# lands in the current working directory.
reactor2_dataframe.to_csv('crossvalidation_optimal_values_reactor2.csv')

# Testing Optimal Values from Reactors 1, 2, and 4 as Theta0 for Reactor 3

In [None]:
############################### generate norm_res for the new theta0 values (optimal_values for reactors used as cross validation)  ###############################################

# Search bounds for the five fitted exponents, in the order in which the fill
# functions read theta: initial biomass, U_CO2, K_Monod_H2, K_Monod_CO2, d.
l_bound = [-6, 3, -8, -8, -5]
u_bound = [2, 9, -2, -2, -2]

# Result accumulators for reactor 3, one pair per reactor supplying theta0.
R3_norm_res_theta0fromR1, R3_optimal_values_theta0fromR1 = [], []
R3_norm_res_theta0fromR2, R3_optimal_values_theta0fromR2 = [], []
R3_norm_res_theta0fromR4, R3_optimal_values_theta0fromR4 = [], []

def norm_residuals(reactor_pressure_diff, normres_array, optval_array, theta0):
    """Bounded least-squares fit whose outcome is collected in two lists.

    The fit starts at ``theta0`` and is limited by the module-level
    ``l_bound`` / ``u_bound``. Its parameter vector goes to the end of
    ``optval_array``, the Euclidean norm of its final residuals to the end of
    ``normres_array``; the return value is ``(optval_array, normres_array)``.
    """
    solution = least_squares(reactor_pressure_diff, theta0, bounds=(l_bound, u_bound))
    optval_array.append(solution.x)
    normres_array.append(np.linalg.norm(solution.fun))
    return optval_array, normres_array

# Refit reactor 3 starting from the best parameters of reactors 1, 2 and 4
# (for reactors 1 and 2 the best sets found after their own cross-validation).
opt_R3_CV_R1, normres_R3_CV_R1 = norm_residuals(
    reactor3_pressure_diff, R3_norm_res_theta0fromR1, R3_optimal_values_theta0fromR1, new_reactor1_min_opt)
opt_R3_CV_R2, normres_R3_CV_R2 = norm_residuals(
    reactor3_pressure_diff, R3_norm_res_theta0fromR2, R3_optimal_values_theta0fromR2, new_reactor2_min_opt)
opt_R3_CV_R4, normres_R3_CV_R4 = norm_residuals(
    reactor3_pressure_diff, R3_norm_res_theta0fromR4, R3_optimal_values_theta0fromR4, reactor4_min_opt)

In [None]:
#Fix the formatting of newly generated norm_res and optimal_values to fit the dataframe already existing for least_squares tries

# Each accumulator returned by norm_residuals() holds a single entry here.
# Unwrap the one-element lists of parameter vectors into plain 1-D arrays.
opt_R3_CV_R1 = np.concatenate(opt_R3_CV_R1, axis=0)
opt_R3_CV_R2 = np.concatenate(opt_R3_CV_R2, axis=0)
opt_R3_CV_R4 = np.concatenate(opt_R3_CV_R4, axis=0)

# Take the single norm straight out of its list. The previous
# str(list).strip('[]') round trip breaks on NumPy >= 2, where the list prints
# as '[np.float64(...)]' and float() can no longer parse the remainder.
normres_R3_CV_R1 = float(normres_R3_CV_R1[0])
normres_R3_CV_R2 = float(normres_R3_CV_R2[0])
normres_R3_CV_R4 = float(normres_R3_CV_R4[0])

In [None]:
#Create a dataframe with the new values and add it to the already existing dataframe
# Collect the three cross-validation fits for reactor 3 (start values taken from
# the best fits of reactors 1, 2 and 4) using the column names of the existing
# table of least_squares tries.
data = {
    'Unnamed: 0': ["optimal values from reactor 1", "optimal values from reactor 2", "optimal values from reactor 4"],
    'theta0_array': [new_reactor1_min_opt, new_reactor2_min_opt, reactor4_min_opt],
    'norm_res': [normres_R3_CV_R1, normres_R3_CV_R2, normres_R3_CV_R4],
    'optimal_array': [opt_R3_CV_R1, opt_R3_CV_R2, opt_R3_CV_R4],
}
dataframe = pd.DataFrame(data)

# DataFrame.append() was removed in pandas 2.0; pd.concat() stacks the new rows
# under the existing ones and renumbers the index in the same way.
reactor3_dataframe = pd.concat([reactor3_df_final, dataframe], ignore_index=True)
reactor3_dataframe = reactor3_dataframe.rename(columns={'Unnamed: 0': 'ID'})
reactor3_dataframe = reactor3_dataframe.sort_values(by=['norm_res'])

# Row label, residual norm and parameter vector of the fit with the smallest norm_res
newindex_reactor3_min_normres = reactor3_dataframe['norm_res'].idxmin()
new_reactor3_min_normres = reactor3_dataframe['norm_res'][newindex_reactor3_min_normres]
new_reactor3_min_opt = reactor3_dataframe["optimal_array"][newindex_reactor3_min_normres]

# Display the table in the notebook
reactor3_dataframe

In [None]:
# Save the sorted reactor 3 table (previous tries plus the cross-validation rows).
# The file name is relative, so it resolves against the current working directory.
reactor3_dataframe.to_csv('crossvalidation_optimal_values_reactor3.csv')

# Testing Optimal Values from Reactors 1, 2, and 3 as Theta0 for Reactor 4

In [None]:
############################### generate norm_res for the new theta0 values (optimal_values for reactors used as cross validation)  ###############################################

# Box constraints handed to least_squares (one entry per fitted parameter)
l_bound = [-6, 3, -8, -8, -5]
u_bound = [2, 9, -2, -2, -2]

# Accumulators for the reactor 4 refits, one pair per start vector
R4_norm_res_theta0fromR1, R4_optimal_values_theta0fromR1 = [], []
R4_norm_res_theta0fromR2, R4_optimal_values_theta0fromR2 = [], []
R4_norm_res_theta0fromR3, R4_optimal_values_theta0fromR3 = [], []


def norm_residuals(reactor_pressure_diff, normres_array, optval_array, theta0):
    """Run one bounded least-squares fit and record its result.

    Parameters
    ----------
    reactor_pressure_diff : callable
        Residual function passed to ``least_squares``.
    normres_array : list
        In/out accumulator; the 2-norm of the final residuals is appended.
    optval_array : list
        In/out accumulator; the fitted parameter vector is appended.
    theta0 : array-like
        Start vector for the fit; must lie within ``l_bound``/``u_bound``.

    Returns
    -------
    tuple
        ``(optval_array, normres_array)`` - the same list objects that were
        passed in, note the swapped order relative to the parameters.
    """
    fit = least_squares(reactor_pressure_diff, theta0, bounds=(l_bound, u_bound))
    optval_array.append(fit.x)
    normres_array.append(np.linalg.norm(fit.fun))
    return optval_array, normres_array

# Refit reactor 4 three times, each run starting from another reactor's best-fit vector.
opt_R4_CV_R1, normres_R4_CV_R1 = norm_residuals(
    reactor_pressure_diff=reactor4_pressure_diff,
    normres_array=R4_norm_res_theta0fromR1,
    optval_array=R4_optimal_values_theta0fromR1,
    theta0=new_reactor1_min_opt,
)
opt_R4_CV_R2, normres_R4_CV_R2 = norm_residuals(
    reactor_pressure_diff=reactor4_pressure_diff,
    normres_array=R4_norm_res_theta0fromR2,
    optval_array=R4_optimal_values_theta0fromR2,
    theta0=new_reactor2_min_opt,
)
opt_R4_CV_R3, normres_R4_CV_R3 = norm_residuals(
    reactor_pressure_diff=reactor4_pressure_diff,
    normres_array=R4_norm_res_theta0fromR3,
    optval_array=R4_optimal_values_theta0fromR3,
    theta0=new_reactor3_min_opt,
)

In [None]:
#Fix the formatting of newly generated norm_res and optimal_values to fit the dataframe already existing for least_squares tries

# Each accumulator returned by norm_residuals() holds a single entry here.
# Unwrap the one-element lists of parameter vectors into plain 1-D arrays.
opt_R4_CV_R1 = np.concatenate(opt_R4_CV_R1, axis=0)
opt_R4_CV_R2 = np.concatenate(opt_R4_CV_R2, axis=0)
opt_R4_CV_R3 = np.concatenate(opt_R4_CV_R3, axis=0)

# Take the single norm straight out of its list. The previous
# str(list).strip('[]') round trip breaks on NumPy >= 2, where the list prints
# as '[np.float64(...)]' and float() can no longer parse the remainder.
normres_R4_CV_R1 = float(normres_R4_CV_R1[0])
normres_R4_CV_R2 = float(normres_R4_CV_R2[0])
normres_R4_CV_R3 = float(normres_R4_CV_R3[0])

In [None]:
#Create a dataframe with the new values and add it to the already existing dataframe
# Collect the three cross-validation fits for reactor 4 (start values taken from
# the best fits of reactors 1, 2 and 3) using the column names of the existing
# table of least_squares tries.
data = {
    'Unnamed: 0': ["optimal values from reactor 1", "optimal values from reactor 2", "optimal values from reactor 3"],
    'theta0_array': [new_reactor1_min_opt, new_reactor2_min_opt, new_reactor3_min_opt],
    'norm_res': [normres_R4_CV_R1, normres_R4_CV_R2, normres_R4_CV_R3],
    'optimal_array': [opt_R4_CV_R1, opt_R4_CV_R2, opt_R4_CV_R3],
}
dataframe = pd.DataFrame(data)

# DataFrame.append() was removed in pandas 2.0; pd.concat() stacks the new rows
# under the existing ones and renumbers the index in the same way.
reactor4_dataframe = pd.concat([reactor4_df_final, dataframe], ignore_index=True)
reactor4_dataframe = reactor4_dataframe.rename(columns={'Unnamed: 0': 'ID'})
reactor4_dataframe = reactor4_dataframe.sort_values(by=['norm_res'])

# Row label, residual norm and parameter vector of the fit with the smallest norm_res
newindex_reactor4_min_normres = reactor4_dataframe['norm_res'].idxmin()
new_reactor4_min_normres = reactor4_dataframe['norm_res'][newindex_reactor4_min_normres]
new_reactor4_min_opt = reactor4_dataframe["optimal_array"][newindex_reactor4_min_normres]

# Display the table in the notebook
reactor4_dataframe

In [None]:
# Save the sorted reactor 4 table (previous tries plus the cross-validation rows).
# The file name is relative, so it resolves against the current working directory.
reactor4_dataframe.to_csv('crossvalidation_optimal_values_reactor4.csv')

In [None]:
# If all data files have already been computed, execution can start from this cell to run only the cross-validation and the plots

In [None]:
############################### Read in the optimal values and theta0 generated previously for each reactor  ###############################################

# Load the per-reactor cross-validation tables from CSV.
# NOTE(review): these are absolute, machine-specific Windows paths, whereas the
# to_csv() calls earlier in this notebook write the same file names relative to
# the working directory - confirm both point at the same folder.
reactor1_df_new = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\cross_validation\\crossvalidation_optimal_values_reactor1.csv", sep=",")


reactor2_df_new = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\cross_validation\\crossvalidation_optimal_values_reactor2.csv", sep=",")



reactor3_df_new = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\cross_validation\\crossvalidation_optimal_values_reactor3.csv", sep=",")


reactor4_df_new = pd.read_csv("C:\\Users\\hronn\\Desktop\\Jupyter\\cross_validation\\crossvalidation_optimal_values_reactor4.csv", sep=",")


In [None]:
# Fix the optimal_values so they can be used for further steps - convert values to a list of floats

# Fresh, independent accumulator lists for every reactor:
# parsed float arrays, intermediate token lists, and start vectors.
reactor1_optimal_array, reactor1_optimal_list, reactor1_theta0_array = [], [], []
reactor2_optimal_array, reactor2_optimal_list, reactor2_theta0_array = [], [], []
reactor3_optimal_array, reactor3_optimal_list, reactor3_theta0_array = [], [], []
reactor4_optimal_array, reactor4_optimal_list, reactor4_theta0_array = [], [], []


def format_dataframe(optimal_values, optimal_list, optimal_array):
    """Parse stringified parameter vectors into NumPy float arrays.

    Parameters
    ----------
    optimal_values : indexable of str
        Text such as ``"[ 1.5 -2.   3.25]"`` (whitespace separated) or
        ``"[1.5, -2.0, 3.25]"`` (comma separated). Entries are read as
        ``optimal_values[i]`` for ``i`` in ``range(len(optimal_values))``.
    optimal_list : list
        In/out accumulator; the list of number tokens of every entry is
        appended to it.
    optimal_array : list
        In/out accumulator; one float array is appended for every entry of
        ``optimal_list``.

    Returns
    -------
    list
        ``optimal_array`` (the same object that was passed in).

    Raises
    ------
    ValueError
        If a token cannot be converted to ``float``.
    """
    # Index-based access is kept on purpose so entries are visited in exactly
    # the same order as before (0, 1, ..., len - 1).
    for i in range(len(optimal_values)):
        raw = optimal_values[i]
        # Treat commas as separators, drop the enclosing brackets, and split on
        # any run of whitespace; this never yields empty tokens, so no clean-up
        # pass is needed afterwards.
        tokens = raw.replace(',', ' ').strip().strip('][').split()
        optimal_list.append(tokens)

    optimal_array.extend(np.array(tokens, dtype=float) for tokens in optimal_list)
    return optimal_array


# Parse the "optimal_array" text column of the frames that were just read from
# CSV. The new_reactorN_df_final frames are only created further down in this
# cell, so reading from them here raises a NameError on a fresh kernel.
reactor1_opt_array = format_dataframe(reactor1_df_new["optimal_array"], reactor1_optimal_list, reactor1_optimal_array)
reactor2_opt_array = format_dataframe(reactor2_df_new["optimal_array"], reactor2_optimal_list, reactor2_optimal_array)
reactor3_opt_array = format_dataframe(reactor3_df_new["optimal_array"], reactor3_optimal_list, reactor3_optimal_array)
reactor4_opt_array = format_dataframe(reactor4_df_new["optimal_array"], reactor4_optimal_list, reactor4_optimal_array)

def add_arrays_to_dataframe(reactor_df_new, reactor_opt_array):
    """Store ``reactor_opt_array`` in the "optimal_array" column of the frame.

    The frame is modified in place and the same object is returned.
    """
    reactor_df_new["optimal_array"] = reactor_opt_array

    return reactor_df_new


# Replace the text column of every reactor table with the parsed float arrays
reactor1_df_new = add_arrays_to_dataframe(reactor1_df_new, reactor1_opt_array)
reactor2_df_new = add_arrays_to_dataframe(reactor2_df_new, reactor2_opt_array)
reactor3_df_new = add_arrays_to_dataframe(reactor3_df_new, reactor3_opt_array)
reactor4_df_new = add_arrays_to_dataframe(reactor4_df_new, reactor4_opt_array)

############################### generate final sorted dataframes and find the optimal_values that have the lowest norm_res  ###############################################

# For every reactor: sort the table by norm_res, then pick the row label, the
# residual norm and the parameter vector of the smallest-norm fit.

# Reactor 1
new_reactor1_df_final = reactor1_df_new.sort_values(by=['norm_res'])
new_reactor1_min_idnormres = new_reactor1_df_final['norm_res'].idxmin()
new_reactor1_min_normres = new_reactor1_df_final.at[new_reactor1_min_idnormres, "norm_res"]
new_reactor1_min_opt = new_reactor1_df_final.at[new_reactor1_min_idnormres, "optimal_array"]

# Reactor 2
new_reactor2_df_final = reactor2_df_new.sort_values(by=['norm_res'])
new_reactor2_min_idnormres = new_reactor2_df_final['norm_res'].idxmin()
new_reactor2_min_normres = new_reactor2_df_final.at[new_reactor2_min_idnormres, "norm_res"]
new_reactor2_min_opt = new_reactor2_df_final.at[new_reactor2_min_idnormres, "optimal_array"]

# Reactor 3
new_reactor3_df_final = reactor3_df_new.sort_values(by=['norm_res'])
new_reactor3_min_idnormres = new_reactor3_df_final['norm_res'].idxmin()
new_reactor3_min_normres = new_reactor3_df_final.at[new_reactor3_min_idnormres, "norm_res"]
new_reactor3_min_opt = new_reactor3_df_final.at[new_reactor3_min_idnormres, "optimal_array"]

# Reactor 4
new_reactor4_df_final = reactor4_df_new.sort_values(by=['norm_res'])
new_reactor4_min_idnormres = new_reactor4_df_final['norm_res'].idxmin()
new_reactor4_min_normres = new_reactor4_df_final.at[new_reactor4_min_idnormres, "norm_res"]
new_reactor4_min_opt = new_reactor4_df_final.at[new_reactor4_min_idnormres, "optimal_array"]

In [None]:
# NOTE(review): this repeats the sort that already produced new_reactor4_df_final
# at the end of the previous cell - looks redundant; confirm before removing.
new_reactor4_df_final = reactor4_df_new.sort_values(by=['norm_res'])

# Reactor 1 - Cross Validation

In [None]:
############################### Take the average of the best optimal values for reactors 2,3, and 4  ###############################################
# Best-fit parameter vectors of the three reactors other than reactor 1
opt_R234 = [new_reactor2_min_opt, new_reactor3_min_opt, new_reactor4_min_opt]

# One accumulator per fitted parameter, filled by average_opt()
biomass_r1, CO2_uptake_r1, H2_half_velocity_r1, CO2_half_velocity_r1, decay_biomass_r1 = [], [], [], [], []


def average_opt(array_list):
    """Split parameter vectors into the five per-parameter accumulators.

    Elements 0-4 of every vector in ``array_list`` are appended to the
    module-level lists ``biomass_r1``, ``CO2_uptake_r1``,
    ``H2_half_velocity_r1``, ``CO2_half_velocity_r1`` and ``decay_biomass_r1``
    (in that order). Returns those five lists as a tuple; no averaging is
    done here.
    """
    columns = (biomass_r1, CO2_uptake_r1, H2_half_velocity_r1, CO2_half_velocity_r1, decay_biomass_r1)
    for theta in array_list:
        for position, column in enumerate(columns):
            column.append(theta[position])
    return columns


(biomass_r1, CO2_uptake_r1, H2_half_velocity_r1,
 CO2_half_velocity_r1, decay_biomass_r1) = average_opt(opt_R234)

# Mean of every parameter over the three reactors, kept both as a vector and
# under individual names.
optmean_R234 = [
    st.mean(samples)
    for samples in (biomass_r1, CO2_uptake_r1, H2_half_velocity_r1, CO2_half_velocity_r1, decay_biomass_r1)
]
(biomass_mean_r1, CO2_uptake_mean_r1, H2_half_velocity_mean_r1,
 CO2_half_velocity_mean_r1, decay_biomass_mean_r1) = optmean_R234

In [None]:
# Display the averaged parameter vector used to cross-validate reactor 1
optmean_R234

# Reactor 2 - Cross Validation

In [None]:
############################### Take the average of the best optimal values for reactors 1,3, and 4  ###############################################
# Best-fit parameter vectors of the three reactors other than reactor 2
opt_R134 = [new_reactor1_min_opt, new_reactor3_min_opt, new_reactor4_min_opt]

# One accumulator per fitted parameter, filled by average_opt()
biomass_r2, CO2_uptake_r2, H2_half_velocity_r2, CO2_half_velocity_r2, decay_biomass_r2 = [], [], [], [], []


def average_opt(array_list):
    """Split parameter vectors into the five per-parameter accumulators.

    Elements 0-4 of every vector in ``array_list`` are appended to the
    module-level lists ``biomass_r2``, ``CO2_uptake_r2``,
    ``H2_half_velocity_r2``, ``CO2_half_velocity_r2`` and ``decay_biomass_r2``
    (in that order). Returns those five lists as a tuple; no averaging is
    done here.
    """
    columns = (biomass_r2, CO2_uptake_r2, H2_half_velocity_r2, CO2_half_velocity_r2, decay_biomass_r2)
    for theta in array_list:
        for position, column in enumerate(columns):
            column.append(theta[position])
    return columns


(biomass_r2, CO2_uptake_r2, H2_half_velocity_r2,
 CO2_half_velocity_r2, decay_biomass_r2) = average_opt(opt_R134)

# Mean of every parameter over the three reactors, kept both as a vector and
# under individual names.
optmean_R134 = [
    st.mean(samples)
    for samples in (biomass_r2, CO2_uptake_r2, H2_half_velocity_r2, CO2_half_velocity_r2, decay_biomass_r2)
]
(biomass_mean_r2, CO2_uptake_mean_r2, H2_half_velocity_mean_r2,
 CO2_half_velocity_mean_r2, decay_biomass_mean_r2) = optmean_R134

In [None]:
# Display the averaged parameter vector used to cross-validate reactor 2
optmean_R134

# Reactor 3 - Cross Validation

In [None]:
############################### Take the average of the best optimal values for reactors 1,2, and 4  ###############################################
# Best-fit parameter vectors of the three reactors other than reactor 3
opt_R124 = [new_reactor1_min_opt, new_reactor2_min_opt, new_reactor4_min_opt]

# One accumulator per fitted parameter, filled by average_opt()
biomass_r3, CO2_uptake_r3, H2_half_velocity_r3, CO2_half_velocity_r3, decay_biomass_r3 = [], [], [], [], []


def average_opt(array_list):
    """Split parameter vectors into the five per-parameter accumulators.

    Elements 0-4 of every vector in ``array_list`` are appended to the
    module-level lists ``biomass_r3``, ``CO2_uptake_r3``,
    ``H2_half_velocity_r3``, ``CO2_half_velocity_r3`` and ``decay_biomass_r3``
    (in that order). Returns those five lists as a tuple; no averaging is
    done here.
    """
    columns = (biomass_r3, CO2_uptake_r3, H2_half_velocity_r3, CO2_half_velocity_r3, decay_biomass_r3)
    for theta in array_list:
        for position, column in enumerate(columns):
            column.append(theta[position])
    return columns


(biomass_r3, CO2_uptake_r3, H2_half_velocity_r3,
 CO2_half_velocity_r3, decay_biomass_r3) = average_opt(opt_R124)

# Mean of every parameter over the three reactors, kept both as a vector and
# under individual names.
optmean_R124 = [
    st.mean(samples)
    for samples in (biomass_r3, CO2_uptake_r3, H2_half_velocity_r3, CO2_half_velocity_r3, decay_biomass_r3)
]
(biomass_mean_r3, CO2_uptake_mean_r3, H2_half_velocity_mean_r3,
 CO2_half_velocity_mean_r3, decay_biomass_mean_r3) = optmean_R124

In [None]:
# Display the averaged parameter vector used to cross-validate reactor 3
optmean_R124

# Reactor 4 - Cross Validation

In [None]:
############################### Take the average of the best optimal values for reactors 1,2, and 3  ###############################################
# Best-fit parameter vectors of the three reactors other than reactor 4
opt_R123 = [new_reactor1_min_opt, new_reactor2_min_opt, new_reactor3_min_opt]

# One accumulator per fitted parameter, filled by average_opt()
biomass_r4, CO2_uptake_r4, H2_half_velocity_r4, CO2_half_velocity_r4, decay_biomass_r4 = [], [], [], [], []


def average_opt(array_list):
    """Split parameter vectors into the five per-parameter accumulators.

    Elements 0-4 of every vector in ``array_list`` are appended to the
    module-level lists ``biomass_r4``, ``CO2_uptake_r4``,
    ``H2_half_velocity_r4``, ``CO2_half_velocity_r4`` and ``decay_biomass_r4``
    (in that order). Returns those five lists as a tuple; no averaging is
    done here.
    """
    columns = (biomass_r4, CO2_uptake_r4, H2_half_velocity_r4, CO2_half_velocity_r4, decay_biomass_r4)
    for theta in array_list:
        for position, column in enumerate(columns):
            column.append(theta[position])
    return columns


(biomass_r4, CO2_uptake_r4, H2_half_velocity_r4,
 CO2_half_velocity_r4, decay_biomass_r4) = average_opt(opt_R123)

# Mean of every parameter over the three reactors, kept both as a vector and
# under individual names.
optmean_R123 = [
    st.mean(samples)
    for samples in (biomass_r4, CO2_uptake_r4, H2_half_velocity_r4, CO2_half_velocity_r4, decay_biomass_r4)
]
(biomass_mean_r4, CO2_uptake_mean_r4, H2_half_velocity_mean_r4,
 CO2_half_velocity_mean_r4, decay_biomass_mean_r4) = optmean_R123

In [None]:
# Display the averaged parameter vector used to cross-validate reactor 4
optmean_R123

# Plotting the Cross Validation

In [None]:
# Cross-validation values for each parameter of each reactor
# Cross-validation value of every parameter for every reactor: the averaged
# vectors hold exponents, so each entry is raised to the power of ten.

# Reactor 1 (average over reactors 2, 3 and 4)
biomass_mean_R1 = 10**optmean_R234[0]
CO2_uptake_mean_R1 = 10**optmean_R234[1]
H2_half_velocity_mean_R1 = 10**optmean_R234[2]
CO2_half_velocity_mean_R1 = 10**optmean_R234[3]
decay_biomass_mean_R1 = 10**optmean_R234[4]

# Reactor 2 (average over reactors 1, 3 and 4)
biomass_mean_R2 = 10**optmean_R134[0]
CO2_uptake_mean_R2 = 10**optmean_R134[1]
H2_half_velocity_mean_R2 = 10**optmean_R134[2]
CO2_half_velocity_mean_R2 = 10**optmean_R134[3]
decay_biomass_mean_R2 = 10**optmean_R134[4]

# Reactor 3 (average over reactors 1, 2 and 4)
biomass_mean_R3 = 10**optmean_R124[0]
CO2_uptake_mean_R3 = 10**optmean_R124[1]
H2_half_velocity_mean_R3 = 10**optmean_R124[2]
CO2_half_velocity_mean_R3 = 10**optmean_R124[3]
decay_biomass_mean_R3 = 10**optmean_R124[4]

# Reactor 4 (average over reactors 1, 2 and 3)
biomass_mean_R4 = 10**optmean_R123[0]
CO2_uptake_mean_R4 = 10**optmean_R123[1]
# Was optmean_R132, a name that is never defined (NameError); the vector for
# reactor 4 is optmean_R123.
H2_half_velocity_mean_R4 = 10**optmean_R123[2]
CO2_half_velocity_mean_R4 = 10**optmean_R123[3]
decay_biomass_mean_R4 = 10**optmean_R123[4]

In [None]:
# Best fit for parameters of interest for each reactor
# Best-fit value of every parameter for every reactor: the stored vectors hold
# exponents, so each entry is raised to the power of ten.

# Reactor 1
biomass_org_R1 = 10**new_reactor1_min_opt[0]
CO2_uptake_org_R1 = 10**new_reactor1_min_opt[1]
H2_half_velocity_org_R1 = 10**new_reactor1_min_opt[2]
CO2_half_velocity_org_R1 = 10**new_reactor1_min_opt[3]
decay_biomass_org_R1 = 10**new_reactor1_min_opt[4]

# Reactor 2
biomass_org_R2 = 10**new_reactor2_min_opt[0]
CO2_uptake_org_R2 = 10**new_reactor2_min_opt[1]
# Was read from new_reactor4_min_opt, which reported reactor 4's value under
# reactor 2's name.
H2_half_velocity_org_R2 = 10**new_reactor2_min_opt[2]
CO2_half_velocity_org_R2 = 10**new_reactor2_min_opt[3]
decay_biomass_org_R2 = 10**new_reactor2_min_opt[4]

# Reactor 3
biomass_org_R3 = 10**new_reactor3_min_opt[0]
CO2_uptake_org_R3 = 10**new_reactor3_min_opt[1]
H2_half_velocity_org_R3 = 10**new_reactor3_min_opt[2]
CO2_half_velocity_org_R3 = 10**new_reactor3_min_opt[3]
decay_biomass_org_R3 = 10**new_reactor3_min_opt[4]

# Reactor 4
biomass_org_R4 = 10**new_reactor4_min_opt[0]
CO2_uptake_org_R4 = 10**new_reactor4_min_opt[1]
H2_half_velocity_org_R4 = 10**new_reactor4_min_opt[2]
CO2_half_velocity_org_R4 = 10**new_reactor4_min_opt[3]
decay_biomass_org_R4 = 10**new_reactor4_min_opt[4]

In [None]:
# log scale values for average optimal value calculation

# Log-scale (exponent) values of the averaged vectors, one name per parameter
# and reactor. Each optmean_* vector holds exactly five entries.
(logbiomass_mean_R1, logCO2_uptake_mean_R1, logH2_half_velocity_mean_R1,
 logCO2_half_velocity_mean_R1, logdecay_biomass_mean_R1) = optmean_R234

(logbiomass_mean_R2, logCO2_uptake_mean_R2, logH2_half_velocity_mean_R2,
 logCO2_half_velocity_mean_R2, logdecay_biomass_mean_R2) = optmean_R134

(logbiomass_mean_R3, logCO2_uptake_mean_R3, logH2_half_velocity_mean_R3,
 logCO2_half_velocity_mean_R3, logdecay_biomass_mean_R3) = optmean_R124

(logbiomass_mean_R4, logCO2_uptake_mean_R4, logH2_half_velocity_mean_R4,
 logCO2_half_velocity_mean_R4, logdecay_biomass_mean_R4) = optmean_R123

In [None]:
# Generate the average values from cross validated values of all reactors => to get the best fit for the parameters of interest

# Per-parameter lists of the four cross-validated (log-scale) values
biomass_bestfitarray = [logbiomass_mean_R1, logbiomass_mean_R2, logbiomass_mean_R3, logbiomass_mean_R4]
CO2_uptake_bestfitarray = [logCO2_uptake_mean_R1, logCO2_uptake_mean_R2, logCO2_uptake_mean_R3, logCO2_uptake_mean_R4]
H2_half_velocity_bestfitarray = [logH2_half_velocity_mean_R1, logH2_half_velocity_mean_R2, logH2_half_velocity_mean_R3, logH2_half_velocity_mean_R4]
CO2_half_velocity_bestfitarray = [logCO2_half_velocity_mean_R1, logCO2_half_velocity_mean_R2, logCO2_half_velocity_mean_R3, logCO2_half_velocity_mean_R4]
decay_biomass_bestfitarray = [logdecay_biomass_mean_R1, logdecay_biomass_mean_R2, logdecay_biomass_mean_R3, logdecay_biomass_mean_R4]

# Arithmetic mean over the four reactors for every parameter
theta_best_fit_log = [
    sum(per_reactor) / len(per_reactor)
    for per_reactor in (
        biomass_bestfitarray,
        CO2_uptake_bestfitarray,
        H2_half_velocity_bestfitarray,
        CO2_half_velocity_bestfitarray,
        decay_biomass_bestfitarray,
    )
]
(biomass_logbestfit, CO2_uptake_logbestfit, H2_half_velocity_logbestfit,
 CO2_half_velocity_logbestfit, decay_biomass_logbestfit) = theta_best_fit_log

print("Best optimal value found for parameters of interest (log-scale): ", theta_best_fit_log)

In [None]:
# Biomass: best fit of each reactor next to its cross-validation value
barWidth = 0.25
data = [
    [biomass_org_R1, biomass_org_R2, biomass_org_R3, biomass_org_R4],
    [biomass_mean_R1, biomass_mean_R2, biomass_mean_R3, biomass_mean_R4],
]

# x positions of the best-fit bars and of the cross-validation bars (offset by one bar width)
r1 = np.arange(len(data[0]))
r2 = [pos + barWidth for pos in r1]
fig = plt.figure()

plt.bar(r1, data[0], width=barWidth, color='brown')
plt.bar(r2, data[1], width=barWidth, color='darkseagreen')

# The tick positions coincide with the cross-validation bar positions
plt.xticks(r2, ["Reactor 1", "Reactor 2", "Reactor 3", "Reactor 4"])
plt.ylabel('Biomass [Kg/m3]')

# Hand-built legend: one coloured patch per series
colors = {'Best Fit':'brown', 'Cross Validation':'darkseagreen'}
labels = list(colors)
handles = [plt.Rectangle((0, 0), 1, 1, color=shade) for shade in colors.values()]
plt.legend(handles, labels)

plt.yscale('log')
plt.ylim(1e-7, 1e3)

# Dotted line: average of the cross-validated values over all reactors
plt.axhline(y=10**biomass_logbestfit, linestyle='dotted', color='red', linewidth=2)
plt.show()


######################################################


# CO2 uptake: best fit of each reactor next to its cross-validation value
barWidth = 0.25
data = [
    [CO2_uptake_org_R1, CO2_uptake_org_R2, CO2_uptake_org_R3, CO2_uptake_org_R4],
    [CO2_uptake_mean_R1, CO2_uptake_mean_R2, CO2_uptake_mean_R3, CO2_uptake_mean_R4],
]

# x positions of the best-fit bars and of the cross-validation bars (offset by one bar width)
r1 = np.arange(len(data[0]))
r2 = [pos + barWidth for pos in r1]
fig = plt.figure()

plt.bar(r1, data[0], width=barWidth, color='brown')
plt.bar(r2, data[1], width=barWidth, color='darkseagreen')

# The tick positions coincide with the cross-validation bar positions
plt.xticks(r2, ["Reactor 1", "Reactor 2", "Reactor 3", "Reactor 4"])
plt.ylabel('CO2 Uptake Constant [1/KgBM h]')

# Hand-built legend: one coloured patch per series
colors = {'Best Fit':'brown', 'Cross Validation':'darkseagreen'}
labels = list(colors)
handles = [plt.Rectangle((0, 0), 1, 1, color=shade) for shade in colors.values()]
plt.legend(handles, labels)

plt.yscale('log')
plt.ylim(1e0, 1e8)

# Dotted line: average of the cross-validated values over all reactors
plt.axhline(y=10**CO2_uptake_logbestfit, linestyle='dotted', color='red', linewidth=2)
plt.show()

#########################################################

# H2 half velocity: best fit of each reactor next to its cross-validation value
barWidth = 0.25
data = [
    [H2_half_velocity_org_R1, H2_half_velocity_org_R2, H2_half_velocity_org_R3, H2_half_velocity_org_R4],
    [H2_half_velocity_mean_R1, H2_half_velocity_mean_R2, H2_half_velocity_mean_R3, H2_half_velocity_mean_R4],
]

# x positions of the best-fit bars and of the cross-validation bars (offset by one bar width)
r1 = np.arange(len(data[0]))
r2 = [pos + barWidth for pos in r1]
fig = plt.figure()

plt.bar(r1, data[0], width=barWidth, color='brown')
plt.bar(r2, data[1], width=barWidth, color='darkseagreen')

# The tick positions coincide with the cross-validation bar positions
plt.xticks(r2, ["Reactor 1", "Reactor 2", "Reactor 3", "Reactor 4"])
plt.ylabel('H2 Half Velocity')

# Hand-built legend: one coloured patch per series
colors = {'Best Fit':'brown', 'Cross Validation':'darkseagreen'}
labels = list(colors)
handles = [plt.Rectangle((0, 0), 1, 1, color=shade) for shade in colors.values()]
plt.legend(handles, labels)

plt.yscale('log')
plt.ylim(1e-13, 1e-3)

# Dotted line: average of the cross-validated values over all reactors
plt.axhline(y=10**H2_half_velocity_logbestfit, linestyle='dotted', color='red', linewidth=2)
plt.show()

#########################################################

# CO2 half velocity: best fit of each reactor next to its cross-validation value
barWidth = 0.25
# The cross-validation row previously used H2_half_velocity_mean_R2/_R3, so the
# bars for reactors 2 and 3 showed the wrong parameter.
data = [[CO2_half_velocity_org_R1, CO2_half_velocity_org_R2, CO2_half_velocity_org_R3, CO2_half_velocity_org_R4],
[CO2_half_velocity_mean_R1, CO2_half_velocity_mean_R2, CO2_half_velocity_mean_R3, CO2_half_velocity_mean_R4]]

# x positions of the best-fit bars and of the cross-validation bars (offset by one bar width)
r1 = np.arange(len(data[0]))
r2 = [x + barWidth for x in r1]
fig = plt.figure()

plt.bar(r1, data[0], width = barWidth, color = 'brown')
plt.bar(r2, data[1], width = barWidth, color = 'darkseagreen')

plt.xticks([r + barWidth for r in range(len(data[0]))], ["Reactor 1", "Reactor 2", "Reactor 3", "Reactor 4"])
plt.ylabel('CO2 Half Velocity')

# Hand-built legend: one coloured patch per series
colors = {'Best Fit':'brown', 'Cross Validation':'darkseagreen'}
labels = list(colors.keys())
handles = [plt.Rectangle((0,0),1,1, color=colors[label]) for label in labels]
plt.legend(handles, labels)

plt.yscale('log')
plt.ylim([1e-12, 1e-2])

# Dotted line: average of the cross-validated values over all reactors
plt.axhline(10**CO2_half_velocity_logbestfit, linestyle='dotted', color='red', linewidth=2)
# Show graphic
plt.show()

#########################################################

# Biomass decay: best fit of each reactor next to its cross-validation value
barWidth = 0.25
data = [
    [decay_biomass_org_R1, decay_biomass_org_R2, decay_biomass_org_R3, decay_biomass_org_R4],
    [decay_biomass_mean_R1, decay_biomass_mean_R2, decay_biomass_mean_R3, decay_biomass_mean_R4],
]

# x positions of the best-fit bars and of the cross-validation bars (offset by one bar width)
r1 = np.arange(len(data[0]))
r2 = [pos + barWidth for pos in r1]
fig = plt.figure()

plt.bar(r1, data[0], width=barWidth, color='brown')
plt.bar(r2, data[1], width=barWidth, color='darkseagreen')

# The tick positions coincide with the cross-validation bar positions
plt.xticks(r2, ["Reactor 1", "Reactor 2", "Reactor 3", "Reactor 4"])
plt.ylabel('Biomass Decay [1/h]')

# Hand-built legend: one coloured patch per series
colors = {'Best Fit':'brown', 'Cross Validation':'darkseagreen'}
labels = list(colors)
handles = [plt.Rectangle((0, 0), 1, 1, color=shade) for shade in colors.values()]
plt.legend(handles, labels)

plt.yscale('log')
plt.ylim(1e-10, 1e0)

# Dotted line: average of the cross-validated values over all reactors
plt.axhline(y=10**decay_biomass_logbestfit, linestyle='dotted', color='red', linewidth=2)
plt.show()

# Norm of residuals for a fixed theta0 (average optimal values from reactors)

In [None]:

# Reactor 1 - fixed cross validation
# Evaluate every reactor's residual function once at the fixed, averaged
# parameter vector of the other three reactors (no refit) ...
residuals_R1_CV_fixed = reactor1_pressure_diff(optmean_R234)
residuals_R2_CV_fixed = reactor2_pressure_diff(optmean_R134)
residuals_R3_CV_fixed = reactor3_pressure_diff(optmean_R124)
residuals_R4_CV_fixed = reactor4_pressure_diff(optmean_R123)

# ... and reduce each residual vector to its 2-norm.
norm_residuals_R1_CV_fixed = np.linalg.norm(residuals_R1_CV_fixed)
norm_residuals_R2_CV_fixed = np.linalg.norm(residuals_R2_CV_fixed)
norm_residuals_R3_CV_fixed = np.linalg.norm(residuals_R3_CV_fixed)
norm_residuals_R4_CV_fixed = np.linalg.norm(residuals_R4_CV_fixed)



In [None]:
# Create a dataframe to compare the values 
# Side-by-side table: residual norm of each reactor's own best fit versus the
# norm obtained with the fixed cross-validation parameters.
data = dict(
    Best_fit_normres=[
        new_reactor1_min_normres,
        new_reactor2_min_normres,
        new_reactor3_min_normres,
        new_reactor4_min_normres,
    ],
    Cross_validation_normres=[
        norm_residuals_R1_CV_fixed,
        norm_residuals_R2_CV_fixed,
        norm_residuals_R3_CV_fixed,
        norm_residuals_R4_CV_fixed,
    ],
)

df = pd.DataFrame(data, index=['Reactor 1', 'Reactor 2', 'Reactor 3', 'Reactor 4'])
df

In [None]:
# Plot the norm of residuals for all reactors against the best fit for each reactor

# All reactors on 1 plot
# Residual norms: best fit of each reactor next to its cross-validation value
barWidth = 0.25
data = [
    [new_reactor1_min_normres, new_reactor2_min_normres, new_reactor3_min_normres, new_reactor4_min_normres],
    [norm_residuals_R1_CV_fixed, norm_residuals_R2_CV_fixed, norm_residuals_R3_CV_fixed, norm_residuals_R4_CV_fixed],
]

# x positions of the best-fit bars and of the cross-validation bars (offset by one bar width)
r1 = np.arange(len(data[0]))
r2 = [pos + barWidth for pos in r1]
fig = plt.figure()

plt.bar(r1, data[0], width=barWidth, color="brown")
plt.bar(r2, data[1], width=barWidth, color="darkseagreen")

# The tick positions coincide with the cross-validation bar positions
plt.xticks(r2, ["Reactor 1", "Reactor 2", "Reactor 3", "Reactor 4"])

# Hand-built legend: one coloured patch per series
colors = {'Best Fit':'brown', 'Cross Validation':'darkseagreen'}
labels = list(colors)
handles = [plt.Rectangle((0, 0), 1, 1, color=shade) for shade in colors.values()]
plt.legend(handles, labels)

plt.ylabel('Norm of Residuals')
plt.title("Norm of Residuals for all reactors")

plt.yscale('log')
plt.ylim(1e0, 1e10)

plt.show()