In [None]:
import scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import minimize

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
import time
import warnings

In [None]:
# Training data set with measurements for all three gases (CO2, Helium, Air).
# Later cells pick one gas at a time through the `Gas` column, where Air is
# labelled "Luft", and fit the model on the `Time`, `Pressure` and `Speed` columns.
training = pd.read_csv('training.csv')

In [None]:
# Held-out test data set with measurements for all three gases (CO2, Helium, Air).
# Later cells pick one gas at a time through the `Gas` column, where Air is
# labelled "Luft", and score the fitted model against its `Speed` column.
test = pd.read_csv('test.csv')

In [None]:
def sse(par, data):
    """Sum of squared errors of the extended BC.5 curve (linear pressure terms).

    Each of the five curve parameters b, c, d, e, f is modelled as
    ``intercept + slope * Pressure`` and the curve is evaluated as
    ``c + (d - c + f*Time) / (1 + exp(b * (log(Time) - log(e))))``.

    Parameters
    ----------
    par : sequence of 10 floats
        ``par[0:5]`` are the intercepts of b, c, d, e, f (in that order) and
        ``par[5:10]`` the matching pressure slopes.
    data : pandas.DataFrame
        Must provide the columns ``Time``, ``Pressure`` and ``Speed``.
        No other column is read.

    Returns
    -------
    float
        ``sum((Speed - prediction) ** 2)``, or ``inf`` when any row yields
        ``e < 1e-10``.
    """
    x = data['Time']
    z = data['Pressure']
    y = data['Speed']

    # Pressure-dependent curve parameters.
    b = par[0] + par[5]*z
    c = par[1] + par[6]*z
    d = par[2] + par[7]*z
    e = par[3] + par[8]*z
    f = par[4] + par[9]*z

    # log(e) is undefined for non-positive e; report such parameter vectors
    # as infinitely bad so the optimizer moves away from them.
    if np.any(e < 1e-10):
        return float('inf')

    y_pred = c + ((d - c + f*x)/(1+np.exp(b*(np.log(x)-np.log(e)))))
    return np.sum((y - y_pred)**2)

def extended_bc5(test_, coef):
    """Evaluate the extended BC.5 curve (linear pressure terms) on a data frame.

    Each of the five curve parameters b, c, d, e, f is computed as
    ``intercept + slope * Pressure`` and the prediction is
    ``c + (d - c + f*Time) / (1 + exp(b * (log(Time) - log(e))))``.

    Parameters
    ----------
    test_ : pandas.DataFrame
        Must provide the columns ``Time`` and ``Pressure``. No other column
        is read.
    coef : sequence of 10 floats
        ``coef[0:5]`` are the intercepts of b, c, d, e, f (in that order) and
        ``coef[5:10]`` the matching pressure slopes.

    Returns
    -------
    pandas.Series
        One prediction per row of ``test_``. No positivity check is applied
        to ``e`` here, so rows with ``e <= 0`` produce non-finite values.
    """
    x = test_['Time']
    z = test_['Pressure']

    # Pressure-dependent curve parameters.
    b = coef[0] + coef[5]*z
    c = coef[1] + coef[6]*z
    d = coef[2] + coef[7]*z
    e = coef[3] + coef[8]*z
    f = coef[4] + coef[9]*z

    return c + ((d - c + f*x)/(1+np.exp(b*(np.log(x)-np.log(e)))))


In [None]:
def sse_nl(par, data):
    """Sum of squared errors of the extended BC.5 curve (quadratic pressure terms).

    Each of the five curve parameters b, c, d, e, f is modelled as
    ``intercept + slope * Pressure + curvature * Pressure**2`` and the curve
    is evaluated as
    ``c + (d - c + f*Time) / (1 + exp(b * (log(Time) - log(e))))``.

    Parameters
    ----------
    par : sequence of 15 floats
        ``par[0:5]`` are the intercepts of b, c, d, e, f (in that order),
        ``par[5:10]`` the linear pressure coefficients and ``par[10:15]`` the
        quadratic pressure coefficients.
    data : pandas.DataFrame
        Must provide the columns ``Time``, ``Pressure`` and ``Speed``.
        No other column is read.

    Returns
    -------
    float
        ``sum((Speed - prediction) ** 2)``, or ``inf`` when any row yields
        ``e < 1e-10`` (same feasibility rule as ``sse``).
    """
    x = data['Time']
    z = data['Pressure']
    y = data['Speed']
    z_sq = z**2

    # Pressure-dependent curve parameters.
    b = par[0] + par[5]*z + par[10]*z_sq
    c = par[1] + par[6]*z + par[11]*z_sq
    d = par[2] + par[7]*z + par[12]*z_sq
    e = par[3] + par[8]*z + par[13]*z_sq
    f = par[4] + par[9]*z + par[14]*z_sq

    # log(e) is undefined for non-positive e. Without this guard the objective
    # would evaluate to NaN, which gives the optimizer nothing to compare against.
    if np.any(e < 1e-10):
        return float('inf')

    y_pred = c + ((d - c + f*x)/(1+np.exp(b*(np.log(x)-np.log(e)))))
    return np.sum((y - y_pred)**2)

def extended_bc5_nl(test_, coef):
    """Evaluate the extended BC.5 curve (quadratic pressure terms) on a data frame.

    Each of the five curve parameters b, c, d, e, f is computed as
    ``intercept + slope * Pressure + curvature * Pressure**2`` and the
    prediction is
    ``c + (d - c + f*Time) / (1 + exp(b * (log(Time) - log(e))))``.

    Parameters
    ----------
    test_ : pandas.DataFrame
        Must provide the columns ``Time`` and ``Pressure``. No other column
        is read.
    coef : sequence of 15 floats
        ``coef[0:5]`` are the intercepts of b, c, d, e, f (in that order),
        ``coef[5:10]`` the linear pressure coefficients and ``coef[10:15]``
        the quadratic pressure coefficients.

    Returns
    -------
    pandas.Series
        One prediction per row of ``test_``. No positivity check is applied
        to ``e`` here, so rows with ``e <= 0`` produce non-finite values.
    """
    x = test_['Time']
    z = test_['Pressure']
    z_sq = z**2

    # Pressure-dependent curve parameters.
    b = coef[0] + coef[5]*z + coef[10]*z_sq
    c = coef[1] + coef[6]*z + coef[11]*z_sq
    d = coef[2] + coef[7]*z + coef[12]*z_sq
    e = coef[3] + coef[8]*z + coef[13]*z_sq
    f = coef[4] + coef[9]*z + coef[14]*z_sq

    return c + ((d - c + f*x)/(1+np.exp(b*(np.log(x)-np.log(e)))))

In [None]:
## CO2
# predictors: pressure, time
print("---Building model using extended BC.5 for CO2---")
# Optimizer starting point: five base coefficients followed by five pressure slopes.
init_coeff = [6.4790, 17.8232, 170.9843, 19.9785, -9.9553, 1, 1, 1, 1, 1]
# init_coeff = [6.4790, 17.8232, 170.9843, 19.9785, -9.9553, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] # in non-linear case

# Select the CO2 rows once; the same subsets feed the fit, the prediction and both metrics.
train_co2 = training[training["Gas"] == "CO2"]
test_co2 = test[test["Gas"] == "CO2"]

# Ignore RuntimeWarning (presumably the overflow / invalid-value warnings numpy
# emits while the optimizer probes extreme parameter values).
warnings.simplefilter("ignore", RuntimeWarning)

# Use sse_nl (with the 15-element init_coeff) for the non-linear variant.
# Swap `method` for another scipy.optimize.minimize method as needed.
# perf_counter is a monotonic clock, so the timing only covers the optimizer run.
start_time = time.perf_counter()
coeff_opt = minimize(sse, init_coeff, args=(train_co2,), method='BFGS', options={'maxiter': 10000})
end_time = time.perf_counter()

print(f"Time taken by function: {end_time - start_time} seconds")
# The metrics below are computed from coeff_opt.x regardless of convergence,
# so make a failed optimization visible instead of silently scoring it.
if not coeff_opt.success:
    print(f"WARNING: optimizer did not converge: {coeff_opt.message}")

# Use extended_bc5_nl(...) for the non-linear variant.
y_pred = extended_bc5(test_co2, coeff_opt.x)

print("---Results---")
rmse_val = np.sqrt(mean_squared_error(test_co2["Speed"], y_pred))
mae_val = mean_absolute_error(test_co2["Speed"], y_pred)
print(f"Custom BC.5 model, RMSE: {rmse_val}")
print(f"And its Mean Absolute error: {mae_val}")

In [None]:
## Helium
# predictors: pressure, time
print("---Building model using extended BC.5 for Helium---")
# Optimizer starting point: five base coefficients followed by five pressure slopes.
init_coeff = [3.8302, 41.9722, 415.3618, 3.1784, -67.4586, 1, 1, 1, 1, 1]
# init_coeff = [3.8302, 41.9722, 415.3618, 3.1784, -67.4586, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] # in non-linear case

# Select the Helium rows once; the same subsets feed the fit, the prediction and both metrics.
train_helium = training[training["Gas"] == "Helium"]
test_helium = test[test["Gas"] == "Helium"]

# Ignore RuntimeWarning (presumably the overflow / invalid-value warnings numpy
# emits while the optimizer probes extreme parameter values).
warnings.simplefilter("ignore", RuntimeWarning)

# Use sse_nl (with the 15-element init_coeff) for the non-linear variant.
# Swap `method` for another scipy.optimize.minimize method as needed.
# perf_counter is a monotonic clock, so the timing only covers the optimizer run.
start_time = time.perf_counter()
coeff_opt = minimize(sse, init_coeff, args=(train_helium,), method='BFGS', options={'maxiter': 10000})
end_time = time.perf_counter()

print(f"Time taken by function: {end_time - start_time} seconds")
# The metrics below are computed from coeff_opt.x regardless of convergence,
# so make a failed optimization visible instead of silently scoring it.
if not coeff_opt.success:
    print(f"WARNING: optimizer did not converge: {coeff_opt.message}")

# Use extended_bc5_nl(...) for the non-linear variant.
y_pred = extended_bc5(test_helium, coeff_opt.x)

print("---Results---")
rmse_val = np.sqrt(mean_squared_error(test_helium["Speed"], y_pred))
mae_val = mean_absolute_error(test_helium["Speed"], y_pred)
print(f"Custom BC.5 model, RMSE: {rmse_val}")
print(f"And its Mean Absolute error: {mae_val}")

In [None]:
## Luft
# predictors: pressure, time
print("---Building model using extended BC.5 for Air (Luft)---")
# Optimizer starting point: five base coefficients followed by five pressure slopes.
init_coeff = [2.5525, 27.7128, 249.7527, 18.0972, -16.2367, 1, 1, 1, 1, 1]
# init_coeff = [2.5525, 27.7128, 249.7527, 18.0972, -16.2367, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] # in non-linear case

# Select the Air ("Luft") rows once; the same subsets feed the fit, the prediction and both metrics.
train_air = training[training["Gas"] == "Luft"]
test_air = test[test["Gas"] == "Luft"]

# Ignore RuntimeWarning (presumably the overflow / invalid-value warnings numpy
# emits while the optimizer probes extreme parameter values).
warnings.simplefilter("ignore", RuntimeWarning)

# Use sse_nl (with the 15-element init_coeff) for the non-linear variant.
# Swap `method` for another scipy.optimize.minimize method as needed.
# perf_counter is a monotonic clock, so the timing only covers the optimizer run.
start_time = time.perf_counter()
coeff_opt = minimize(sse, init_coeff, args=(train_air,), method='BFGS', options={'maxiter': 10000})
end_time = time.perf_counter()
print(f"Time taken by function: {end_time - start_time} seconds")
# The metrics below are computed from coeff_opt.x regardless of convergence,
# so make a failed optimization visible instead of silently scoring it.
if not coeff_opt.success:
    print(f"WARNING: optimizer did not converge: {coeff_opt.message}")

# Use extended_bc5_nl(...) for the non-linear variant.
y_pred = extended_bc5(test_air, coeff_opt.x)

print("---Results---")
rmse_val = np.sqrt(mean_squared_error(test_air["Speed"], y_pred))
mae_val = mean_absolute_error(test_air["Speed"], y_pred)
print(f"Custom BC.5 model, RMSE: {rmse_val}")
print(f"And its Mean Absolute error: {mae_val}")