In [1]:
import numpy as np
import pandas as pd

import math
import scipy
from scipy.optimize import minimize

import tqdm as tq
from tqdm.notebook import tqdm

# Record the versions of the main libraries alongside the results.
for module in (np, pd, scipy, tq):
    print(module.__name__, module.__version__)

numpy 1.20.3
pandas 1.2.4
scipy 1.5.3
tqdm 4.61.0


In [2]:
# Directory holding the input CSV; fitted parameters are written here too
curpathout = '.'

# Read the measurements, truncating temperatures to integers and storing
# them as a categorical column
df = pd.read_csv(f'{curpathout}/Unfolding.csv').assign(
    Temperature=lambda frame: frame['Temperature'].astype(int).astype('category')
)

In [None]:
# Fit configuration: one row per model parameter.
PARAMETERS = [
    # (name, λ, starting value, bounds, scaling)
    ('aN',  0, 7,  (0, None),   1/10),
    ('bN',  0, 1,  (0.0, None), 1/1000),
    ('aD',  0, 2,  (0, None),   1/10),
    ('bD',  0, 1,  (0.0, None), 1/1000),
    ('m',   0, 15, (0, None),   100),
    ('D50', 0, 19, (0, 100),    1),
]

# Transpose the table into one list per field:
#   parameter_names - column names for the fitted values
#   λ               - regularization coefficients (converted to an array below)
#   θ0              - starting values handed to the optimizer
#   bounds          - (lower, upper) limits handed to the optimizer
#   scaling         - factor each optimizer-side value is multiplied by in the model
parameter_names, λ, θ0, bounds, scaling = (
    list(field) for field in zip(*PARAMETERS)
)

# The regularization term multiplies λ element-wise with the parameter vector
λ = np.array(λ)


def model(x, temp, params, scale=None):
    """Evaluate the model curve at ``x``.

    The returned value is::

        (aN' + bN' * x) + (aD' + bD' * x) * S(k * (x - D50'))

    where a primed symbol is the corresponding entry of ``params`` multiplied
    by its entry in ``scale``, ``k = m' / (8.314 * temp)`` and
    ``S(z) = exp(z) / (1 + exp(z))`` is the logistic function.

    Parameters
    ----------
    x : scalar, numpy array or pandas Series
        Points at which the curve is evaluated.
    temp : float
        Divisor of ``m'`` together with the constant 8.314.
        NOTE(review): presumably absolute temperature in kelvin, with 8.314
        the gas constant in J/(mol K) -- confirm.
    params : sequence of six numbers
        ``(aN, bN, aD, bD, m, D50)`` in optimizer (unscaled) units.
    scale : sequence of six numbers, optional
        Per-parameter multipliers, in the same order as ``params``.
        Defaults to the module-level ``scaling`` list.

    Returns
    -------
    Same type/shape as ``x``
        Model value at each point.
    """
    if scale is None:
        scale = scaling

    aN, bN, aD, bD, m, D50 = params

    k = m * scale[4] / (8.314 * temp)
    z = k * (x - D50 * scale[5])

    # exp(z) / (1 + exp(z)) overflows to inf / inf = nan once z exceeds ~709.
    # exp(-log(1 + exp(-z))) is the same function but stays finite for any z.
    logistic = np.exp(-np.logaddexp(0.0, -z))

    return (
        (aN * scale[0] + bN * scale[1] * x) +
        (aD * scale[2] + bD * scale[3] * x) * logistic
    )

def cost_function(params, x, y, temp):
    """Objective minimized by the fit.

    Sum of squared differences between the observations ``y`` and
    ``model(x, temp, params)``, plus the sum of squares of ``λ * params``
    (the per-parameter regularization penalty).
    """
    residuals = y - model(x, temp, params)
    penalty_terms = λ * params

    return np.sum(np.square(residuals)) + np.sum(np.square(penalty_terms))


# Fit the model independently to every (Solvent, Enzyme, Temperature) series.
group_columns = ['Solvent', 'Enzyme', 'Temperature']

fit_parameters = []
failed_fits = []  # (group key, optimizer message) for fits that did not converge

# 'Temperature' is categorical: observed=True restricts the iteration to
# combinations that actually occur in the data, so no empty group is ever fitted.
for (solvent, enzyme, temperature), dfg in tqdm(df.groupby(group_columns, observed=True)):
    x = dfg['Concentration']
    y = dfg['FU']
    # NOTE(review): presumably a Celsius -> kelvin conversion (273.15 would be exact) -- confirm
    temp = temperature + 273

    θ = minimize(
        cost_function,
        θ0,                # starting values
        args=(x, y, temp), # data points
        bounds=bounds,     # boundaries for values
    )

    # Keep the row either way, but remember fits the optimizer gave up on so
    # they are not silently mistaken for converged results.
    if not θ.success:
        failed_fits.append(((solvent, enzyme, temperature), θ.message))

    fit_parameters.append((solvent, enzyme, temperature, *θ.x))

if failed_fits:
    print(f'WARNING: {len(failed_fits)} of {len(fit_parameters)} fits did not converge:')
    for group_key, message in failed_fits:
        print(f'  {group_key}: {message}')

df_fits = pd.DataFrame.from_records(
    fit_parameters, columns=(*group_columns, *parameter_names)
).set_index(group_columns)

# Convert the optimizer's scaled values back to actual parameter values.
# Columns are addressed by name so the pairing with `scaling` does not depend
# on the column order of the frame.
for name, factor in zip(parameter_names, scaling):
    df_fits[name] = df_fits[name] * factor

# Save out fitted variables
df_fits.to_csv(f'{curpathout}/Unfolding_params.csv', index=True)

  0%|          | 0/234 [00:00<?, ?it/s]