In [1]:
import math
import warnings

import numpy as np
import pandas as pd
import scipy
from scipy.optimize import minimize

import tqdm as tq
from tqdm.notebook import tqdm

# Record the versions of the main libraries alongside the results.
for module in (np, pd, scipy, tq):
    print(f'{module.__name__} {module.__version__}')

numpy 1.20.3
pandas 1.2.4
scipy 1.5.3
tqdm 4.61.0


In [2]:
# Directories for reading input and writing results
path_in = '.'
curpathout = '.'

# Load the activity table and store Temperature as an integer-valued categorical
df = pd.read_csv(f'{path_in}/Activity.csv')
df = df.assign(Temperature=df['Temperature'].astype(int).astype('category'))

# Drop the control rows, which are encoded as Concentration 0.1 (no enzyme)
# and 0.2 (no solvent)
df = df[~((df['Concentration'] == 0.1) | (df['Concentration'] == 0.2))]

In [None]:
# One row per model parameter:
#   (name, λ regularization coefficient, starting value, (lower, upper) bounds)
# A bound of None leaves that side unconstrained.
PARAMETERS = [
    ('x50', 0,      20,       (0, 100)),
    ('μ',   0.01,   10,       (0, 100)),
    ('σ',   0,      10,       (10, None)),
    ('A',   0.001,  1,        (0, None)),
    ('B',   0,      1,        (0, None)),
    ('m',   0,      15.00,    (None, None)),
]

# Transpose the table into one list per column:
# parameter names, regularization coefficients, starting values, bounds.
parameter_names, λ, θ0, bounds = (list(column) for column in zip(*PARAMETERS))

# Keep the regularization coefficients as an array so they multiply
# element-wise with the parameter vector in the cost function.
λ = np.array(λ)

def model(x, temp, params):
    """Evaluate the activity model: a Gaussian peak plus a decreasing logistic term.

    Parameters
    ----------
    x : scalar or array-like
        Points at which to evaluate the model (the fit loop passes the
        concentrations of one group).
    temp : scalar
        Temperature that scales the logistic steepness (the fit loop passes
        ``temperature + 273``).
    params : sequence of six numbers
        ``(x50, μ, σ, A, B, m)``: logistic midpoint, Gaussian centre, Gaussian
        width, Gaussian amplitude, logistic amplitude, and the steepness
        parameter ``m`` (multiplied by 100 inside the model).

    Returns
    -------
    Model values with the same shape as ``x``.
    """
    x50, μ, σ, A, B, m = params

    # Logistic steepness. 8.314 is presumably the gas constant R in J/(mol·K)
    # -- TODO confirm.
    k = m * 100 / (8.314 * temp)

    # Normalised Gaussian density scaled by A.
    gaussian = A * np.exp(-0.5 * ((x - μ) / σ) ** 2) / (σ * np.sqrt(2 * np.pi))

    # With z = k * (x - x50):
    #     exp(-z) / (1 + exp(-z)) == 1 / (1 + exp(z)) == exp(-log(1 + exp(z)))
    # The direct quotient overflows to inf/inf = nan when -z is large, which
    # the optimizer can reach because m is unbounded. logaddexp evaluates
    # log(1 + exp(z)) without overflow, so the term stays finite.
    logistic = B * np.exp(-np.logaddexp(0.0, k * (x - x50)))

    return gaussian + logistic

def cost_function(params, x, y, temp):
    """Objective minimised by the fit: squared residuals plus a weighted L2 penalty.

    Parameters
    ----------
    params : sequence of six numbers
        Candidate parameter vector, passed straight to ``model``.
    x, y : array-like
        Evaluation points and the observed values to match.
    temp : scalar
        Temperature forwarded to ``model``.

    Returns
    -------
    Sum of squared residuals plus the sum of squared ``λ * params``, where
    ``λ`` is the module-level array of regularization coefficients.
    """
    residuals = y - model(x, temp, params)
    weighted_params = λ * params

    return np.sum(residuals ** 2) + np.sum(weighted_params ** 2)


# Fit the model separately for every (solvent, enzyme, temperature) group.
fit_parameters = []
for (solvent, enzyme, temperature), dfg in tqdm(df.groupby(['Solvent', 'Enzyme', 'Temperature'])):
    x = dfg['Concentration']
    y = dfg['Activity_mean']
    # Offset of 273 presumably converts degrees Celsius to kelvin -- TODO confirm.
    temp = temperature + 273

    θ = minimize(
        cost_function,
        θ0,                # starting values
        args=(x, y, temp), # data points
        bounds=bounds,     # boundaries for values
    )

    # Report failed optimizations instead of silently saving their parameters.
    # The row is still recorded, so the output schema is unchanged.
    if not θ.success:
        warnings.warn(
            f'Fit did not converge for Solvent={solvent!r}, Enzyme={enzyme!r}, '
            f'Temperature={temperature!r}: {θ.message}'
        )

    fit_parameters.append((solvent, enzyme, temperature, *θ.x))

df_fits = pd.DataFrame.from_records(fit_parameters, columns=('Solvent', 'Enzyme', 'Temperature', *parameter_names))
df_fits = df_fits.set_index(['Solvent', 'Enzyme', 'Temperature'])

# Convert the scaled steepness back to its actual value (the model uses m * 100)
df_fits['m'] = df_fits['m']*100

df_fits.to_csv(f'{curpathout}/Act_params.csv', index=True)

  0%|          | 0/72 [00:00<?, ?it/s]

  result = getattr(ufunc, method)(*inputs, **kwargs)
