In [None]:
import numpy as np
import pandas as pd

In [None]:
from scipy.optimize import curve_fit

In [None]:
from itertools import product

In [None]:
from commons import smoothen, lse

In [None]:
# Upper bound on the number of alternating epsilon / alpha refits; the
# fitting loop stops earlier as soon as the score no longer improves.
n_iterations = 20

# Preparations

## Loading the data

We start by loading the $N_i(t)$ curves and smoothing them:

In [None]:
# Load the raw curves and pass them through `smoothen`; 10 is its second
# argument (presumably the smoothing window size -- TODO confirm in `commons`).
Nt = smoothen(np.load("curves_raw.npy"), 10)

Let's also compute the discrete derivatives right away (with $\Delta t = 1$): $\frac{\Delta N_i(t)}{\Delta t} = \Delta N_i(t) = N_i(t+1) - N_i(t)$

In [None]:
# Forward difference along the last axis: N(t+1) - N(t)
dNdt = np.diff(Nt, axis = -1)

and $\rho_i(t) = \frac{\Delta N_i(t)}{N_i(t)}$

In [None]:
# Relative growth: every difference is divided by the population it starts
# from. Slicing N to the length of dNdt (instead of a fixed `:-1`) keeps this
# cell valid whether or not N has already been trimmed to that length.
rho = dNdt / Nt[..., :dNdt.shape[-1]]

Let's make the number of time points consistent between $N_i(t)$ and its derivatives by dropping the last point of $N_i(t)$:

In [None]:
# Trim N to the length of its differences. Using dNdt's length rather than
# `:-1` makes the cell idempotent: re-running it does not drop further points.
Nt = Nt[..., :dNdt.shape[-1]]

## Dimensions

We also get the dimensions 

In [None]:
# Size of each of the four axes of N ...
n_plates, n_rows, n_columns, n_points = Nt.shape
# ... and the matching 0-based index ranges, one per axis
plates, rows, columns, points = (np.arange(size) for size in Nt.shape)

## Initial $\alpha_i(t)$ parameter values

The population-specific $\rho_i(t) = \alpha_i(t) \; \epsilon(t)$ model requires us to provide initial $\alpha_i(t)$ values, which we load here.
To make their time dimension consistent with that of $\rho_i(t)$, we drop their last 10 time points:

In [None]:
# Keep all but the last 10 time points of the stored alpha values
# (presumably so that the time axis has the same length as rho's -- TODO
# confirm where the offset of 10 comes from).
alpha = np.load("alpha/computed.npy")[..., :-10]

We will recompute these values iteratively, along with the parameters that lead to them, so we also load some initial parameter values:

In [None]:
# Initial alpha parameters, indexed by (plate, row, column) and reduced to
# the three parameter columns
alphas = (
    pd.read_csv("alpha/params.csv")
    .set_index(["plate", "row", "column"])
    .loc[:, ["r0 i", "m i", "c i"]]
)

The $r_0$ and $m$ parameters are shared by all the populations of a plate (one pair per plate)

In [None]:
# One (r0, m) pair per plate. Taking the first value of every plate group
# (instead of `.unique()` over the whole column) still yields exactly one
# value per plate, in plate order, when two plates happen to share a value
# or when the rows of the CSV are not sorted by plate.
per_plate = alphas.groupby(level = "plate")[["r0 i", "m i"]].first()

r0_m = pd.DataFrame(index = pd.Index(plates, name = "plate"))

# Positional assignment, as before: the i-th plate group feeds plates[i]
r0_m["r0"] = per_plate["r0 i"].to_numpy()
r0_m["m"] = per_plate["m i"].to_numpy()

while the $c_i$ parameters are population-specific

In [None]:
# Independent copy of the "c i" column (it keeps the (plate, row, column)
# index of `alphas`). Without the copy, the Series may share memory with
# `alphas` on pandas versions without copy-on-write, so that writing new c_i
# values would silently modify `alphas` before it gets snapshotted.
c_i = alphas["c i"].copy()

# $\hat\rho_i(t) = \alpha_i(t) \; \epsilon(t)$

The general idea with this model is that we fit one value of $\epsilon(t)$ per plate for every $t$.
We then recompute the optimal parameters of $\alpha_i(t)$, then $\epsilon(t)$ again, and so on; this gives an iterative process, which we stop as soon as the score stops decreasing:

In [None]:
# One epsilon value per (time point, plate). The table starts out as NaN
# rather than uninitialised memory, so that its content is deterministic and
# not-yet-fitted cells are recognisable.
epsilon = pd.DataFrame(
    data    = np.full((n_points, n_plates), np.nan),
    index   = pd.Index(points, name = "time point"),
    columns = pd.Index(plates, name = "plate")
)

In [None]:
def fit_new_epsilons():
    """Refit epsilon for every plate and time point, in place.

    A copy of the current `epsilon` table is first stored in the global
    `previous_epsilon`. Then, for each plate p and time point t, `curve_fit`
    fits the single scalar e for which `alpha[p, ..., t] * e` best matches
    `rho[p, ..., t]`, and the result is written to `epsilon.loc[t, p]`.
    From the second time point on, the fit is started from the value found
    for the previous time point of the same plate.
    """
    global previous_epsilon
    previous_epsilon = epsilon.copy()

    # curve_fit requires an xdata argument, but the model ignores it
    discard = 42
    for p in plates:
        for t in points:
            fitted = curve_fit(
                f     = lambda _, e: alpha[p, ..., t].reshape(-1) * e,
                xdata = discard,
                ydata = rho[p, ..., t].reshape(-1),
                # None is curve_fit's default initial guess; later time
                # points are warm-started from the preceding epsilon
                p0    = None if t == 0 else epsilon.loc[t - 1, p]
            )[0]

            # `fitted` is a 1-element array: store the scalar itself rather
            # than relying on an implicit array-to-scalar conversion
            epsilon.loc[t, p] = fitted[0]

def fit_r0_m(fn):
    """Refit the per-plate (r0, m) pair and store it in `r0_m`, in place.

    For each plate p, `curve_fit` fits r0 and m of the model
    `r0 * ci / (ci + exp(-m * t)) * fn(p)` to all the rho values of the
    plate at once, with the c_i values taken from `alphas`.

    Parameters
    ----------
    fn : callable
        Called as `fn(p)`; its result multiplies the model output, so it
        must be broadcastable against the flattened
        (row, column, time point) values of plate p.
    """
    # rho[p] is flattened with the time axis varying fastest: the time
    # points are therefore repeated once per population ...
    ts = np.tile(points, n_rows * n_columns)

    for p in plates:
        # ... and every c_i is repeated once per time point. The repeat
        # count has to be n_points (not a hard-coded curve length) to stay
        # aligned with `ts` and `rho[p]`.
        ci = alphas.loc[p, "c i"].values.repeat(n_points)

        # `r0_m` is only modified in place, so no `global` is needed
        r0_m.loc[p] = curve_fit(
            f     = lambda t, r0, m:
                r0 * ci / (ci + np.exp(-m * t)) * fn(p),
            xdata = ts,
            ydata = rho[p].reshape(-1)
        )[0]

def fit_ci(fn):
    """Refit the population-specific c_i and store them in `c_i`, in place.

    For each population (p, r, c), `curve_fit` fits the single parameter ci
    of the model `r0 * ci / (ci + exp(-m * t)) * fn(p, r, c)` to
    `rho[p, r, c]`, with r0 and m read from `r0_m` for plate p and ci
    constrained to be non-negative.

    Parameters
    ----------
    fn : callable
        Called as `fn(p, r, c)`; its result multiplies the model output, so
        it must be broadcastable against the time points.
    """
    for p in plates:
        r0, m = r0_m.loc[p]

        for r, c in product(rows, columns):
            fitted = curve_fit(
                f      = lambda t, ci:
                    r0 * ci / (ci + np.exp(-m * t)) * fn(p, r, c),
                xdata  = points,
                ydata  = rho[p, r, c],
                bounds = (0, np.inf)
            )[0]

            # `fitted` is a 1-element array: store the scalar itself rather
            # than relying on an implicit array-to-scalar conversion.
            # `c_i` is only modified in place, so no `global` is needed.
            c_i[p, r, c] = fitted[0]

def update_alpha():
    """Push the fitted parameters into `alphas` and recompute `alpha`.

    Copies of `alphas` and `alpha`, taken before they are overwritten, are
    stored in the globals `previous_alphas` and `previous_alpha`.
    """
    global alpha, previous_alphas, previous_alpha

    populations_per_plate = n_rows * n_columns

    # Parameter table: the per-plate r0 and m are repeated for every
    # population of the plate, the c_i are copied over as they are
    previous_alphas = alphas.copy()
    alphas.loc[:, "r0 i"] = r0_m["r0"].repeat(populations_per_plate).tolist()
    alphas.loc[:, "m i"] = r0_m["m"].repeat(populations_per_plate).tolist()
    alphas.loc[:, "c i"] = c_i.tolist()

    # Curves: one alpha(t) per population, evaluated at every time point
    previous_alpha = alpha.copy()
    for population in product(plates, rows, columns):
        r0, m, c = alphas.loc[population]
        alpha[population] = r0 * c / (c + np.exp(-m * points))

def refit_alphas(r0m_fn, ci_fn):
    """Refit all the alpha parameters, then recompute the alpha curves.

    `r0m_fn` is forwarded to `fit_r0_m` and `ci_fn` to `fit_ci`. The order
    matters: `fit_ci` reads the (r0, m) values that `fit_r0_m` has just
    written, and `update_alpha` reads the results of both.
    """
    fit_r0_m(r0m_fn)
    fit_ci(ci_fn)
    update_alpha()

In [None]:
# Alternate between fitting epsilon and refitting the alpha parameters for
# at most `n_iterations` rounds, and stop as soon as a round fails to lower
# the score.
previous_score = np.inf
for it in range(n_iterations):
    print(f"iteration {it+1}")

    # `fit_new_epsilons` and `update_alpha` back up epsilon, alphas and alpha
    # themselves; r0_m and c_i are modified in place without any backup, so
    # they are snapshotted here to allow a complete rollback
    previous_r0_m = r0_m.copy()
    previous_c_i = c_i.copy()

    fit_new_epsilons()
    refit_alphas(
        r0m_fn = lambda p: np.tile(epsilon[p], n_rows * n_columns),
        ci_fn = lambda p, r, c: epsilon[p]
    )

    # Model output: alpha_i(t) * epsilon(t), for every plate and time point
    predictions = np.empty_like(rho)
    for p, t in product(plates, points):
        predictions[p, ..., t] = alpha[p, ..., t] * epsilon.loc[t, p]

    current_score = lse(predictions.reshape(-1), rho.reshape(-1))

    if current_score < previous_score:
        previous_score = current_score
    else:
        # No improvement: restore the whole state of the previous round
        # (not only alphas / alpha / epsilon, so that r0_m and c_i stay
        # consistent with them) and stop
        alphas = previous_alphas
        alpha = previous_alpha
        epsilon = previous_epsilon
        r0_m = previous_r0_m
        c_i = previous_c_i
        break

We only need to save the predicted $\hat \rho_i(t)$ :

In [None]:
# Predicted rho: alpha_i(t) * epsilon(t), filled in one (plate, time point)
# slice at a time
predictions = np.empty_like(rho)

for p in plates:
    for t in points:
        predictions[p, ..., t] = alpha[p, ..., t] * epsilon.at[t, p]

In [None]:
# Write the predicted rho array to disk in NumPy's .npy format
np.save("predictions/level-2_alpha-epsilon.npy", predictions)