In [1]:
import numpy as np
import pandas as pd

In [2]:
from scipy.optimize import curve_fit

In [3]:
from itertools import product

In [4]:
from commons import smoothen, lse

In [5]:
# Upper bound on the number of passes of the iterative refit loop (`for it in range(n_iterations)`)
n_iterations = 20

# Preparations

## Loading the data

We start by loading the $N_i(t)$ and moving their time axis to the last position (the data are used as loaded; no smoothening is applied in this notebook) :

In [6]:
# Axis 1 of the stored array is moved to the last position (index 3)
Nt = np.moveaxis(np.load("synthetic-data.npy"), source = 1, destination = 3)

Let's also already compute the discrete derivatives $\frac{\Delta N_i(t)}{\Delta t} = N_i(t+1) - N_i(t)$ (with $\Delta t = 1$)

In [7]:
# First-order forward difference along the last axis: N(t+1) - N(t)
dNdt = np.diff(Nt, axis = -1)

and $\rho_i(t) = \frac{\Delta N_i(t)}{N_i(t)}$

In [8]:
# Each increment is divided by the value at the start of its step
# (the last entry along the final axis has no increment, hence the `:-1`)
rho = np.divide(dNdt, Nt[..., :-1])

Let's make the number of time points consistent between $N_i(t)$ and the derivatives, by dropping the last time point of $N_i(t)$ :

In [9]:
# Keep as many time points as the derivatives have.
# Slicing to an explicit length (rather than `:-1`) keeps this cell idempotent:
# re-running it does not strip one more time point each time.
Nt = Nt[..., :dNdt.shape[-1]]

## Dimensions

We also get the dimensions 

In [10]:
# Sizes of the four axes of Nt ...
n_plates, n_rows, n_columns, n_points = Nt.shape
# ... and, for each axis, the array of its valid indices (0 .. size-1)
plates, rows, columns, points = (np.arange(size) for size in Nt.shape)

## Initial $\alpha_i(t)$ parameter values

The population-and-location-specific $\rho_i(t) = \alpha_i(t) \; \epsilon_k(t)$ model requires us to provide $\alpha_i(t)$ values, which we load here.
To get the time dimensionality consistent with the $\rho_i(t)$, we remove its trailing time points :

In [11]:
# Keep exactly as many time points as rho has (n_points) instead of stripping a
# hard-coded number of trailing points: the rest of the notebook indexes alpha
# with `points` and reshapes it to (-1, n_points).
alpha = np.load("alpha/computed.npy")[..., :n_points]

As we will recompute these values iteratively, along with the parameters that lead to those values, we also load some initial parameter values :

In [12]:
# Parameter table: one row per (plate, row, column), holding the three
# parameter columns "r0 i", "c i" and "m i"
alphas = (
    pd.read_csv("alpha/params.csv")
    .set_index(["plate", "row", "column"])
    [["r0 i", "c i", "m i"]]
)

The $r_0$ and $m$ parameters are global within a plate (one value of each per plate)

In [13]:
# One (r0, m) pair per plate.
# Taking the first value of each plate (in order of appearance) instead of
# `.unique()` over the whole column keeps one entry per plate even when two
# plates happen to share the same r0 or m.
_per_plate = alphas.groupby(level = "plate", sort = False)[["r0 i", "m i"]]
assert (_per_plate.nunique() == 1).to_numpy().all(), "r0 and m must be constant within each plate"
_first = _per_plate.first()

r0_m = pd.DataFrame(
    data  = {"r0": _first["r0 i"].to_numpy(), "m": _first["m i"].to_numpy()},
    index = pd.Index(plates, name = "plate")
)

while the $c_i$ parameters are population-specific

In [14]:
# Independent copy of the "c i" column (it already carries the index of `alphas`).
# Without the copy, `c_i` can share memory with `alphas`, so the element-wise
# writes done by `fit_ci` could leak into `alphas` before `update_alpha` takes
# its `previous_alphas` snapshot.
c_i = alphas["c i"].copy()

## Layers

The $\epsilon_k(t)$ are organised in $k = 16$ layers, each layer being the set of grid points lying at the same distance from their closest grid border.
We create here 16 matrices where for each layer the respective coordinates in the grid are set to 1 (0 otherwise) :

In [15]:
layers = np.zeros((16, n_rows, n_columns))

# Layer k is a rectangular ring: its top and bottom rows are k and n_rows-k-1,
# its left and right columns are k and n_columns-k-1
for k, ring in enumerate(layers):
    ring[[k, n_rows - k - 1], k:n_columns - k] = 1      # top and bottom edges
    ring[k:n_rows - k, [k, n_columns - k - 1]] = 1      # left and right edges

Convert a coordinate to the corresponding $\epsilon_k$ :

In [16]:
# Weight every layer mask by its 1-based layer number and sum the masks over
# the layer axis: c2l[row, column] is then the layer number of that coordinate
c2l = np.tensordot(np.arange(1, len(layers) + 1), layers, axes = 1).astype(int)

# $\hat\rho_i(t) = \alpha_i(t) \; \epsilon_k(t)$

The general idea with this model is that we find a value for $\epsilon_k(t)$ for every time point $t$ and layer $k$.
Then, if we recompute new optimal parameters for $\alpha_i(t)$, and then for $\epsilon_k(t)$ again, we obtain an iterative process :

In [17]:
# One row per (plate, time point), one column per layer.
# Filled with NaN rather than `np.empty`: the table is snapshotted (and possibly
# restored) before every entry has been fitted, so unfitted entries must be
# recognisable instead of holding arbitrary uninitialised memory.
epsilon = pd.DataFrame(
    data    = np.full((n_plates * n_points, 16), np.nan),
    columns = [ f"epsilon {k+1}" for k in range(16) ],
    index   = pd.MultiIndex.from_product((plates, points), names = ("plate", "time point"))
)

In [18]:
def fit_new_epsilons():
    """Refit the epsilon values of every plate and time point, with `alpha` held fixed.

    For each plate `p` and time point `t`, the flattened `rho[p, ..., t]` is
    fitted with the model `alpha[p, ..., t] * (eps . layers)`, where `eps` holds
    one coefficient per layer. The fitted coefficients are written into row
    `(p, t)` of the global `epsilon` table.

    Side effects:
        - `previous_epsilon` (global) receives a copy of `epsilon` as it was
          before refitting.
        - `epsilon` is updated in place.
    """
    global previous_epsilon
    previous_epsilon = epsilon.copy()

    # The number of layers is read from `layers` instead of being hard-coded
    n_layers = layers.shape[0]
    flat_layers = layers.reshape((n_layers, -1))

    for p in plates:
        # First time point: all-ones start, which is what curve_fit uses by
        # default when no p0 is given. Passing it explicitly lets the model
        # take its coefficients as *varargs.
        start = np.ones(n_layers)

        for t in points:
            alpha_t = alpha[p, ..., t].reshape(-1)

            fitted = curve_fit(
                # the first argument (xdata) is a placeholder, the model ignores it
                f     = lambda _, *eps: alpha_t * np.asarray(eps).dot(flat_layers),
                xdata = t,
                ydata = rho[p, ..., t].reshape(-1),
                p0    = start
            )[0]

            epsilon.loc[(p, t), :] = fitted
            # warm start: the next time point starts from this solution
            start = fitted

def fit_r0_m(fn):
    """Refit the per-plate parameters r0 and m, with the c_i and `fn(p)` held fixed.

    For each plate `p`, the flattened `rho[p]` is fitted with
    `r0 * ci / (ci + exp(-m * t)) * fn(p)` and the fitted `(r0, m)` pair is
    written into row `p` of the global `r0_m` table.

    Parameters:
        fn: callable taking a plate index and returning the multiplicative
            factor of the model for that plate, flattened in the same order as
            `rho[p].reshape(-1)`. It is evaluated once per plate.
    """
    n_times = len(points)
    # time index of every flattened entry: 0..n_times-1, repeated for each grid cell
    ts = np.tile(points, n_rows * n_columns)

    for p in plates:
        # each c_i is repeated once per time point
        # (uses the actual number of time points instead of a hard-coded 207)
        ci = alphas.loc[p, "c i"].values.repeat(n_times)
        factor = fn(p)

        r0_m.loc[p] = curve_fit(
            # the first argument (xdata) is a placeholder, the model ignores it
            f     = lambda _, r0, m:
                r0 * ci / (ci + np.exp(-m * ts)) * factor,
            xdata = 0,
            ydata = rho[p].reshape(-1)
        )[0]

def fit_ci(fn):
    """Refit the c_i parameter of every (plate, row, column), with r0, m and `fn` held fixed.

    For each grid cell, `rho[p, r, c]` is fitted with
    `r0 * ci / (ci + exp(-m * points)) * fn(p, r, c)` under the constraint
    `ci >= 0`, and the fitted value is stored in the global `c_i` series.

    Parameters:
        fn: callable taking (plate, row, column) and returning the
            multiplicative factor of the model for that cell, one value per
            time point. It is evaluated once per cell.
    """
    for p in plates:
        r0, m = r0_m.loc[p]
        # does not depend on ci, so it is computed once per plate
        decay = np.exp(-m * points)

        for r, c in product(rows, columns):
            factor = np.asarray(fn(p, r, c))

            popt, _ = curve_fit(
                # the first argument (xdata) is a placeholder, the model ignores it
                f      = lambda _, ci:
                    r0 * ci / (ci + decay) * factor,
                xdata  = 0,
                ydata  = rho[p, r, c],
                bounds = (0, np.inf)
            )
            # store the scalar, not the 1-element parameter array
            c_i[p, r, c] = popt[0]

def update_alpha():
    """Push the current `r0_m` / `c_i` values into `alphas` and recompute `alpha`.

    Side effects:
        - `previous_alphas` and `previous_alpha` (globals) receive copies of
          `alphas` and `alpha` as they were before the update.
        - `alphas` and `alpha` are overwritten in place.
    """
    global previous_alphas, previous_alpha

    cells_per_plate = n_rows * n_columns

    # Parameter table: every per-plate value is repeated once per grid cell
    previous_alphas = alphas.copy()
    for column, values in (
        ("r0 i", r0_m["r0"].repeat(cells_per_plate)),
        ("m i",  r0_m["m"].repeat(cells_per_plate)),
        ("c i",  c_i),
    ):
        alphas.loc[:, column] = list(values)

    # Time courses: one curve per (plate, row, column), evaluated at all time points
    previous_alpha = alpha.copy()
    for cell in product(plates, rows, columns):
        r0, ci, mi = alphas.loc[cell]
        alpha[cell] = r0 * ci / (ci + np.exp(-mi * points))

def refit_alphas(r0m_fn, ci_fn):
    """Run one full refit of the alpha parameters.

    The order matters: r0 and m are refitted first, the c_i are then refitted
    (`fit_ci` reads the `r0_m` table that `fit_r0_m` has just written), and
    finally `alphas` / `alpha` are rebuilt from the new values.

    Parameters:
        r0m_fn: forwarded to `fit_r0_m` as its `fn` argument.
        ci_fn:  forwarded to `fit_ci` as its `fn` argument.
    """
    fit_r0_m(r0m_fn)
    fit_ci(ci_fn)
    update_alpha()

In [19]:
previous_score = np.inf
for it in range(n_iterations):
    print(f"iteration {it+1}")

    # `fit_r0_m` and `fit_ci` overwrite `r0_m` and `c_i` without keeping a copy,
    # so they are snapshotted here to be able to roll them back as well
    previous_r0_m, previous_c_i = r0_m.copy(), c_i.copy()

    # epsilon first (alpha fixed), then the alpha parameters (epsilon fixed)
    fit_new_epsilons()
    refit_alphas(
        # per plate: epsilon of every cell's layer, flattened cell-major / time-minor
        r0m_fn = lambda p: epsilon.loc[p].dot(layers.reshape((16, -1))).T.values.reshape(-1),
        # per cell: epsilon time course of the layer this cell belongs to
        ci_fn = lambda p, r, c: epsilon.loc[p, f"epsilon {c2l[r, c]}"]
    )

    predictions = np.array([
            alpha[p].reshape((-1, n_points))
        *   epsilon.loc[p].dot(layers.reshape((16, -1))).T.values
        for p in plates
    ]).reshape((n_plates, n_rows, n_columns, n_points))

    # `lse` comes from `commons`; presumably a least-squares error (lower is better) - confirm
    current_score = lse(predictions.reshape(-1), rho.reshape(-1))

    if current_score < previous_score:
        previous_score = current_score
    else:
        # No improvement: restore the whole state of the previous iteration
        # (including r0_m and c_i, so that all tables stay mutually consistent)
        alphas = previous_alphas
        alpha = previous_alpha
        epsilon = previous_epsilon
        r0_m = previous_r0_m
        c_i = previous_c_i
        break

iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9
iteration 10
iteration 11
iteration 12
iteration 13
iteration 14
iteration 15
iteration 16
iteration 17
iteration 18
iteration 19
iteration 20


We save the obtained $\alpha_i(t)$ and $\epsilon_k(t)$ for re-use with the $\hat\rho_i(t) = \alpha_i(t) \; \epsilon_k(t) \; \phi(N_i(t))$ model :

In [20]:
# Parameter table (r0 i, c i, m i per plate / row / column)
alphas.to_csv(path_or_buf = "alpha/params_alpha-epsilon_k.csv")

In [21]:
# alpha array, as left by the iterative fit
np.save(file = "alpha/alpha-epsilon_k.npy", arr = alpha)

In [22]:
# epsilon table (one row per plate / time point, one column per layer)
epsilon.to_csv(path_or_buf = "epsilon/alpha-epsilon_k.csv")

And the predictions :

In [23]:
# For every plate: alpha (cells x time points) times the epsilon of each cell's
# layer (time points x cells, transposed), then back to the grid layout
predictions = np.stack([
    alpha[p].reshape((-1, n_points)) * (epsilon.loc[p].to_numpy() @ layers.reshape((16, -1))).T
    for p in plates
]).reshape((n_plates, n_rows, n_columns, n_points))

In [24]:
# Predictions of the alpha * epsilon_k model
np.save(file = "predictions/level-2_alpha-epsilon_k.npy", arr = predictions)