### Numerical Differentiation of Denoised Data from Sample Mean of Multiple Simulations

In [1]:
import numpy as np
import pandas as pd

import sys
import joblib
import datetime
sys.path.append('../')

from Modules.Utils.ModelWrapper import ModelWrapper
from Modules.Models.BuildBINNs import MLPComponentsCV # MLPComponentsCovasim, MLPComponentsCovasim2
from Modules.Utils.Imports import *

import Modules.Loaders.DataFormatter as DF
from utils import plot_loss_convergence, get_case_name
import matplotlib
matplotlib.use('Agg')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Choose the torch device through the project helper GetLowestGPU, offering it
# GPU ids 0-3 (presumably it picks the least-loaded one — confirm in the helper).
# The recorded run of this cell printed "Device set to cpu".
device = torch.device(GetLowestGPU(pick_from=[0,1,2,3]))

def to_torch(ndarray):
    """Convert array-like data into a float32 tensor on `device` that tracks gradients.

    The tensor is created directly on the target device with
    ``requires_grad=True``. Setting the flag first and moving the tensor
    afterwards returns, on any non-CPU device, a differentiable *copy*
    (a non-leaf tensor) whose ``.grad`` is never populated; building it in
    place keeps the result a leaf on every device. On CPU the result is the
    same as before.

    Args:
        ndarray: NumPy array (or anything accepted by ``torch.tensor``).

    Returns:
        A ``torch.float`` leaf tensor located on the module-level `device`.
    """
    return torch.tensor(ndarray, dtype=torch.float, device=device, requires_grad=True)

def to_numpy(x):
    """Return the values of tensor `x` as a NumPy array.

    The tensor is first cut loose from the autograd graph and brought to
    host memory, which is what ``Tensor.numpy()`` requires.
    """
    host_tensor = x.detach().cpu()
    return host_tensor.numpy()

Device set to cpu


In [4]:
# Run configuration: which Covasim data set to load and how the BINN is set up.
path = '../Data/covasim_data/drums_data/'  # data directory handed to DF.load_covasim_data
population = 50000   # passed to get_case_name and the loader
test_prob = 0.1      # passed to get_case_name and the loader
trace_prob = 0.3     # passed to get_case_name and the loader
keep_d = True        # passed to get_case_name, the loader and the model constructor
retrain = False      # when True, a later cell reloads the best-validation checkpoint first
dynamic = True       # forwarded to get_case_name only
n_runs = 1000        # appended to case_name for the data file tag and figure names
chi_type = 'piecewise'  # forwarded to get_case_name and the model constructor
# Identifier of this scenario; also used as a sub-folder / file-name stem below.
case_name = get_case_name(population, test_prob, trace_prob, keep_d, dynamic=dynamic, chi_type=chi_type)

In [5]:
# Load the data for this scenario; the tag passed to the loader is
# case_name + '_' + n_runs. The result is indexed below with the keys
# 'data' and 'population'.
params = DF.load_covasim_data(path, population, test_prob, trace_prob, keep_d, case_name + '_' + str(n_runs), plot=False)

In [6]:
# Denoise by averaging over the leading axis of the stacked data, then rescale
# by the population size that ships with the data.
data = np.array(params['data'])
data_smooth = data.mean(axis=0) / params['population']

# Keep only the interior time points: the central-difference derivative
# computed in the next cell needs one neighbour on each side.
N = len(data_smooth) - 2
u = to_torch(data_smooth[1:N + 1, :])

# Time stamps 1..N as a column vector, one per row of `u`.
t_max_real = N
t = (np.arange(N) + 1)[:, None]

# Remove the raw data from `params`; the trailing print() presumably only keeps
# the notebook from echoing the popped value.
params.pop('data')
print()




In [7]:
# Central differences with unit time step: for interior row i the derivative
# is (row i+1 - row i-1) / 2, i.e. data_smooth[2:] minus data_smooth[:N].
u_front, u_back = data_smooth[:N, :], data_smooth[2:, :]
ut = to_torch((u_back - u_front) / 2.)

In [8]:
# Size of an 80% training share of the N interior time points (no tensors are
# built in this cell).
split = int(0.8*N)
# Column vector of shape (N, 1) holding a random permutation of 0..N-1.
# NOTE(review): surface_fit / est_deriv index with `p - 1`, which treats p as
# 1-based; with this 0-based p the entry 0 becomes index -1 (the last row) —
# confirm which convention is intended.
p = np.random.permutation(N)[:,None]

In [12]:
# Pair every state value with its derivative estimate along a new last axis:
# inputs[..., 0] is u and inputs[..., 1] is ut.
inputs = torch.stack((u, ut), dim=2)

In [15]:
# Sanity check: shape of the state channel (index 0 on the last axis) of `inputs`.
inputs[:,:,0].shape

torch.Size([181, 9])

In [13]:
# Disabled (`if False`) earlier version of the data preparation. Within this
# notebook it is the only place that assigns smooth_data, derivs, x_train,
# y_train, x_val, y_val and tracing_array, all of which later cells reference.
# NOTE(review): if re-enabled after the live cells above, params['data'] has
# already been popped there — confirm the intended execution order.
if False:
    data = np.array(params['data'])
    data_smooth = np.mean(data, axis=0)
    data_smooth = (data_smooth / params['population'])

    # here N is the full number of time points (the live cells use len - 2)
    N = len(data_smooth)

    # interior rows only, matching the rows for which derivatives exist
    smooth_data = to_torch(data_smooth[1:N-1,:])

    t_max = N - 1
    t = np.arange(1, N - 1)[:,None]
    # indices = (t - 1)[:,0]
    params.pop('data')

    # compute numerically approximated derivatives (central differences, unit step)
    M_front = data_smooth[:N-2,:]
    M_back = data_smooth[2:,:]
    derivs = (M_back - M_front) / 2.
    ut = to_torch(derivs)

    # training share computed from all N points, although p below has N-2 entries
    split = int(0.8*N)
    # shuffled array of the interior indices 1..N-2
    p = np.random.permutation(N-2) + 1
    # x_train: the first `split` shuffled indices, shifted by +1 and scaled by 1/(N-1)
    # NOTE(review): p is already 1-based, so p + 1 spans 2..N-1 while y_train
    # below is read at row p; this shift may be related to the IndexError
    # recorded at the end of the notebook — confirm against the model's indexing.
    x_train = to_torch((p[:split][:,None] + 1)/(N-1))
    # y_train: rows of data_smooth at the same shuffled indices as x_train
    y_train = to_torch(data_smooth[p[:split]])
    # x_val: the remaining shuffled indices, transformed like x_train
    x_val = to_torch((p[split:][:,None] + 1)/(N-1))
    # y_val: rows of data_smooth at the same shuffled indices as x_val
    y_val = to_torch(data_smooth[p[split:]])

    tracing_array = params['tracing_array']

In [14]:
# Disabled debugging aid: prints the shapes and index bounds produced by the
# data-preparation cell (smooth_data and t_max exist only in its disabled variant).
if False:
    print(data_smooth.shape)
    print(smooth_data.shape)
    print(t_max)
    print(N)
    print(split)
    print(p.shape)
    print(p.min())
    print(p.max())

In [7]:
def surface_fit(t, u):
    """Pick rows of `u` at the shuffled positions stored in the global `p`.

    `t` is accepted but not used: the selection is driven entirely by the
    module-level permutation `p`, shifted down by one.
    NOTE(review): the shift assumes `p` is 1-based — confirm against the
    cell that builds `p`.
    """
    row_idx = p - 1
    return u[row_idx]

def est_deriv(t, ut):
    """Pick rows of the derivative estimate `ut` at the positions in the global `p`.

    `t` is accepted but not used: the selection is driven entirely by the
    module-level permutation `p`, shifted down by one.
    NOTE(review): the shift assumes `p` is 1-based — confirm against the
    cell that builds `p`.
    """
    row_idx = p - 1
    return ut[row_idx]

#### Notes:
- The shape of `derivs` is $(181, 9)$ because central differences are only defined at interior points: data of shape $(183, 9)$ loses its first and last rows.
- `t` holds the time point of each row of `derivs`.
- `indices` is the array of zero-based row indices (`t - 1`) for each of the `derivs` points.

In [8]:
# Per-compartment time derivatives, one column each, in the order S, T, E, A,
# Y, D, Q, R, F. `derivs` is otherwise assigned only inside the disabled
# `if False` cell, so it is rebuilt here from `ut`; going through to_numpy
# also detaches the values from autograd so that all nine columns are plain
# NumPy arrays of the same kind and can be scaled and plotted.
derivs = to_numpy(ut)
dSdt = derivs[:,0]
dTdt = derivs[:,1]
dEdt = derivs[:,2]
dAdt = derivs[:,3]
dYdt = derivs[:,4]
dDdt = derivs[:,5]
dQdt = derivs[:,6]
dRdt = derivs[:,7]
dFdt = derivs[:,8]

In [9]:
plot = False

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# the old name first warns and is absent from newer releases. Use the new name
# when this installation offers it and fall back to the old one otherwise.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)
plt.rcParams['figure.figsize'] = (8, 6)

if plot:
    # One entry per compartment derivative: (name, values, line colour).
    # Each figure plots its OWN series and is saved under its own name.
    curves = [
        ('dSdt', dSdt, 'b'),
        ('dTdt', dTdt, 'b'),
        ('dEdt', dEdt, 'y'),
        ('dAdt', dAdt, 'r'),
        ('dYdt', dYdt, 'r'),
        ('dDdt', dDdt, 'm'),
        ('dQdt', dQdt, 'm'),
        ('dRdt', dRdt, 'g'),
        ('dFdt', dFdt, 'k'),
    ]
    # `t` holds the time point of every derivative row; flatten the column vector.
    days = np.ravel(t)
    for name, values, color in curves:
        # matplotlib cannot consume a tensor that still tracks gradients
        if torch.is_tensor(values):
            values = to_numpy(values)
        plt.title('$' + name + '$ versus time (days)')
        # the data were normalised by the population; scale back to head counts
        plt.plot(days, values * population, label=name, color=color)
        plt.xlabel('Time (days)')
        plt.ylabel('$' + name + '$')
        plt.legend()
        plt.savefig('../Notebooks/figs/drums/' + case_name + '_' + str(n_runs) + '_' + name + '.png')
        plt.close()

  plt.style.use('seaborn')


In [10]:
# Every run gets its own output folder, named after the wall-clock time at
# which this cell executes.
timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
mydir = os.path.join('../models/covasim', timestamp)
os.makedirs(mydir)

In [11]:
# Build the BINN from the loaded parameters, the smoothed data and its
# derivative estimate, then move it to the selected device.
# NOTE(review): the import cell brings in MLPComponentsCV and leaves
# MLPComponentsCovasim commented out, and smooth_data / tracing_array are only
# assigned inside the disabled `if False` cell — confirm this cell still runs
# from a fresh kernel.
binn = MLPComponentsCovasim(params, smooth_data, ut, N - 1, tracing_array, keep_d=keep_d, chi_type=chi_type)
binn.to(device)

MLPComponentsCovasim(
  (surface_fitter): identity_MLP(
    (mlp): BuildMLP(
      (activation): Identity()
      (output_activation): Identity()
      (MLP): Sequential(
        (0): Linear(in_features=1, out_features=9, bias=True)
        (1): Identity()
      )
    )
  )
  (eta_func): infect_rate_MLP(
    (mlp): BuildMLP(
      (activation): ReLU()
      (output_activation): Sigmoid()
      (MLP): Sequential(
        (0): Linear(in_features=3, out_features=256, bias=True)
        (1): ReLU()
        (2): Dropout(p=0.2, inplace=False)
        (3): Linear(in_features=256, out_features=1, bias=True)
        (4): Sigmoid()
      )
    )
  )
  (beta_func): beta_MLP(
    (mlp): BuildMLP(
      (activation): ReLU()
      (output_activation): Sigmoid()
      (MLP): Sequential(
        (0): Linear(in_features=2, out_features=256, bias=True)
        (1): ReLU()
        (2): Dropout(p=0.2, inplace=False)
        (3): Linear(in_features=256, out_features=1, bias=True)
        (4): Sigmoid()
   

In [12]:
# Adam (learning rate 1e-3) over every parameter the BINN exposes.
parameters = binn.parameters()
opt = torch.optim.Adam(parameters, lr=1e-3)

# The per-case folder inside the run directory doubles as the save-name
# prefix handed to the wrapper.
case_dir = os.path.join(mydir, case_name)
os.makedirs(case_dir)

# Wrap model, optimizer and the BINN's own loss; no augmentation and no
# learning-rate scheduler are configured.
model = ModelWrapper(
    model=binn,
    optimizer=opt,
    loss=binn.loss,
    augmentation=None,
    save_name=case_dir,
)
model.str_name = 'STEAYDQRF_no_main'

In [13]:
# Record the parameter bounds before training by encoding them in the name of
# a placeholder file inside the run folder (the dumped payload itself is None).
ranges = [binn.yita_lb, binn.yita_ub, binn.beta_lb, binn.beta_ub, binn.tau_lb, binn.tau_ub]
file_name = '_'.join(map(str, ranges))
joblib.dump(None, os.path.join(mydir, file_name)) # model.save_folder

# When retraining, start from the best-validation checkpoint, put the network
# back in training mode and save under a separate name.
if retrain:
    model.load(model.save_name + '_best_val_model', device=device)
    model.model.train()
    model.save_name = model.save_name + '_retrain'

# Settings passed to model.fit in the training cell.
epochs = 2000
batch_size = 128
rel_save_thresh = 0.05

In [14]:
# train jointly
# NOTE(review): x_train / y_train / x_val / y_val are only assigned inside the
# disabled `if False` data-preparation cell, and the recorded run of this cell
# ended with "IndexError: index 181 is out of bounds for dimension 0 with size
# 181" — check the index shift applied when x_train / x_val are built.
# NOTE(review): early_stopping=20000 exceeds epochs=2000; if it is a patience
# measured in epochs it can never trigger — confirm in ModelWrapper.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=None,
    verbose=1,
    validation_data=[x_val, y_val],
    early_stopping=20000,
    rel_save_thresh=rel_save_thresh)

(145, 1)
torch.Size([181, 9])
torch.Size([145, 9])
inputs shape: torch.Size([1000, 1])
(1000, 1)
torch.Size([181, 9])


IndexError: index 181 is out of bounds for dimension 0 with size 181