Evaluate a given model and compare multiple models. Push selected models to Neptune.

In [None]:
import utils
import importlib
import pandas as pd
import numpy as np
import datetime as dt
import torch

from matplotlib.ticker import MultipleLocator
from matplotlib.dates import DayLocator, AutoDateLocator, ConciseDateFormatter
%matplotlib inline

# Package name that architecture modules are imported from (see load_model).
ARCHS_DIR = 'archs'
# Directory prefix for the dataset CSV named in the checkpoint config.
DATA_DIR = 'data'
# Directory prefix under which each experiment's checkpoints are looked up.
EXPERIMENTS_DIR = 'experiments'
# Device the model and the backtest tensors are moved to via .to(DEVICE).
DEVICE='cpu'

### Load saved model

In [None]:
# Experiment to inspect: appended to EXPERIMENTS_DIR to form the directory
# handed to utils.load_checkpoint.
experiment_id = '0001_test'
# Checkpoint identifier passed to utils.load_checkpoint
# (presumably a file name inside the experiment directory -- confirm in utils).
checkpoint = 'latest-e10.pt'

def load_model(experiment_id, checkpoint):
    """Rebuild the network described by a checkpoint and restore its weights.

    The checkpoint is read with ``utils.load_checkpoint`` from
    ``EXPERIMENTS_DIR/<experiment_id>``. Its saved ``config`` names the
    architecture module (imported from the ``ARCHS_DIR`` package) and supplies
    the constructor arguments for that module's ``CovidNet`` class.

    Args:
        experiment_id: Name of the experiment sub-directory.
        checkpoint: Checkpoint identifier forwarded to ``utils.load_checkpoint``.

    Returns:
        A ``(model, cp)`` tuple: the model on ``DEVICE`` in eval mode, and the
        raw checkpoint data it was restored from.
    """
    cp = utils.load_checkpoint(EXPERIMENTS_DIR + '/' + experiment_id, checkpoint)
    print("Epochs:", cp['epoch'])
    config = cp['config']
    print(config)

    # Relative import inside the archs package; reload so that edits made to
    # the architecture file during the session are picked up.
    arch_mod = importlib.reload(
        importlib.import_module("." + config['ARCH'], ARCHS_DIR)
    )

    net_kwargs = {
        'ip_seq_len': config['DS']['IP_SEQ_LEN'],
        'op_seq_len': config['DS']['OP_SEQ_LEN'],
        'hidden_size': config['HIDDEN_SIZE'],
        'num_layers': config['NUM_LAYERS'],
    }
    model = arch_mod.CovidNet(**net_kwargs).to(DEVICE)
    print("Model initialised")

    # Restore the trained weights, then switch to inference mode.
    model.load_state_dict(cp['model_state_dict'])
    model.eval()

    return model, cp

# Restore the model once for the cells below; cp also carries the saved config
# that selects the dataset file and the input/output window lengths.
model, cp = load_model(experiment_id, checkpoint)

### Load data

In [None]:
# Only these columns are read from the CSV; 'date' is parsed to datetimes.
cols = [
    'location',
    'date',
    'total_cases',
    'new_cases',
    'total_deaths',
    'new_deaths',
    'population',
]
dates = ['date']

# The dataset file name comes from the checkpoint config, so the data matches
# the source the loaded model was configured with.
csv_path = DATA_DIR + "/" + cp['config']['DS']['SRC']
df = pd.read_csv(csv_path, usecols=cols, parse_dates=dates)

# Show one random row as a quick sanity check of the load.
df.sample()

### Backtest

In [None]:
# Country to backtest; must match a value in df.location.
c = "Italy"
# Population in thousands: dividing case counts by it gives cases per 1000
# people, multiplying converts model-scale values back to absolute counts.
pop_fct = df.loc[df.location==c, 'population'].iloc[0] / 1000

IP_SEQ_LEN = cp['config']['DS']['IP_SEQ_LEN']
OP_SEQ_LEN = cp['config']['DS']['OP_SEQ_LEN']

all_preds = []  # full OP_SEQ_LEN-step forecast of every window, in absolute cases
pred_vals = []  # first forecast step of every window, NaN-padded to line up with out_vals
out_vals = []   # the actual (smoothed) series, rebuilt from the windows

# Centred 7-day rolling mean of new cases for rows with total_cases >= 100,
# scaled to cases per 1000 people.
test_data = np.array(df.loc[(df.location==c) & (df.total_cases>=100), 'new_cases'].rolling(7, center=True, min_periods=1).mean() / pop_fct, dtype=np.float32)

n_windows = len(test_data) - IP_SEQ_LEN - OP_SEQ_LEN + 1
if n_windows < 1:
    # Without this check the loop is skipped and the `op` used after it is
    # either undefined or a stale value from an earlier run of this cell.
    raise ValueError(
        f"Not enough data for {c}: need at least {IP_SEQ_LEN + OP_SEQ_LEN} "
        f"rows, got {len(test_data)}"
    )

# Slide a window over the series one step at a time: IP_SEQ_LEN inputs are fed
# to the model and the following OP_SEQ_LEN values are the ground truth.
for i in range(n_windows):
    ip = torch.tensor(test_data[i : i+IP_SEQ_LEN])
    op = torch.tensor(test_data[i+IP_SEQ_LEN : i+IP_SEQ_LEN+OP_SEQ_LEN])
    ip = ip.to(DEVICE)
    op = op.to(DEVICE)

    pred = model.predict(ip.view(1, IP_SEQ_LEN, 1))
    if i==0: # prepend first input
        out_vals.extend(ip.view(IP_SEQ_LEN).cpu().numpy() * pop_fct)
        # no prediction exists for the very first input window
        pred_vals.extend([np.nan] * IP_SEQ_LEN)
    all_preds.append(pred.view(OP_SEQ_LEN).cpu().numpy() * pop_fct)
    pred_vals.append(pred.view(OP_SEQ_LEN).cpu().numpy()[0] * pop_fct)
    out_vals.append(op.view(OP_SEQ_LEN).cpu().numpy()[0] * pop_fct)

# last N-1 values: the final window's remaining targets complete the actual
# series; there is no first-step prediction for them, so pad with NaN.
out_vals.extend(op.view(OP_SEQ_LEN).cpu().numpy()[1:] * pop_fct)
pred_vals.extend([np.nan] * (OP_SEQ_LEN - 1))

# Side-by-side table of the actual series and the first-step predictions.
cmp_df = pd.DataFrame({'actual': out_vals, 'predicted0': pred_vals})

# set date
# Row 0 corresponds to the first date on which the country reached 100 total
# cases; every following row is one day later.
start_date = df.loc[(df.location==c) & (df.total_cases>=100), 'date'].iloc[0]
end_date = start_date + dt.timedelta(days=cmp_df.index[-1])
cmp_df['Date'] = pd.Series(
    [start_date + dt.timedelta(days=offset) for offset in range(len(cmp_df))]
)

# plot noodles
# One line per sliding window showing its whole OP_SEQ_LEN-step forecast,
# while collecting the mean absolute percentage error (MAPE) of each window.
ax=None
mape=[]
# A percentage error is undefined where the actual value is 0; mask those rows
# so that x/0 = inf cannot swamp the averaged accuracy.
actual_nonzero = cmp_df['actual'].where(cmp_df['actual'] != 0)
# Window k forecasts the rows starting at IP_SEQ_LEN + k.
for i, pred in enumerate(all_preds, start=IP_SEQ_LEN):
    cmp_df['predicted_cases'] = np.nan
    cmp_df.loc[i:i+OP_SEQ_LEN-1, 'predicted_cases'] = pred
    ax = cmp_df.plot(x='Date', y='predicted_cases', ax=ax, legend=False)
    # Rows outside the window are NaN and are skipped by .mean().
    ape = 100 * ((cmp_df['actual'] - cmp_df['predicted_cases']).abs() / actual_nonzero)
    mape.append(ape.mean())
# Accuracy shown in the plot title: 100 minus the average per-window MAPE.
acc = f"{100 - sum(mape)/len(mape):0.2f}%" if mape else "n/a"

# plot primary lines
# Tick helpers: a minor tick for every day, and concise date labels on the
# major ticks.
mn_l = DayLocator()
mj_l = AutoDateLocator()
mj_f = ConciseDateFormatter(mj_l, show_offset=False)

# Draw the actual series and the first-step predictions as thick lines on the
# same axes as the per-window forecasts.
ax = cmp_df.plot(
    x='Date',
    y=['actual', 'predicted0'],
    ax=ax,
    lw=5,
    figsize=(20,8),
    title=f"{c} | Daily predictions | {acc}",
)
ax.xaxis.set_minor_locator(mn_l)
ax.xaxis.set_major_formatter(mj_f)