Evaluate a given model and compare multiple models. Push selected models to Neptune.

In [1]:
import importlib
import neptune
import pandas as pd
import numpy as np
import datetime as dt
import torch

from src import utils
from src import data
from src import backtest
from src import constants
from matplotlib.ticker import MultipleLocator
from matplotlib.dates import DayLocator, AutoDateLocator, ConciseDateFormatter
%matplotlib inline

# --- Notebook configuration ---
# Directory names are plain relative strings; the cells below build paths
# from them by joining with "/".
ARCHS_DIR = 'archs'
DATA_DIR = 'data'                # prefix for the dataset CSV named in the checkpoint config
EXPERIMENTS_DIR = 'experiments'  # prefix for <experiment_id>/<checkpoint> files

# Device name string.
DEVICE = 'cpu'

# Project identifier handed to neptune.init().
NEPTUNE_PRJ = 'indiacovidseva/covid-net'

### Backtest one model

In [2]:
# Experiment to evaluate and the checkpoint file to load from it.
# Both names are read again by later cells (Neptune upload, checkpoint sweep).
experiment_id = '0026'
checkpoint = 'latest-e10000.pt'

# `model` is passed to the backtest helpers below; `cp` is the dict-like
# checkpoint payload that later cells index (cp['config'], cp['trn_losses'],
# cp['val_losses']) and query with cp.get('trn_acc') / cp.get('val_acc').
model, cp = utils.load_model(experiment_id, checkpoint)

Checkpoint loaded
Epochs: 10000
{'NEPTUNE_ID': 'COV-25', 'ID': '0026', 'DESC': 'First model with weater data - t', 'ARCH': 'v3', 'DATASET': 'ds_cdthp_pgba_4020_dataset_2020-09-15_v3.csv.pt', 'IP_FEATURES': [0, 2, 3, 4], 'OP_FEATURES': [0], 'AUX_FEATURES': [], 'BATCH_SIZE': 1000, 'HIDDEN_SIZE': 40, 'NUM_LAYERS': 4, 'DROPOUT': 0.5, 'LEARNING_RATE': 0.001, 'NUM_EPOCHS': 10001, 'DS': {'FEATURES': ['new_cases', 'new_deaths', 'temp_mean', 'humidity_mean', 'pressure_mean'], 'POP_FEATURES': ['new_cases', 'new_deaths'], 'AUX_FEATURES': ['population_density', 'gdp_per_capita', 'hospital_beds_per_thousand', 'median_age'], 'VAL_RATIO': 0.3, 'IP_SEQ_LEN': 40, 'OP_SEQ_LEN': 20, 'SRC': 'dataset_2020-09-15_v3.csv'}}
Model initialised


#### Plot loss and accuracy

In [None]:
# Training / validation loss curves stored in the checkpoint.
df_loss = pd.DataFrame({'trn_loss': cp['trn_losses'], 'val_loss': cp['val_losses']})

# Accuracy curves may be absent from a checkpoint; a missing key falls back
# to an all-zero array of length NUM_EPOCHS.
n_epochs = cp['config']['NUM_EPOCHS']
df_acc = pd.DataFrame({
    key: cp.get(key, np.zeros(n_epochs)) for key in ('trn_acc', 'val_acc')
})

# Smooth every curve with a 3-point centered rolling mean
# (min_periods=1 keeps the first and last points defined).
for frame in (df_loss, df_acc):
    for col in list(frame.columns):
        frame[col] = frame[col].rolling(3, min_periods=1, center=True).mean()

# Both figures share the same layout and a log-scaled y axis.
shared_plot_kw = dict(subplots=False, figsize=(5, 5), sharex=False, logy=True)
_ = df_loss.plot(y=['trn_loss', 'val_loss'], title='Loss per epoch', **shared_plot_kw)
_ = df_acc.plot(y=['trn_acc', 'val_acc'], title='Acc per epoch', **shared_plot_kw)

#### Load training data and backtest

In [None]:
# Columns to read from the dataset CSV; `date` is parsed as a datetime.
cols = [
    'location', 'date',
    'total_cases', 'new_cases', 'total_deaths', 'new_deaths',
    'temp_mean', 'humidity_mean', 'pressure_mean',
    'population', 'population_density', 'gdp_per_capita',
    'hospital_beds_per_thousand', 'median_age',
]
dates = ['date']

# The source file name comes from the checkpoint's dataset config, so the
# frame loaded here is the same file the model was trained on.
src_csv = "/".join((DATA_DIR, cp['config']['DS']['SRC']))
df = data.fix_anomalies_owid(
    pd.read_csv(src_csv, usecols=cols, parse_dates=dates)
)

# Show one random row as a quick sanity check.
df.sample()

In [None]:
# Backtest the loaded model against the CSV frame from the previous cell.
# `df` is rebound by the later data.get_statewise_data(...) cell, so this call
# must run while `df` still refers to the CSV data.
backtest.countrywise(model, cp, df, constants.STT_INFO)

#### Load covid19india data and backtest

In [3]:
# Rebinds `df`: from here on it holds the result of data.get_statewise_data,
# not the CSV frame loaded earlier in the notebook.
# NOTE(review): the meaning of the positional `True` is not visible from this
# notebook — confirm against src/data.
df = data.get_statewise_data(True)

In [None]:
# Backtest the loaded model on the frame returned by data.get_statewise_data.
# The checkpoint sweep further down calls the same helper with plot=False and
# collects its return value as the accuracy; here the return value is simply
# the cell's displayed result (plot=True presumably draws per-state plots —
# confirm against src/backtest).
backtest.statewise(model, cp, df, constants.STT_INFO, plot=True)

#### Upload model to Neptune

In [None]:
# Attach the current checkpoint file to the Neptune experiment recorded in
# the checkpoint config. `checkpoint` is also reassigned by the checkpoint
# sweep loop, so this uploads whichever value is current when the cell runs.
neptune_prj = neptune.init(NEPTUNE_PRJ)
matching_exps = neptune_prj.get_experiments(id=cp['config']['NEPTUNE_ID'])
neptune_exp = matching_exps[0]  # the first experiment returned for that id

artifact_path = "/".join((EXPERIMENTS_DIR, experiment_id, checkpoint))
neptune_exp.log_artifact(artifact_path)

### Backtest all models

In [None]:
# Backtest every checkpoint `latest-e<epoch>.pt` of the current experiment.
#
# A checkpoint that fails to load or evaluate is reported and skipped, so
# `accs` holds one entry per *successful* checkpoint in epoch order; its
# positions are not epoch numbers whenever a checkpoint is missing.
#
# Side effect: the loop rebinds the notebook-level `checkpoint`, `model` and
# `cp`; after it finishes, `model`/`cp` are those of the last checkpoint that
# loaded successfully.
accs = []
for e in range(0, 10001, 1): # start, stop, step
    checkpoint = 'latest-e' + str(e) + '.pt'
    try:
        model, cp = utils.load_model(experiment_id, checkpoint, v=False)
        acc = backtest.statewise(model, cp, df, constants.STT_INFO, plot=False)
        accs.append(acc)
        print(checkpoint, acc)
    except Exception as err:
        # Bound to `err`, not `e`: `except ... as e` would overwrite the epoch
        # counter and unbind the name `e` when the handler exits.
        print(checkpoint, err)

  elwise_mape = total_acc / len(mape)
  a = np.nanmean(np.array(child_accs))


AN nan
AP nan
AR nan
AS nan
BR nan
CH 28.251843402213723
CT nan
DN nan
DL 33.25114964789087
GA 26.40491671331874
GJ nan
HR nan
HP nan
JK nan
JH nan
KA nan
KL nan
LA nan
MP nan
MH nan
MN 77.60444525875103
ML nan
MZ nan
NL nan
OR nan
PY 24.787990878882653
PB nan
RJ nan
TN nan
TG nan
TR 53.54109426288283
UP nan
UT 26.52201218606129
WB nan
India 35.94258216911755
latest-e0.pt 35.94258216911755
AN nan
AP 0.14205673335747804
AR 0.777485515719917
AS 0.20765093898187104
BR 0.38818275242758205
CH 0.44613884745749033
CT 0.25109579982890295
DN nan
DL 0.12355976080780806
GA 0.3194883732198832
GJ 0.21402195745990582
HR 0.17891771320138616
HP 0.618900806027213
JK 0.3459712458405164
JH 0.18350348917206816
KA 0.17285719199448124
KL 0.19404075116631248
LA nan
MP 0.436865147823454
MH 0.05998824055572527
MN 0.7903541838830819
ML nan
MZ nan
NL nan
OR 0.2592818298035553
PY 0.31519968044531765
PB 0.2748311814285671
RJ 0.3382615030944862
TN 0.07946320485551439
TG 0.15355895539480002
TR 0.4908670536695894
UP 

#### Plot test accuracy vs epochs

In [None]:
# Column of `accs` (one per output feature) whose accuracy is plotted.
feature = 0
# NOTE(review): the name is looked up directly in DS.FEATURES with the same
# index; confirm this still matches when the config's OP_FEATURES is not [0].
feature_name = cp['config']['DS']['FEATURES'][feature]

# One row per epoch: backtest accuracy next to the validation accuracy stored
# in the checkpoint (all zeros when the checkpoint has no 'val_acc').
# NOTE(review): the three columns must have equal length, so this assumes
# `accs` has exactly one entry for each of the 10001 epochs — verify.
df_exp = pd.DataFrame(dict(
    test_acc=np.array(accs)[:, feature],
    val_acc=cp.get('val_acc', np.zeros((cp['config']['NUM_EPOCHS']))),
    epochs=np.arange(0, 10001, 1),
))

ax = df_exp.plot(
    x='epochs',
    y=['test_acc', 'val_acc'],
    title='Test accuracy: ' + feature_name,
    subplots=False,
    figsize=(5, 5),
    sharex=False,
)

# Top rows when sorted by test_acc in descending order.
best_by_test_acc = df_exp.sort_values(by='test_acc', ascending=False).head()
print("Models with best test accuracy for", feature_name)
print(best_by_test_acc)
# Alternative ranking, kept for reference: smallest |test_acc - val_acc| first.
# print(df_exp.loc[(df_exp['test_acc'] - df_exp['val_acc']).abs().sort_values(ascending=True).index]\
#       .head(100).sort_values('test_acc', ascending=False))

#### Log test accuracy to Neptune

In [None]:
# Look up the Neptune experiment recorded in the checkpoint config.
neptune_prj = neptune.init(NEPTUNE_PRJ)
matching_exps = neptune_prj.get_experiments(id=cp['config']['NEPTUNE_ID'])
neptune_exp = matching_exps[0]

# Log one (epoch, test accuracy) point per row of df_exp under a single
# metric name that includes the plotted feature.
metric_name = 'test accuracy: ' + feature_name
for idx, row in df_exp.iterrows():
    epoch_x, acc_y = row['epochs'], row['test_acc']
    neptune_exp.log_metric(metric_name, epoch_x, acc_y)