In [None]:
import sys

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from scipy.interpolate import CubicSpline
from datetime import datetime, timedelta

sys.path.append('../..')

from data.dataloader import Covid19IndiaLoader
from data.processing import get_dataframes_cached, get_data

from utils.util import rollingavg, train_test_split
from utils.enums.columns import Columns

## Data

In [None]:
# Region to analyse; `disable_tracker` is forwarded to get_data unchanged.
state = 'Maharashtra'
district = 'Pune'
disable_tracker = True

# Fetch the raw covid19india API tables through the project loader.
loader = Covid19IndiaLoader()
dataframes = loader.get_covid19india_api_data()

data = get_data(dataframes, state, district, disable_tracker=disable_tracker)
# Parse dates so that the date arithmetic used later in the notebook works.
data['date'] = pd.to_datetime(data['date'])
# Rename to the column names used by the rest of this notebook.
column_aliases = {"total": "total_infected", "active": "hospitalised"}
data = data.rename(columns=column_aliases)

In [None]:
# Column names of the compartments reported by the project's Columns enum.
cols = [compartment.name for compartment in Columns.which_compartments()]

# Start five days after the first observation (same number as the rolling window).
actual_start_date = data['date'].min() + timedelta(days=5)

# Smooth every compartment on a deep copy so that `data` itself stays untouched.
smooth_data = data.copy(deep=True)
for col in cols:
    smooth_data[col] = rollingavg(data[col], 5)

# Discard rows where any compartment has no smoothed value.
smooth_data = smooth_data.dropna(subset=cols, how='any')

# Keep only the second part returned by train_test_split for the day before the start date.
split_date = actual_start_date - timedelta(days=1)
_, smooth_data = train_test_split(smooth_data, split_date)
smooth_data.head()

## Experiments: July 8

In [None]:
# Consolidated SEIR s3 results for the July 8 run (c2_train_period = 7).
res = pd.read_csv('../../outputs/consolidated/Pune_SEIR_s3_performance_July_8.csv')
# Keep only the rows whose type is "val", renumbered from zero.
is_val_row = res["type"] == "val"
seir_val_res = res.loc[is_val_row].reset_index(drop=True)
seir_val_res

In [None]:
# Split the validation rows by experiment: key k holds the rows with exp == k + 1.
exp = {
    k: seir_val_res.loc[seir_val_res["exp"] == k + 1].reset_index(drop=True)
    for k in range(4)
}

In [None]:
# One panel per compartment, comparing all four experiments of the July 8 run.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp[0], 'SEIR test MAPE on s3 (using ground truth train data(s1+s2))', 'green'),
    (exp[1], 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
    (exp[2], 'SEIR test MAPE on s3 (using SEIR forecast as train data)', 'blue'),
    (exp[3], 'SEIR test MAPE on s3 (using ground truth train data(s1))', 'black'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
# The legend goes on the current axes; all panels share the same curves.
plt.legend()

In [None]:
# Same comparison as the four-curve figure, without the SEIR-forecast experiment (exp[2]).
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp[0], 'SEIR test MAPE on s3 (using ground truth train data (s1+s2))', 'green'),
    (exp[1], 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
    (exp[3], 'SEIR test MAPE on s3 (using ground truth train data(s1))', 'black'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

## IHME s2 performance, SEIR s3 performance

In [None]:
# Consolidated IHME results for Pune, split by the value of the "type" column.
res = pd.read_csv('../../outputs/consolidated/Pune_IHME_performance.csv')
row_type = res['type']
val_res = res.loc[row_type == 'val'].reset_index(drop=True)
train_res = res.loc[row_type == 'train'].reset_index(drop=True)
val_res

In [None]:
# Consolidated SEIR s3 results for Pune; only the "val" rows are used.
res = pd.read_csv('../../outputs/consolidated/Pune_SEIR_s3_performance.csv')
is_val_row = res["type"] == "val"
seir_val_res = res.loc[is_val_row].reset_index(drop=True)
# Every third row, starting at offsets 0, 1 and 2, goes to exp1, exp2 and exp3.
# NOTE(review): presumably the file lists the three experiments in a repeating
# 1, 2, 3 order — confirm, since the split is purely positional.
exp1, exp2, exp3 = (seir_val_res.iloc[offset::3, :] for offset in range(3))

In [None]:
# IHME train MAPE (s1) against test MAPE (s2) for Pune, one panel per compartment.
fig, ax = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(20, 3))
# Use a figure-level title: plt.title() would only set the title of the current
# (last) panel, which the loop below then overwrites with the compartment name.
fig.suptitle('Pune MAPE on s2', y=1.08)
cols_plot = ['recovered', 'hospitalised', 'deceased']
for i, col in enumerate(cols_plot):
    ax[i].plot(train_res['shift'], train_res[col], 'o-', label='IHME train MAPE on s1', color='blue')
    ax[i].plot(val_res['shift'], val_res[col], 'o-', label='IHME test MAPE on s2', color='red')
    ax[i].title.set_text(col)
    ax[i].set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    ax[i].set_ylabel('MAPE', fontsize=10)
    ax[i].grid()
# The legend goes on the current axes; all panels share the same two curves.
plt.legend()

In [None]:
# SEIR test MAPE on s3 for the three experiments, one panel per compartment.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp1, 'SEIR test MAPE on s3 (using ground truth train data)', 'green'),
    (exp2, 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
    (exp3, 'SEIR test MAPE on s3 (using SEIR forecast as train data)', 'blue'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
    panel.title.set_text(col)
plt.legend()

In [None]:
# Ground-truth-trained vs. IHME-forecast-trained SEIR, one panel per compartment.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp1, 'SEIR test MAPE on s3 (using ground truth train data)', 'green'),
    (exp2, 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

## Variance/smoothness analysis

In [None]:
def get_var_series(compartment, data, train_period, smoothness):
    """Compute per-window variance and mean smoothness over sliding windows.

    A window of ``train_period`` rows is slid one row at a time over ``data``.
    Windows are produced for as long as the window's start date plus
    ``train_period`` days is strictly before the last date in ``data['date']``.

    Args:
        compartment: name of the column in ``data`` whose variance is taken.
        data: DataFrame with a ``date`` column and the ``compartment`` column.
            Rows are sliced positionally, so the frame is assumed to hold one
            row per consecutive day in ascending order — TODO confirm.
        train_period: window length, used both as a row count and as a number of days.
        smoothness: sequence aligned row-for-row with ``data``; it is sliced with
            the same positions as the compartment column.

    Returns:
        Tuple ``(variances, mean_smooth, dates)`` of equal-length lists:
        ``np.var`` of each compartment window, ``np.mean`` of each smoothness
        window, and the start date of each window.
    """
    first_date = data['date'].min()
    # Hoisted out of the loop: the last date does not change between iterations.
    last_date = data['date'].max()
    series = data[compartment]

    variances, mean_smooth, dates = [], [], []
    offset = 0  # row position of the window start == days elapsed since first_date
    while first_date + timedelta(offset + train_period) < last_date:
        window = slice(offset, offset + train_period)
        dates.append(first_date + timedelta(offset))
        variances.append(np.var(series.iloc[window]))
        mean_smooth.append(np.mean(smoothness[window]))
        offset += 1
    return variances, mean_smooth, dates

def get_smoothness(compartment, data):
    """Return the squared second derivative of a cubic spline through a column.

    A ``CubicSpline`` is fitted with ``data['date']`` as x and
    ``data[compartment]`` as y, and its second derivative (``nu=2``) is
    evaluated at every value of ``data['date']``.

    Args:
        compartment: name of the column in ``data`` to fit.
        data: DataFrame with a ``date`` column and the ``compartment`` column.

    Returns:
        List with one squared second-derivative value per row of ``data``.

    NOTE(review): the magnitude of the result depends on how SciPy converts the
    ``date`` values to numbers — confirm the resulting unit before comparing
    values across data sets.
    """
    x_values = data['date']
    spline = CubicSpline(x_values, data[compartment])
    curvature = spline(x_values, nu=2)
    return [value**2 for value in curvature]

def plot_analysis(compartment, losses, variances, smoothness, dates):
    """Plot loss, variance and smoothness curves for one compartment on one figure.

    Args:
        compartment: column of ``losses`` to plot; also used as the figure title.
        losses: DataFrame with a ``shift`` column (x values) and a ``compartment`` column.
        variances: y values for the 'variance' curve, one per entry of ``dates``.
        smoothness: y values for the 'smoothness' curve, one per entry of ``dates``.
        dates: sequence of dates; each is converted to the number of whole days
            since the earliest one to give the x values of the two curves above.

    Returns:
        None. A new 12x12 matplotlib figure is created and drawn on.
    """
    # Find the earliest date once instead of once per element; the guard keeps
    # an empty `dates` from raising, as in the per-element form.
    first_date = min(dates) if len(dates) > 0 else None
    shifts = [(date - first_date).days for date in dates]

    plt.figure(figsize=(12,12))
    plt.plot(losses['shift'], losses[compartment], 'o-', label='mape', color='red')
    plt.plot(shifts, variances, color='black', label='variance')
    plt.plot(shifts, smoothness, '--', color='blue', label='smoothness')
    plt.legend()
    plt.grid()
    plt.title(compartment)

In [None]:
# Per-compartment scale factors, indexed in the same order as `cols`.
# NOTE(review): these look hand-tuned to bring the curves onto the MAPE axis
# for a train period of 10 — confirm before reusing with other settings.
var_scale_factors = [10000, 200, 4000, 100000]  # train period 10
smooth_scale_factors = [2.5e54, 2e55, 5e53, 1e53]

for i, col in enumerate(cols):
    var_divisor = var_scale_factors[i]
    smooth_multiplier = smooth_scale_factors[i]

    smoothness = get_smoothness(col, smooth_data)
    variances, mean_smoothness, dates = get_var_series(col, smooth_data, 10, smoothness)

    # Variances are divided and smoothness values multiplied by their factors.
    scaled_variances = [v/var_divisor for v in variances]
    scaled_smoothness = [s*smooth_multiplier for s in mean_smoothness]

    # Only the first 35 windows are plotted against the validation losses.
    plot_analysis(col, val_res, scaled_variances[:35], scaled_smoothness[:35], dates[:35])

In [None]:
# Writes the literal text "Graphs" to the cell output.
# NOTE(review): looks like a leftover placeholder cell — confirm whether it is still needed.
print("Graphs")

## IHME s2 performance, SEIR s3 performance (with uncertainty)

In [None]:
# Consolidated IHME results for Pune (July 1 run), split by the "type" column.
res = pd.read_csv('../../outputs/consolidated/Pune_IHME_performance_July_1.csv')
row_type = res['type']
val_res = res.loc[row_type == 'val'].reset_index(drop=True)
train_res = res.loc[row_type == 'train'].reset_index(drop=True)
uncertainty_res = res.loc[row_type == 'average s2 uncertainty'].reset_index(drop=True)

In [None]:
# Consolidated SEIR s3 results for Pune (July 1 run); only the "val" rows are used.
res = pd.read_csv('../../outputs/consolidated/Pune_SEIR_s3_performance_July_1.csv')
is_val_row = res["type"] == "val"
seir_val_res = res.loc[is_val_row].reset_index(drop=True)
# Every third row, starting at offsets 0, 1 and 2, goes to exp1, exp2 and exp3.
# NOTE(review): presumably the rows cycle through the three experiments in
# order — confirm, since the split is purely positional.
exp1, exp2, exp3 = (seir_val_res.iloc[offset::3, :] for offset in range(3))

In [None]:
# IHME train MAPE (s1) against test MAPE (s2), one panel per compartment.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 3))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (train_res, 'IHME train MAPE on s1', 'blue'),
    (val_res, 'IHME test MAPE on s2', 'red'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

In [None]:
# IHME 'average s2 uncertainty' rows, one panel per compartment.
# NOTE(review): the y axis is labelled 'MAPE' although the curve comes from the
# uncertainty rows — confirm the intended axis label.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 3))
cols_plot = ['recovered', 'hospitalised', 'deceased']
for i, col in enumerate(cols_plot):
    panel = ax[i]
    panel.plot(
        uncertainty_res['shift'],
        uncertainty_res[col],
        'o-',
        label='IHME uncertainty on s2',
        color='black',
    )
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

In [None]:
# Ground-truth-trained vs. IHME-forecast-trained SEIR (July 1 run).
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp1, 'SEIR test MAPE on s3 (using ground truth train data)', 'green'),
    (exp2, 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

In [None]:
# All three SEIR experiments of the July 1 run, one panel per compartment.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp1, 'SEIR test MAPE on s3 (using ground truth train data)', 'green'),
    (exp2, 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
    (exp3, 'SEIR test MAPE on s3 (using SEIR forecast as train data)', 'blue'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

## Ahmedabad

In [None]:
# Consolidated IHME results for Ahmedabad (July 1 run), split by the "type" column.
res = pd.read_csv('../../outputs/consolidated/Ahmedabad_IHME_performance_July_1.csv')
row_type = res['type']
val_res = res.loc[row_type == 'val'].reset_index(drop=True)
train_res = res.loc[row_type == 'train'].reset_index(drop=True)
uncertainty_res = res.loc[row_type == 'average s2 uncertainty'].reset_index(drop=True)

In [None]:
# Consolidated SEIR s3 results for Ahmedabad (July 1 run); only the "val" rows are used.
res = pd.read_csv('../../outputs/consolidated/Ahmedabad_SEIR_s3_performance_July_1.csv')
is_val_row = res["type"] == "val"
seir_val_res = res.loc[is_val_row].reset_index(drop=True)
# Every third row, starting at offsets 0, 1 and 2, goes to exp1, exp2 and exp3.
# NOTE(review): presumably the rows cycle through the three experiments in
# order — confirm, since the split is purely positional.
exp1, exp2, exp3 = (seir_val_res.iloc[offset::3, :] for offset in range(3))

In [None]:
# IHME train vs. test MAPE for Ahmedabad, one panel per compartment.
# Both curves used to be labelled 'data' with no legend drawn; the labels now
# match the equivalent Pune figure so the two series can be told apart.
fig, ax = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(20, 3))
cols_plot = ['recovered', 'hospitalised', 'deceased']
for i, col in enumerate(cols_plot):
    ax[i].plot(train_res['shift'], train_res[col], 'o-', label='IHME train MAPE on s1', color='blue')
    ax[i].plot(val_res['shift'], val_res[col], 'o-', label='IHME test MAPE on s2', color='red')
    ax[i].title.set_text(col)
    ax[i].set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    ax[i].set_ylabel('MAPE', fontsize=10)
    ax[i].grid()
# The legend goes on the current axes; all panels share the same two curves.
plt.legend()

In [None]:
# Ground-truth-trained vs. IHME-forecast-trained SEIR for Ahmedabad.
# Both curves used to be labelled 'data' with no legend drawn; the labels now
# match the equivalent Pune figure so the two series can be told apart.
fig, ax = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
for i, col in enumerate(cols_plot):
    ax[i].plot(exp1['shift'], exp1[col], 'o-', label='SEIR test MAPE on s3 (using ground truth train data)', color='green')
    ax[i].plot(exp2['shift'], exp2[col], 'o-', label='SEIR test MAPE on s3 (using IHME forecast as train data)', color='red')
    ax[i].title.set_text(col)
    ax[i].set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    ax[i].set_ylabel('MAPE', fontsize=10)
    ax[i].grid()
# The legend goes on the current axes; all panels share the same two curves.
plt.legend()

In [None]:
# All three SEIR experiments for Ahmedabad, one panel per compartment.
# All curves used to be labelled 'data' with no legend drawn; the labels now
# match the equivalent Pune figure so the three series can be told apart.
fig, ax = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
for i, col in enumerate(cols_plot):
    ax[i].plot(exp1['shift'], exp1[col], 'o-', label='SEIR test MAPE on s3 (using ground truth train data)', color='green')
    ax[i].plot(exp2['shift'], exp2[col], 'o-', label='SEIR test MAPE on s3 (using IHME forecast as train data)', color='red')
    ax[i].plot(exp3['shift'], exp3[col], 'o-', label='SEIR test MAPE on s3 (using SEIR forecast as train data)', color='blue')
    ax[i].title.set_text(col)
    ax[i].set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    ax[i].set_ylabel('MAPE', fontsize=10)
    ax[i].grid()
# The legend goes on the current axes; all panels share the same three curves.
plt.legend()

## IHME s2 performance, SEIR s3 and baseline performance (July 5 run, c2_train_period = 5)

In [None]:
# Consolidated IHME results for Pune (July 5 run, c2_train_period = 5),
# split by the value of the "type" column.
res = pd.read_csv('../../outputs/consolidated/Pune_IHME_performance_July_5.csv')
row_type = res['type']
val_res = res.loc[row_type == 'val'].reset_index(drop=True)
train_res = res.loc[row_type == 'train'].reset_index(drop=True)
uncertainty_res = res.loc[row_type == 'average s2 uncertainty'].reset_index(drop=True)

In [None]:
# Consolidated SEIR s3 results for the July 5 run; keep the "val" rows, renumbered from zero.
res = pd.read_csv('../../outputs/consolidated/Pune_SEIR_s3_performance_July_5.csv')
is_val_row = res["type"] == "val"
seir_val_res = res.loc[is_val_row].reset_index(drop=True)
seir_val_res

In [None]:
# Split the validation rows by experiment: key k holds the rows with exp == k + 1.
exp = {
    k: seir_val_res.loc[seir_val_res["exp"] == k + 1].reset_index(drop=True)
    for k in range(4)
}

In [None]:
# IHME train MAPE (s1) against test MAPE (s2) for the July 5 run.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 3))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (train_res, 'IHME train MAPE on s1', 'blue'),
    (val_res, 'IHME test MAPE on s2', 'red'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

In [None]:
# One panel per compartment, comparing all four experiments of the July 5 run.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp[0], 'SEIR test MAPE on s3 (using ground truth train data(s1+s2))', 'green'),
    (exp[1], 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
    (exp[2], 'SEIR test MAPE on s3 (using SEIR forecast as train data)', 'blue'),
    (exp[3], 'SEIR test MAPE on s3 (using ground truth train data(s1))', 'black'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

In [None]:
# July 5 run without the SEIR-forecast experiment (exp[2]).
# NOTE(review): exp[0] is labelled '(s2)' here but '(s1+s2)' in the four-curve
# figure of this section — confirm which label is intended.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp[0], 'SEIR test MAPE on s3 (using ground truth train data (s2))', 'green'),
    (exp[1], 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
    (exp[3], 'SEIR test MAPE on s3 (using ground truth train data(s1))', 'black'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

In [None]:
# July 5 run: ground-truth-trained (exp[0]) vs. IHME-forecast-trained (exp[1]) SEIR.
# NOTE(review): exp[0] is labelled '(s2)' here but '(s1+s2)' in the four-curve
# figure of this section — confirm which label is intended.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp[0], 'SEIR test MAPE on s3 (using ground truth train data (s2))', 'green'),
    (exp[1], 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

## SEIR s3 and baseline performance (July 6 run, c2_train_period = 7)

In [None]:
# Consolidated SEIR s3 results for the July 6 run (c2_train_period = 7).
res = pd.read_csv('../../outputs/consolidated/Pune_SEIR_s3_performance_July_6.csv')
# Keep only the rows whose type is "val", renumbered from zero.
is_val_row = res["type"] == "val"
seir_val_res = res.loc[is_val_row].reset_index(drop=True)
seir_val_res

In [None]:
# Split the validation rows by experiment: key k holds the rows with exp == k + 1.
exp = {
    k: seir_val_res.loc[seir_val_res["exp"] == k + 1].reset_index(drop=True)
    for k in range(4)
}

In [None]:
# One panel per compartment, comparing all four experiments of the July 6 run.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp[0], 'SEIR test MAPE on s3 (using ground truth train data(s1+s2))', 'green'),
    (exp[1], 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
    (exp[2], 'SEIR test MAPE on s3 (using SEIR forecast as train data)', 'blue'),
    (exp[3], 'SEIR test MAPE on s3 (using ground truth train data(s1))', 'black'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

In [None]:
# July 6 run without the SEIR-forecast experiment (exp[2]).
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp[0], 'SEIR test MAPE on s3 (using ground truth train data (s1+s2))', 'green'),
    (exp[1], 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
    (exp[3], 'SEIR test MAPE on s3 (using ground truth train data(s1))', 'black'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
plt.legend()

In [None]:
# Same three curves as the previous figure, with the y axis limited to 0..50.
fig, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(20, 6))
cols_plot = ['recovered', 'hospitalised', 'deceased']
# (results frame, legend label, line colour) for every curve drawn in each panel.
curves = [
    (exp[0], 'SEIR test MAPE on s3 (using ground truth train data (s1+s2))', 'green'),
    (exp[1], 'SEIR test MAPE on s3 (using IHME forecast as train data)', 'red'),
    (exp[3], 'SEIR test MAPE on s3 (using ground truth train data(s1))', 'black'),
]
for i, col in enumerate(cols_plot):
    panel = ax[i]
    for curve_df, curve_label, curve_color in curves:
        panel.plot(curve_df['shift'], curve_df[col], 'o-', label=curve_label, color=curve_color)
    panel.title.set_text(col)
    panel.set_xlabel('Index of training start date (from 4-24-20)', fontsize=10)
    panel.set_ylabel('MAPE', fontsize=10)
    panel.grid()
    panel.set_ylim(0, 50)
plt.legend()