Generate results in various formats from one model

In [None]:
import pandas as pd
import numpy as np
import requests as rq
import datetime as dt
import torch
import json
import neptune

from src import constants
from src import data
from src import utils
from src import predictions
from matplotlib.ticker import MultipleLocator
from matplotlib.dates import DayLocator, AutoDateLocator, ConciseDateFormatter
%matplotlib inline

# Directory prefixed to the dataset filename when the source CSV is read.
DATA_DIR = 'data'
# Root of experiment outputs; the checkpoint uploaded to Neptune is addressed
# as EXPERIMENTS_DIR/<experiment_id>/<checkpoint>.
EXPERIMENTS_DIR = 'experiments'
# Torch device that input tensors are moved to before calling the model.
DEVICE = 'cpu'
# Project identifier passed to neptune.init().
NEPTUNE_PRJ = 'indiacovidseva/covid-net'

In [None]:
# Experiment and checkpoint to load; the same pair is reused at the end of the
# notebook to build the path of the artifact uploaded to Neptune.
experiment_id = "0001_test"
checkpoint = "latest-e100.pt"

# load_model returns the model and a checkpoint object; cp['config'] is read
# throughout the notebook for dataset and feature settings.
model, cp = utils.load_model(experiment_id, checkpoint)

In [None]:
# Columns pulled from the CSV named by the checkpoint config (DS.SRC).
owid_columns = [
    'location',
    'date',
    'total_cases',
    'new_cases',
    'total_deaths',
    'new_deaths',
    'population',
    'population_density',
    'gdp_per_capita',
    'hospital_beds_per_thousand',
    'median_age',
]
source_csv = "/".join([DATA_DIR, cp['config']['DS']['SRC']])

# 'date' is parsed into datetimes so later cells can do date arithmetic on it.
df = data.fix_anomalies_owid(
    pd.read_csv(source_csv, usecols=owid_columns, parse_dates=['date'])
)
# Show one random row as a quick sanity check of the loaded frame.
df.sample()

### Predict from OWID data

In [None]:
# Forecast one country's features from the OWID frame and plot actual vs predicted.
c = "India"  # value of df.location to predict for
n_days_prediction = 200  # requested forecast horizon in days

config = cp['config']
ds_config = config['DS']
ip_features = config['IP_FEATURES']
op_features = config['OP_FEATURES']
aux_features = config['AUX_FEATURES']
IP_SEQ_LEN = ds_config['IP_SEQ_LEN']
OP_SEQ_LEN = ds_config['OP_SEQ_LEN']

# Predictions can only be fed back as the next input window when the model
# emits exactly the features it consumes; otherwise a single step is all we get.
can_feed_back = ip_features == op_features
if not can_feed_back:
    print("WARNING: Input features and output features are different. Cannot predict more than", OP_SEQ_LEN, "days.")
    n_days_prediction = OP_SEQ_LEN

# Filter the country once instead of re-evaluating the mask for every lookup.
country_rows = df.loc[df.location == c]
if country_rows.empty:
    raise ValueError("No rows found for location " + repr(c))
# Only the period from the first day with >= 100 total cases is modelled.
reported_rows = country_rows.loc[country_rows.total_cases >= 100]

# Population in thousands: series are divided by it before being fed to the
# model and multiplied by it again before plotting.
pop_fct = country_rows['population'].iloc[0] / 1000
ip_aux = torch.tensor(
    np.array(country_rows[ds_config['AUX_FEATURES']].iloc[0])[aux_features],
    dtype=torch.float32
).to(DEVICE)
# 7-day centred rolling mean of the dataset features, scaled by pop_fct.
test_data = np.array(
    reported_rows[ds_config['FEATURES']].rolling(7, center=True, min_periods=1).mean() / pop_fct,
    dtype=np.float32
)
if len(test_data) < IP_SEQ_LEN:
    raise ValueError(
        "Need at least " + str(IP_SEQ_LEN) + " days of data for " + repr(c)
        + ", found " + str(len(test_data))
    )

in_data = test_data[-IP_SEQ_LEN:, ip_features]
# Ceiling division so the full requested horizon is covered even when it is not
# a multiple of OP_SEQ_LEN; the surplus is trimmed after the loop.
n_steps = -(-n_days_prediction // OP_SEQ_LEN)
pred_chunks = [np.empty((0, len(op_features)), dtype=np.float32)]
for _ in range(n_steps):
    ip = torch.tensor(in_data, dtype=torch.float32).to(DEVICE)
    pred = model.predict(
        ip.view(1, IP_SEQ_LEN, len(ip_features)),
        ip_aux.view(1, len(aux_features))
    ).view(OP_SEQ_LEN, len(op_features))
    pred_np = pred.cpu().numpy()
    pred_chunks.append(pred_np)
    if can_feed_back:
        # Append first, then keep the last IP_SEQ_LEN rows. Slicing with
        # -IP_SEQ_LEN+OP_SEQ_LEN breaks when OP_SEQ_LEN >= IP_SEQ_LEN because
        # the start index becomes 0 or positive.
        in_data = np.concatenate([in_data, pred_np], axis=0)[-IP_SEQ_LEN:]
out_data = np.concatenate(pred_chunks, axis=0)[:n_days_prediction]

start_date = reported_rows['date'].iloc[0]
# out_data columns are positional (one per entry of OP_FEATURES), while
# test_data columns are indexed by dataset feature id, so track both.
# NOTE(review): assumes the model emits outputs in OP_FEATURES order - confirm.
for op_col, o in enumerate(op_features):
    actual_df = pd.DataFrame({
        'actual': test_data[:, o] * pop_fct
    })
    fut_df = pd.DataFrame({
        'predicted': out_data[:, op_col] * pop_fct
    })
    # print(fut_df['predicted'].astype('int').to_csv(sep='|', index=False))
    # DataFrame.append was removed in pandas 2.0; concat gives the same stacked
    # frame with NaN where a column does not apply.
    orig_df = pd.concat([actual_df, fut_df], ignore_index=True, sort=False)
    orig_df['total'] = (orig_df['actual'].fillna(0) + orig_df['predicted'].fillna(0)).cumsum()

    # One calendar day per row, starting at the first modelled day.
    orig_df['Date'] = pd.date_range(start=start_date, periods=len(orig_df), freq='D')
    ax = orig_df.plot(
        x='Date',
        y=['actual', 'predicted'],
        title=c + ' ' + ds_config['FEATURES'][o],
        figsize=(10,6),
        grid=True
    )
    mn_l = DayLocator()
    ax.xaxis.set_minor_locator(mn_l)
    mj_l = AutoDateLocator()
    mj_f = ConciseDateFormatter(mj_l, show_offset=False)
    ax.xaxis.set_major_formatter(mj_f)
    # orig_df['total'] = orig_df['total'].astype('int')
    # orig_df['predicted'] = orig_df['predicted'].fillna(0).astype('int')
    # print(orig_df.tail(n_days_prediction))

    # arrow
    # peakx = 172
    # peak = orig_df.iloc[peakx]
    # peak_desc = peak['Date'].strftime("%d-%b") + "\n" + str(int(peak['predicted']))
    # _ = ax.annotate(
    #     peak_desc, 
    #     xy=(peak['Date'] - dt.timedelta(days=1), peak['predicted']),
    #     xytext=(peak['Date'] - dt.timedelta(days=45), peak['predicted'] * .9),
    #     arrowprops={},
    #     bbox={'facecolor':'white'}
    # )

    # _ = ax.axvline(x=peak['Date'], linewidth=1, color='r')

### Statewise predictions (covid19india)

In [None]:
# Statewise frame consumed by the prediction cell below (it reads the `date`
# column). Running the dummy-data cell afterwards rebinds states_df.
states_df = data.get_statewise_data()

#### Dummy state data: fruit country

In [None]:
# Synthetic state data for testing: every state starts at `total` cases on
# 2020-03-01 and grows 3% per day until today.
# SET 1 - 10 states
# STT_INFO = {
#     'A': {"name": "Apple", "popn": 10000000},
#     'B': {"name": "Berry", "popn": 10000000},
#     'C': {"name": "Cherry", "popn": 10000000},
#     'D': {"name": "Dates", "popn": 10000000},
#     'E': {"name": "Elderberry", "popn": 10000000},
#     'F': {"name": "Fig", "popn": 10000000},
#     'G': {"name": "Grape", "popn": 10000000},
#     'H': {"name": "Honeysuckle", "popn": 10000000},
#     'I': {"name": "Icaco", "popn": 10000000},
#     'J': {"name": "Jujube", "popn": 10000000},
# }
# total = 100
# SET 2 - 1 agg state
STT_INFO = {
    'Z': {"name": "FruitCountry1000x", "popn": 10000000},
}
total = 1000

start_date = dt.datetime(day=1, month=3, year=2020)
end_date = dt.datetime.now()
# Whole days elapsed, inclusive of both the first day and today.
day_count = (end_date - start_date).days + 1

rows = []
for day_offset in range(day_count):
    day = start_date + dt.timedelta(days=day_offset)
    # Every state shares the same running total for a given day.
    rows.extend({'state': s, 'date': day, 'total': total} for s in STT_INFO)
    # Compound step by step (rather than 1.03 ** n) to keep the float values.
    total *= 1.03

states_df = pd.DataFrame(rows, columns=['state', 'date', 'total'])
states_df['date'] = pd.to_datetime(states_df['date'])
states_df.tail()

#### Predict

In [None]:
# Settings for the statewise forecast.
feature = 0  # 0:confirmed, 1:deaths
n_days_prediction = 200  # number of days for prediction
prediction_offset = 1  # how many days of actual data to skip

# Date label for the run: the latest date in the data minus the skipped days.
latest_data_day = states_df['date'].max().to_pydatetime()
prediction_day = latest_data_day - dt.timedelta(days=prediction_offset)
prediction_date = prediction_day.strftime("%Y-%m-%d")

# NOTE(review): state metadata comes from constants.STT_INFO, not the STT_INFO
# defined in the dummy-data cell - confirm that is intended when testing with
# the dummy frame.
api = predictions.generate(
    states_df, constants.STT_INFO, model, cp,
    feature, n_days_prediction, prediction_offset,
    plot=True,
)

#### Export JSON for API

In [None]:
# Export the result of predictions.generate() (bound to `api` in the Predict cell).
predictions.export_tracker(api)

#### Export data for video player

In [None]:
# Export the same `api` result for the video player, tagged with the
# prediction date computed in the Predict cell.
# NOTE(review): 'vp.json' is presumably the output file name - confirm in predictions.
predictions.export_videoplayer(api,  prediction_date, 'vp.json')

#### Upload model to Neptune

In [None]:
# Attach the loaded checkpoint file to the Neptune experiment recorded in its config.
neptune_prj = neptune.init(NEPTUNE_PRJ)
matching_experiments = neptune_prj.get_experiments(id=cp['config']['NEPTUNE_ID'])
neptune_exp = matching_experiments[0]

# Same file that was loaded at the top: <EXPERIMENTS_DIR>/<experiment_id>/<checkpoint>.
checkpoint_path = "/".join((EXPERIMENTS_DIR, experiment_id, checkpoint))
neptune_exp.log_artifact(checkpoint_path)