In [None]:
import numpy as np
import pandas as pd
from keras.models import Model, load_model

# Goal
Predict the daily number of new cases for the given countries and regions under a given intervention plan.


## Intervention plan
An intervention plan consists of a list of containment and closure policies, as well as health system policies. See https://github.com/OxCGRT/covid-policy-tracker/blob/master/documentation/codebook.md for more details. The following policies are considered:

In [None]:
# Policy indicators that make up an intervention plan, named as in the
# OxCGRT codebook: C1-C8 are containment/closure policies, H1-H3 are
# health system policies. Order is preserved as listed here.
NPI_COLUMNS = [
    "C1_School closing",
    "C2_Workplace closing",
    "C3_Cancel public events",
    "C4_Restrictions on gatherings",
    "C5_Close public transport",
    "C6_Stay at home requirements",
    "C7_Restrictions on internal movement",
    "C8_International travel controls",
    "H1_Public information campaigns",
    "H2_Testing policy",
    "H3_Contact tracing",
]

## Example
For the given input, provide a model that can produce the expected output.

### Passed input
One row represents a country, region and day. For each row, the interventions that were in place on that day for that country and region are provided. Interventions can change from one day to the next for each country and region.

In [None]:
# Example intervention-plan CSV, given as a path relative to the working directory.
# NOTE(review): the file name suggests it covers 2020-08-01 to 2020-08-04 — confirm.
EXAMPLE_INPUT_FILE = "20200801_20200804_npis.csv"

In [None]:
# Load the example intervention plan. The 'Date' column is parsed into
# datetimes so that min()/max() on it yield the prediction window.
prediction_input_df = pd.read_csv(
    EXAMPLE_INPUT_FILE,
    encoding="ISO-8859-1",
    parse_dates=["Date"],
)

In [None]:
# Preview the first rows of the loaded intervention plan.
prediction_input_df.head()

### Expected output
For each row that was provided as input, i.e. for each country, region and day, the output should contain an additional `PredictedDailyNewCases` column with the predicted number of cases for that day, region and country. It is possible to leave `PredictedDailyNewCases` empty or NaN, or to remove the row, if no prediction is available.

In [None]:
# Example predictions CSV matching the example input, given as a path
# relative to the working directory.
EXAMPLE_OUTPUT_FILE = "20200801_20200804_predictions.csv"

In [None]:
# Load the example predictions, read with the same options as the example
# input: 'Date' parsed as datetimes, ISO-8859-1 text encoding.
prediction_output_df = pd.read_csv(
    EXAMPLE_OUTPUT_FILE,
    encoding="ISO-8859-1",
    parse_dates=["Date"],
)

In [None]:
# Preview the first rows of the expected output.
prediction_output_df.head()

# Training a model

In [None]:
# Main source for the training data.
# NOTE: despite the name, the value is a local path relative to the working
# directory, not a remote URL.
DATA_URL = "tests/fixtures/OxCGRT_latest.csv"

In [None]:
# Treat any data after this date as not yet known.
# The value is handed to XPrizePredictor when the predictor is constructed.
CUTOFF_DATE = np.datetime64("2020-07-31")

In [None]:
# Re-import the local xprize_predictor module so that changes made to it
# since the kernel first imported it are picked up without a restart.
from importlib import reload

import xprize_predictor

reload(xprize_predictor)

# Bind the class only after the reload so it comes from the refreshed module.
from xprize_predictor import XPrizePredictor

In [None]:
# Build a predictor without a model file: the first argument is None here,
# whereas the prediction section passes a saved model path in that position.
# NOTE(review): presumably None means "no pre-trained weights, train from
# scratch" — confirm in xprize_predictor.
predictor = XPrizePredictor(None, DATA_URL, CUTOFF_DATE, NPI_COLUMNS)

In [None]:
# Run training; the returned model object is what gets saved to disk next.
predictor_model = predictor.train()

In [None]:
# Persist the trained model to the working directory. The prediction section
# loads the model from this same file name.
# NOTE(review): the '.h5' extension suggests an HDF5/Keras model — confirm.
predictor_model.save("trained_model.h5")

# Predicting using a model

## Load candidate model

In [None]:
# Path of the candidate model to load; this is the same file name the
# training section saves its model to.
model_file = "trained_model.h5"

In [None]:
# Rebind `predictor` to a new instance constructed with the saved model file;
# this replaces the instance that was used for training.
predictor = XPrizePredictor(model_file, DATA_URL, CUTOFF_DATE, NPI_COLUMNS)

## Make prediction

In [None]:
# Prediction window: the earliest and latest dates present in the input plan.
start_date, end_date = (
    prediction_input_df["Date"].min(),
    prediction_input_df["Date"].max(),
)

In [None]:
# Predict over [start_date, end_date] using the interventions in the input plan.
# NOTE(review): whether end_date is inclusive depends on XPrizePredictor.predict — confirm.
preds_df = predictor.predict(start_date, end_date, prediction_input_df)

In [None]:
# Preview the first rows of the predictions.
preds_df.head()

In [None]:
# Disabled: uncommenting this line would overwrite EXAMPLE_OUTPUT_FILE (the
# example predictions CSV read earlier) with the freshly computed predictions.
# preds_df.to_csv(EXAMPLE_OUTPUT_FILE, index=False)