In [None]:
from datetime import datetime
from datetime import timezone
import pandas as pd

from covid_xprize.scoring.predictor_scoring import load_dataset
from covid_xprize.validation.scenario_generator import generate_scenario

# Scenario generator

## Latest data

In [None]:
# Location of the latest OxCGRT policy-tracker CSV export
LATEST_DATA_URL = "https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv"
# Geo file handed to load_dataset (relative path)
GEO_FILE = '../../countries_regions.csv'
# Download and load the latest data; reused by the scenarios below
latest_df = load_dataset(
    LATEST_DATA_URL,
    GEO_FILE,
)

In [None]:
# Number of distinct CountryName values (a missing value would count as one)
latest_df["CountryName"].nunique(dropna=False)

In [None]:
# Number of distinct RegionName values (a missing value would count as one)
latest_df["RegionName"].nunique(dropna=False)

# Scenario: historical IP until 2020-09-30
Latest historical data, truncated to the specified end date

In [None]:
# Parameters for the "Historical" scenario
countries = None       # no explicit country list
start_date_str = None  # no explicit start date
end_date_str = '2020-09-30'
# Target path used by the (commented-out) export cell of this section
output_file = 'data/2020-09-30_historical_ip.csv'

In [None]:
# Build the "Historical" scenario from the latest data using the parameters above
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries,
    scenario='Historical',
)

In [None]:
# Most recent date present for Italy in the generated scenario
scenario_df.loc[scenario_df["CountryName"] == "Italy", "Date"].max()

In [None]:
# Keep only the rows dated on or before end_date_str
truncation_date = pd.to_datetime(end_date_str, format="%Y-%m-%d")
scenario_df = scenario_df.loc[scenario_df["Date"].le(truncation_date)]

In [None]:
# Show the last 5 rows to eyeball the truncated scenario
scenario_df.tail(n=5)

In [None]:
# Write to file
# scenario_df.to_csv(output_file, index=False)

# Scenario: frozen NPIs
Latest historical data + frozen NPIs between the last known date and the end of January 2021, for India and Mexico

## Generate

In [None]:
# Parameters for the "Freeze" scenario: January 2021, two countries
countries = ['India', 'Mexico']
start_date_str, end_date_str = '2021-01-01', '2021-01-31'

In [None]:
# Build the "Freeze" scenario for the selected countries and date range
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries,
    scenario='Freeze',
)

In [None]:
# Total number of rows in the generated scenario
scenario_df.shape[0]

In [None]:
# Countries present in the generated scenario
scenario_df["CountryName"].unique()

In [None]:
# Show the last 5 rows of the generated scenario
scenario_df.tail(n=5)

## Save

In [None]:
# Write to a file
# hist_file_name = "data/future_ip.csv"
# scenario_df.to_csv(hist_file_name, index=False)

# Robojudge test: December
IP file to test robojudge for the month of December

## Generate

In [None]:
# UTC timestamp that is later embedded in the output file name.
# datetime.utcnow() is deprecated since Python 3.12; a timezone-aware "now" in UTC
# formats to exactly the same string with this pattern.
today = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
start_date_str = "2020-12-01"
end_date_str = "2020-12-31"
# Reload the latest data so this section works with a fresh download
latest_df = load_dataset(LATEST_DATA_URL, GEO_FILE)
# No explicit country list (presumably means "all geos" -- confirm in generate_scenario)
countries = None
scenario_df = generate_scenario(start_date_str, end_date_str, latest_df, countries, scenario="Freeze")

In [None]:
# Sanity check: rows per geo, expected to equal the 366 days of 2020
nb_countries = scenario_df["CountryName"].nunique(dropna=False)
# Subtract one to leave the '' region out of the count
nb_regions = scenario_df["RegionName"].nunique(dropna=False) - 1
scenario_df.shape[0] / (nb_countries + nb_regions)

## Save

In [None]:
# Write the December test IP file, named <timestamp>_<start>_<end>_ips.csv.
# `today`, `end_date_str` and `scenario_df` come from the "Generate" cell of this section.
# (The redundant mid-notebook `from datetime import datetime` was dropped: it is already
# imported in the first cell and is not used here.)
sd = 20200101  # IP file always contains data since inception
ed = end_date_str.replace('-', "")  # e.g. "2020-12-31" -> "20201231"
december_file_name = f"../../../covid-xprize-robotasks/ips/tests/{today}_{sd}_{ed}_ips.csv"
scenario_df.to_csv(december_file_name, index=False)
print(f"Saved to {december_file_name}")

# Robojudge: Official
IP file that robojudge uses for its daily evaluation of submissions

## Generate

In [None]:
# UTC timestamp that is later embedded in the output file name.
# datetime.utcnow() is deprecated since Python 3.12; a timezone-aware "now" in UTC
# formats to exactly the same string with this pattern.
today = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
# Evaluation window: 2020-12-22 through 2021-06-19 (180 days)
start_date_str = "2020-12-22"
end_date_str = "2021-06-19"
# Reload the latest data so this section works with a fresh download
latest_df = load_dataset(LATEST_DATA_URL, GEO_FILE)
# No explicit country list (presumably means "all geos" -- confirm in generate_scenario)
countries = None
scenario_df = generate_scenario(start_date_str, end_date_str, latest_df, countries, scenario="Freeze")

In [None]:
# Sanity check: rows per geo, expected to be 536:
# 366 days of 2020 + 170 days of 2021
# (the evaluation window itself is 10 days in 2020 + 170 days in 2021 = 180 days)
nb_countries = scenario_df["CountryName"].nunique(dropna=False)
# Subtract one to leave the 'nan' region out of the count
nb_regions = scenario_df["RegionName"].nunique(dropna=False) - 1
scenario_df.shape[0] / (nb_countries + nb_regions)

In [None]:
# Number of distinct CountryName values in the official scenario
scenario_df["CountryName"].nunique(dropna=False)

In [None]:
# Number of distinct RegionName values in the official scenario
scenario_df["RegionName"].nunique(dropna=False)

## Save

In [None]:
# Write the official (live) IP file, named <timestamp>_<start>_<end>_ips.csv.
# `today`, `end_date_str` and `scenario_df` come from the "Generate" cell of this section.
# (The redundant mid-notebook `from datetime import datetime` was dropped: it is already
# imported in the first cell and is not used here.)
sd = 20200101  # IP file always contains data since inception
ed = end_date_str.replace('-', "")  # e.g. "2021-06-19" -> "20210619"
# Named for what it holds: this is the live file, not the December test file
official_file_name = f"../../../covid-xprize-robotasks/ips/live/{today}_{sd}_{ed}_ips.csv"
scenario_df.to_csv(official_file_name, index=False)
print(f"Saved to {official_file_name}")