In [None]:
import pandas as pd
import numpy as np

# Get truth data (`data-truth`) from the COVID-19 Forecast Hub

## Get incident case data

In [None]:
# Source: truth-Incident Cases.csv in the reichlab/covid19-forecast-hub repository.
url = 'https://media.githubusercontent.com/media/reichlab/covid19-forecast-hub/master/data-truth/truth-Incident%20Cases.csv'
# Fetched over HTTP on every run; the frame keeps pandas' default integer row labels.
raw_cases = pd.read_csv(filepath_or_buffer=url)

## Get incident hospitalization data

In [None]:
# Source: truth-Incident Hospitalizations.csv in the reichlab/covid19-forecast-hub repository.
url = 'https://media.githubusercontent.com/media/reichlab/covid19-forecast-hub/master/data-truth/truth-Incident%20Hospitalizations.csv'
# Fetched over HTTP on every run; the frame keeps pandas' default integer row labels.
raw_hosp = pd.read_csv(filepath_or_buffer=url)

## Get cumulative death data

In [None]:
# Source: truth-Cumulative Deaths.csv in the reichlab/covid19-forecast-hub repository.
url = 'https://media.githubusercontent.com/media/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Deaths.csv'
# Fetched over HTTP on every run; the frame keeps pandas' default integer row labels.
raw_deaths = pd.read_csv(filepath_or_buffer=url)
# Cell output: the loaded frame.
raw_deaths

In [None]:
def _with_sorted_dates(frame):
    '''Return a new frame whose ``date`` column is parsed to datetimes, sorted by date.

    ``infer_datetime_format`` is intentionally not passed to ``pd.to_datetime``:
    it has been deprecated since pandas 2.0 (it only emits a warning there) and
    format inference is the default behaviour.

    :param frame: DataFrame with a ``date`` column holding parseable date strings
    :returns: new DataFrame, ascending by ``date``; original row labels are kept
    '''
    parsed = frame.assign(date=pd.to_datetime(frame['date']))
    return parsed.sort_values(by='date', ascending=True)


# Rebinding (instead of inplace=True) keeps this cell idempotent on re-run.
raw_cases = _with_sorted_dates(raw_cases)
display(raw_cases.head())

raw_hosp = _with_sorted_dates(raw_hosp)
display(raw_hosp.head())

raw_deaths = _with_sorted_dates(raw_deaths)
display(raw_deaths.head())

In [None]:
# Label-based lookup: sorting by date keeps the row labels assigned at load
# time, so this is the date of the row labelled 1, not necessarily the
# second row of the sorted frame.
raw_hosp['date'].loc[1]

# Process US hospitalizations, deaths and cases

## Incident to census function

In [None]:
def incident_to_census(data, duration):
    '''Convert incident counts into a running census, updating ``data`` in place.

    An incident recorded in row ``i`` is counted as present in rows ``i``
    through ``i + duration - 1`` (clipped at the last row).  The amount added
    to ``census`` at row ``k`` is therefore the sum of ``value`` over the
    ``duration`` rows ending at ``k``.  The sums are added to whatever
    ``census`` already holds.

    Rows are processed by position, so ``data`` must already be in time order;
    the index labels themselves do not matter.

    :param data: DataFrame with a ``value`` column (incident data) and a ``census`` column (starting totals, typically 0)
    :param duration: number of rows an incident stays in the census before leaving; 0 leaves ``census`` unchanged, and values larger than ``len(data)`` are allowed
    :returns: the same DataFrame object with the ``census`` column updated
    :raises ValueError: if ``duration`` is negative
    '''
    if duration < 0:
        raise ValueError("duration must be non-negative")

    incident = data["value"].to_numpy()
    if duration == 0 or incident.size == 0:
        # Nothing ever enters the census; np.convolve also rejects empty inputs.
        return data

    # Convolving with a length-`duration` box of ones yields, at position k,
    # the sum of the last `duration` incident values up to and including k.
    # The "full" output has a tail beyond the data, which is cut off.
    box = np.ones(duration, dtype=incident.dtype)
    trailing_sum = np.convolve(incident, box)[:incident.size]

    # Assign the whole column at once: element-wise writes through
    # `data.census[i]` are chained assignments and do not reach the frame
    # under pandas Copy-on-Write.
    data["census"] = data["census"].to_numpy() + trailing_sum
    return data

## Convert US incident cases to census data

In [None]:
# Number of rows (one per date in the truth file — TODO confirm cadence) a
# case stays in the census.
infectious_period = 10

us_cases1 = raw_cases[raw_cases["location"] == "US"]

# .assign() returns a new frame, so the extra columns are never written onto a
# filtered slice of raw_cases (which triggers SettingWithCopyWarning).
# reset_index() keeps the old row labels in an "index" column and gives the
# frame 0..n-1 labels.
us_cases = (
    us_cases1
    .assign(infectious_period=infectious_period, census=0)
    .reset_index()
)

# incident_to_census updates us_cases in place and returns the same object.
cases_census = incident_to_census(us_cases, infectious_period)

In [None]:
from IPython.display import HTML

# Render every row of the case census as one HTML table.
cases_census_html = cases_census.to_html()
HTML(cases_census_html)

## Convert US incident hospitalizations to census data

In [None]:
# Number of rows (one per date in the truth file — TODO confirm cadence) an
# admission stays in the hospital census.
length_of_stay = 6

us_hosp1 = raw_hosp[raw_hosp["location"] == "US"]

# .assign() returns a new frame, so the extra columns are never written onto a
# filtered slice of raw_hosp (which triggers SettingWithCopyWarning).
# reset_index() keeps the old row labels in an "index" column and gives the
# frame 0..n-1 labels.
us_hosp = (
    us_hosp1
    .assign(length_of_stay=length_of_stay, census=0)
    .reset_index()
)

# Copy taken before the census is filled in, with every admission count set to 1.
# NOTE(review): mo_hosp is not used by any cell visible here — confirm it is still needed.
mo_hosp = us_hosp.copy()
mo_hosp["value"] = 1

# incident_to_census updates us_hosp in place and returns the same object.
hosp_census = incident_to_census(us_hosp, length_of_stay)

In [None]:
from IPython.display import HTML

# Render every row of the hospital census as one HTML table.
hosp_census_html = hosp_census.to_html()
HTML(hosp_census_html)

## Extract US cumulative deaths

In [None]:
# Keep only the rows whose location is "US"; the values are used as loaded
# (already cumulative per the source file name), with no census conversion.
is_us_row = raw_deaths["location"] == "US"
us_deaths = raw_deaths.loc[is_us_row]
us_deaths

## Export hospitalization, cases, and deaths individually

In [None]:
# Source column -> name written to the CSV header.
hosp_export_names = {
    "date": "date",
    "location": "location",
    "length_of_stay": "length_of_stay",
    "value": "admissions",
    "census": "hospital_census",
}
# The row index is written too (pandas default) as the first, unnamed column;
# the combine step reads it back with index_col=0.
us_hosp.to_csv(
    'us_hospitalization_census_and_incident_data.csv',
    columns=list(hosp_export_names.keys()),
    header=list(hosp_export_names.values()),
)

In [None]:
# Source column -> name written to the CSV header.
case_export_names = {
    "date": "date",
    "location": "location",
    "infectious_period": "infectious_period",
    "value": "new_cases",
    "census": "case_census",
}
# The row index is written too (pandas default) as the first, unnamed column;
# the combine step reads it back with index_col=0.
us_cases.to_csv(
    'us_case_census_and_incident_data.csv',
    columns=list(case_export_names.keys()),
    header=list(case_export_names.values()),
)

In [None]:
# Source column -> name written to the CSV header.
death_export_names = {
    "date": "date",
    "location": "location",
    "value": "cumulative_deaths",
}
# The row index is written too (pandas default) as the first, unnamed column;
# the combine step reads it back with index_col=0.
us_deaths.to_csv(
    'us_cumulative_death_data.csv',
    columns=list(death_export_names.keys()),
    header=list(death_export_names.values()),
)

# Combine US cases, hospitalizations, and deaths into one dataframe and export

In [None]:
import os
import pandas as pd

# Directory holding the three CSVs written by the export cells.
DATA_PATH = "."


def _read_date_indexed(csv_path):
    '''Read an exported CSV and index it by its ``date`` column.

    The first column (the row index saved by ``to_csv``) is loaded as the
    index and then replaced by ``date``, so it does not survive as a column.
    '''
    return pd.read_csv(csv_path, index_col=0).set_index("date")


cases_data_fname = os.path.join(DATA_PATH, "us_case_census_and_incident_data.csv")
cases_df = _read_date_indexed(cases_data_fname)
display(cases_df.head())

hospital_data_fname = os.path.join(DATA_PATH, "us_hospitalization_census_and_incident_data.csv")
hospital_df = _read_date_indexed(hospital_data_fname)
display(hospital_df.head())

death_data_fname = os.path.join(DATA_PATH, "us_cumulative_death_data.csv")
death_df = _read_date_indexed(death_data_fname)
display(death_df.head())

keep_cols = ["case_census", "cumulative_deaths", "hospital_census"]

# Cases and deaths are inner-joined on date (only dates present in both are
# kept); hospital data is left-joined, so dates without hospital rows stay in
# the result with missing hospital values.
obs_df = (
    cases_df
    .merge(death_df, how="inner", left_index=True, right_index=True)
    .merge(hospital_df, how="left", left_index=True, right_index=True)
)
obs_df[keep_cols].to_csv("US_case_hospital_death.csv")

obs_df[keep_cols]
