In [None]:
import os
import sys
import time

import dill as pickle

import functools
import operator
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
pd.options.display.max_rows = 999
pd.options.display.max_columns = 99
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

sys.path.append('..')
from data import get_input_data, plot_crude_rates
from drawer import Drawer
from utilities import CompareModelDeaths, COV_SETTINGS, KS, RATE_THRESHOLD, submit_curvefit

import warnings
warnings.simplefilter('ignore')

# Run category; used as a path component when OUTPUT_DIR is built.
RUN_TYPE = 'prod'
# NOTE(review): ENV is not referenced in the visible cells — presumably consumed elsewhere; confirm.
ENV = 'prod'
# Label of this run; used as the final path component of OUTPUT_DIR.
DATESTAMP_LABEL = '2020_04_07_US'
# Version identifier passed to every get_input_data() call.
DATA_VERSION = '2020_04_07.1'


In [None]:
# os.path.abspath('') resolves to the current working directory.
CODE_DIR = os.path.abspath('')
# Folder that this run reads from and writes to, keyed by run type and datestamp label.
OUTPUT_DIR = f'/ihme/covid-19/deaths/{RUN_TYPE}/{DATESTAMP_LABEL}'

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it is safe if the
# directory appears between check and creation, and it also creates a missing
# parent (e.g. a new RUN_TYPE folder) instead of raising FileNotFoundError.
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f'Writing to {OUTPUT_DIR}')


In [None]:
# Load the four input tables for DATA_VERSION, in the order: full data, deaths,
# age-specific population, age-specific deaths.
# TODO: come up with more informative names...
input_full_df, input_death_df, input_age_pop_df, input_age_death_df = (
    get_input_data(table_name, DATA_VERSION)
    for table_name in ('full_data', 'deaths', 'age_pop', 'age_death')
)
# # Dropping recent Montana data due to slow growth resulting in implausible backcast
# input_death_df = input_death_df.loc[(input_death_df['Location']!="Montana") | (input_death_df['Date'] < pd.Timestamp("2020-04-01"))]


## Load threshold date draws (`date_df`)

In [None]:
# Read the threshold dates written to OUTPUT_DIR and parse every
# 'death_date_draw_*' column from text into datetimes.
date_df = pd.read_csv(f'{OUTPUT_DIR}/threshold_dates.csv')
date_draws = list(
    filter(lambda column: column.startswith('death_date_draw_'), date_df.columns)
)
for date_draw in date_draws:
    parsed_dates = pd.to_datetime(date_df[date_draw])
    date_df[date_draw] = parsed_dates


## Test draws for a single location

In [None]:
# draw_cols = [f'draw_{i}' for i in range(1000)]
# df = pd.read_csv(f'{OUTPUT_DIR}/state_data.csv')
# location_ids = df['location_id'].unique().tolist()

# sub_dfs = []
# for location_id in location_ids:
#     sub_df = df.loc[df['location_id'] == location_id].reset_index(drop=True)
#     sub_df['cumulative_deaths'] = sub_df[draw_cols].mean(axis=1)
#     sub_df['daily_deaths'] = sub_df['cumulative_deaths']
#     sub_df['daily_deaths'][1:] = (sub_df[draw_cols].values[1:] - sub_df[draw_cols].values[:-1]).mean(axis=1)
#     sub_df = sub_df[['location_id', 'location', 'date', 'cumulative_deaths', 'daily_deaths']]
#     sub_df['peak_date'] = False
#     sub_df['peak_date'][sub_df['daily_deaths'].values.argmax()] = True
#     assert sub_df['peak_date'].sum() == 1, 'Multiple/no peak dates'
#     sub_dfs.append(sub_df[['location_id', 'location', 'date', 'peak_date']])

# sub_df = pd.concat(sub_dfs)
# df = sub_df.merge(df)
# df.to_csv(f'{OUTPUT_DIR}/state_data_w_peak.csv', index=False)
# print(f'{OUTPUT_DIR}/state_data_w_peak.csv')

# Set up the ensemble: one model output directory per (covariate setting, k)
# combination, collected in model_out_dirs in iteration order.
model_out_dirs = []
for cov_sort, weights in COV_SETTINGS:
    for k in KS:
        model_out_dir = f'{OUTPUT_DIR}/model_data_{cov_sort}_{k}'
        # exist_ok=True avoids the check-then-create race of exists()/mkdir()
        # and makes re-running the cell a no-op for directories already present.
        os.makedirs(model_out_dir, exist_ok=True)
        model_out_dirs.append(model_out_dir)

# Location under test (name and id are set together by hand).
location_name = 'Wyoming'
location_id = 573

# Slice each input down to the chosen location before handing it to Drawer.
location_obs_df = input_full_df.loc[input_full_df['Province/State'] == location_name]
location_date_draws = date_df.loc[date_df['location'] == location_name, date_draws].values
location_population = input_age_pop_df.loc[
    input_age_pop_df['location_id'] == location_id, 'population'
].sum()

# Build the drawer over all ensemble directories and pull the dated draws.
data_draws = Drawer(
    ensemble_dirs=model_out_dirs,
    location_name=location_name,
    location_id=location_id,
    obs_df=location_obs_df,
    date_draws=location_date_draws,
    population=location_population,
)
draw_df, past_df, model_used, days, ensemble_draws = data_draws.get_dated_draws()


In [None]:
# Display the first value returned by data_draws.get_dated_draws() for the selected location.
draw_df