In [1]:
import pandas as pd
from scipy.stats import norm

import imperial_model
from imperial_model import ImperialModel

In [2]:
# Build the Imperial model wrapper, pick the counties to train on, and fit.
# The captured output of this cell shows PyStan compiling C++ for the model
# before training starts.
# NOTE(review): the bare positional arguments (30 here; 18, "2020-05-16", 200
# and 4 in the training call) are not explained anywhere in this notebook --
# presumably a county count and training/sampler settings; confirm against
# imperial_model and consider naming them as constants.
imp_model = ImperialModel()
imp_counties = imperial_model.get_top_cumuluative_death_counties(30, "2020-05-24")
fit = imp_model.train_imperial_model(imp_counties, "2020-05-24", 18, "2020-05-16", 200, 4)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_67df06f30f21e8ec3873e0c072506f90 NOW.


Compiling model...
Training model...


To run all diagnostics call pystan.check_hmc_diagnostics(fit)


Training completed.


In [3]:
# FIPS key table; the file is read with an explicit latin-1 encoding.
key = pd.read_csv("../data/us/processing_data/fips_key.csv", encoding='latin-1')
# Every value of the key file's "FIPS" column (not referenced by the cells shown here).
all_fips = key["FIPS"].tolist()

# Predictions pulled out of the fitted model. The submission code indexes this
# object as preds[fips][date_index], with integer FIPS keys.
imperial_preds = imp_model.get_imperial_predictions_map(fit)

In [7]:
# Submission template: its "id" column supplies the row ids and its column
# names are reused as the header of the generated submission.
sample_submission = pd.read_csv("../sample_submission.csv")
# The 14 consecutive forecast days, 2020-05-25 through 2020-06-07 inclusive,
# as 'YYYY-MM-DD' strings; a date's position in this list is its prediction index.
dates_of_interest = pd.date_range("2020-05-25", "2020-06-07").strftime("%Y-%m-%d").tolist()

def get_id_list():
    """Return the values of the "id" column of the module-level ``sample_submission``."""
    id_column = sample_submission["id"]
    return id_column.values

def extract_date_from_id(row_id):
    """Return the part of ``row_id`` that precedes its last '-'.

    Example: '2020-05-25-42045' -> '2020-05-25'. An id that contains no '-'
    yields the empty string.
    """
    date_part, _dash, _last_field = row_id.rpartition('-')
    return date_part

def extract_fips_from_id(row_id):
    """Return the part of ``row_id`` that follows its last '-', as a string.

    Example: '2020-05-25-42045' -> '42045'. An id that contains no '-' is
    returned unchanged.
    """
    _head, _dash, last_field = row_id.rpartition('-')
    return last_field

def generate_quantiles(value, err):
    """Return the nine deciles (10% .. 90%) of a normal distribution.

    ``value`` is passed to ``scipy.stats.norm.ppf`` as ``loc`` (the mean) and
    ``err`` as ``scale`` (the standard deviation). When ``err`` equals zero the
    distribution is treated as a point mass and ``value`` is repeated nine
    times instead.

    Returns a list of nine numbers in increasing quantile-level order.
    """
    if err != 0:
        decile_levels = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        return [norm.ppf(level, loc=value, scale=err) for level in decile_levels]

    # Zero spread: every quantile collapses onto the point estimate.
    return [value] * 9

def create_submission_from_map(preds, rel_err=0.6, output_path="top_30_imperial_submission.csv"):
    """Write a quantile submission CSV built from a map of point predictions.

    One output row is produced for every id returned by ``get_id_list()``.
    Each id is split into a date and an integer FIPS code. If the FIPS code is
    missing from ``preds`` or the date is not in ``dates_of_interest``, the row
    is filled with nine "0.00" entries. Otherwise the point prediction
    ``preds[fips][i]`` (``i`` being the date's position in
    ``dates_of_interest``) is expanded into nine quantiles by
    ``generate_quantiles``; negative quantiles are clamped to zero and every
    value is formatted with two decimals.

    Parameters
    ----------
    preds : mapping
        Indexed as ``preds[fips][date_index]`` with integer FIPS keys.
    rel_err : float, optional
        The standard deviation handed to ``generate_quantiles`` is
        ``point * rel_err``. Defaults to 0.6, the value previously hard-coded.
    output_path : str, optional
        Destination of the CSV. Defaults to the previously hard-coded
        "top_30_imperial_submission.csv".

    Returns
    -------
    None
        The result is only written to ``output_path``.

    Relies on the module-level ``sample_submission`` (for the header) and
    ``dates_of_interest``.
    """
    zero_cells = ["%.2f" % 0.00] * 9

    # Resolve each date's position once rather than rescanning the list with
    # list.index() for every row. setdefault keeps the first occurrence, which
    # is what list.index() would have returned.
    date_positions = {}
    for position, known_date in enumerate(dates_of_interest):
        date_positions.setdefault(known_date, position)

    rows = []
    for row_id in get_id_list():
        date = extract_date_from_id(row_id)
        fips = int(extract_fips_from_id(row_id))
        date_index = date_positions.get(date)

        # No prediction for this county, or a date outside the forecast window.
        if fips not in preds or date_index is None:
            rows.append([row_id] + zero_cells)
            continue

        point = preds[fips][date_index]
        quantiles = generate_quantiles(point, point * rel_err)
        # The normal approximation can dip below zero; such quantiles are
        # reported as zero.
        rows.append([row_id] + ["%.2f" % (0.00 if q < 0 else q) for q in quantiles])

    df = pd.DataFrame(rows, columns=sample_submission.columns)
    df.to_csv(output_path, index=False, sep=',')

In [8]:
# Build the submission from the Imperial predictions; the file is written to
# top_30_imperial_submission.csv in the current working directory.
create_submission_from_map(imperial_preds)

In [9]:
# Spot-check: display the deaths array returned for id 42045 with
# endDate="2020-05-24".
# NOTE(review): 42045 is presumably a county FIPS code and the values
# presumably daily counts -- confirm in imperial_util.
imperial_model.imperial_util.get_deaths_list(42045, endDate="2020-05-24")

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  2.,  1.,  0.,  0.,  2.,
        1.,  1.,  2.,  3.,  1.,  1.,  5.,  3.,  3.,  4.,  9.,  0.,  1.,
        6., 14., 11.,  3.,  4., 13.,  9., 18.,  6.,  0.,  7., 11.,  1.,
        2., 19., 60., 11.,  5., 15.,  0.,  3., 62.,  6., 50.,  6., 12.,
        1.,  4.,  6., 14., 29., 18., 12.,  0.,  0., 13.,  0.,  0., 27.,
        3.,  0.])