In [57]:
import numpy as np
import pandas as pd
import random
import math

from scipy.stats import norm

# Imported for its helper functions
import imperial_model

In [58]:
# Load the nine per-model submission files (all dated 5-24), in the same order
# as before.  The numbered names are what the blending loop refers to.
(
    predictions1,
    predictions2,
    predictions3,
    predictions4,
    predictions5,
    predictions6,
    predictions7,
    predictions8,
    predictions9,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "submissions/arima_model_5-24.csv",
        "submissions/arima_residuals_5-24.csv",
        "submissions/benchmark_5-24.csv",
        "submissions/case_model_5-24.csv",
        "submissions/delphi_5-24.csv",
        "submissions/historian_model_5-24.csv",
        "submissions/imperial_model_5-24.csv",
        "submissions/rnn_residuals_5-24.csv",
        "submissions/svm_5-24.csv",
    )
)

In [None]:
# County groups that decide which blend a county receives.  The helper is
# called with a count and the cutoff date "2020-05-24"; judging by its name it
# returns counties ranked by cumulative deaths -- TODO confirm in imperial_model.
#   imperial_fips   : the 30 counties returned for a count of 30
#   historian_fips  : entries 31..100 of the list returned for a count of 100
#   case_model_fips : all entries returned for a count of 100
imperial_fips = {
    *imperial_model.get_top_cumuluative_death_counties(30, "2020-05-24")
}
historian_fips = {
    *imperial_model.get_top_cumuluative_death_counties(100, "2020-05-24")[30:]
}
case_model_fips = {
    *imperial_model.get_top_cumuluative_death_counties(100, "2020-05-24")
}

In [None]:
def generate_quantiles(value, err):
    """Return the nine decile quantiles (10% .. 90%) of a normal distribution.

    Parameters
    ----------
    value : number
        Mean (location) of the normal distribution.
    err : number
        Standard error (scale) of the normal distribution.

    Returns
    -------
    list
        Nine entries ordered from the 10% to the 90% quantile.  When ``err``
        is exactly zero the distribution is degenerate and ``value`` itself
        is returned nine times.
    """
    deciles = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

    # A zero scale is not a valid normal distribution; short-circuit it.
    if err == 0:
        return [value for _ in deciles]

    return [norm.ppf(level, loc=value, scale=err) for level in deciles]

def extract_date_from_id(row_id):
    """Return everything in ``row_id`` before its last '-' (the date part).

    An id without any '-' yields an empty string.
    """
    date_part, _, _ = row_id.rpartition('-')
    return date_part

def extract_fips_from_id(row_id):
    """Return the part of ``row_id`` after its last '-' (the FIPS code, as text).

    An id without any '-' is returned unchanged.
    """
    _, _, fips_part = row_id.rpartition('-')
    return fips_part

# Assemble the final ensemble submission: one row per id in the sample
# submission, each holding nine quantile forecasts formatted as two-decimal
# strings.
sample_submission = pd.read_csv("../sample_submission.csv")

# Only these dates are forecast; rows for any other date are filled with zeros.
dates_of_interest = ['2020-05-25', '2020-05-26', '2020-05-27', '2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31', '2020-06-01', '2020-06-02', '2020-06-03', '2020-06-04', '2020-06-05', '2020-06-06', '2020-06-07']
_forecast_dates = frozenset(dates_of_interest)

# Counties whose rows are always written as zeros (reason not recorded here).
_ZEROED_FIPS = frozenset((44001, 44003, 44005, 44007, 44009))

# The standard error passed to generate_quantiles is this fraction of the median.
_RELATIVE_ERR = 0.6

# Every all-zero row uses the same two-decimal formatting as computed rows so
# the written CSV is uniform.
_ZERO_QUANTILES = ['{0:.2f}'.format(0)] * 9


def _forecast_by_id(predictions):
    """Map each id of a predictions frame to the value in its sixth column.

    Position 5 is the column the blend reads from every model file
    (presumably the 50% quantile -- TODO confirm against the file headers).
    Building the mapping once replaces a full-frame ``query`` per row and per
    model.  When an id is repeated, the first row wins.
    """
    first_rows = predictions.drop_duplicates(subset="id", keep="first")
    return dict(zip(first_rows["id"], first_rows.iloc[:, 5]))


def _cap(value, limit):
    """Clamp ``value`` from above at ``limit``.

    ``value`` is deliberately the first argument to ``min``: a NaN prediction
    then stays NaN (and is zeroed later) instead of silently turning into the
    cap itself, which is what ``min(limit, nan)`` would return.
    """
    return min(value, limit)


def _small_county_quantiles(recent_deaths):
    """Return hand-set quantiles for a low-count county, or None.

    The heuristics apply only when exactly 14 daily values are available and
    their sum, excluding the single largest day, is below 30.  Otherwise None
    is returned and the caller falls back to the blended model forecast.
    """
    if len(recent_deaths) != 14:
        return None

    zero_count = recent_deaths.count(0)
    total_deaths = np.sum(recent_deaths) - np.max(recent_deaths)

    # Written as "not <" so a NaN total falls through to the model blend.
    if not (total_deaths < 30):
        return None

    if total_deaths > 20 and zero_count < 8:
        return [0, 0, 1, 1, 2, 2, 3, 3, 4]
    if total_deaths > 10 and zero_count < 8:
        return [0, 0, 0, 0, 1, 1, 1, 2, 2]
    if total_deaths > 5 and zero_count < 10:
        return [0, 0, 0, 0, 0, 0, 1, 1, 1]
    return [0] * 9


def _finalize_quantiles(quantiles):
    """Clean, round and format nine raw quantiles for the submission file.

    NaN and negative values become 0; the five lower quantiles (including the
    median) are rounded down and the four upper ones rounded up; everything
    is rendered with two decimals.
    """
    cleaned = [0 if math.isnan(q) else max(q, 0) for q in quantiles]
    rounded = [math.floor(q) for q in cleaned[:5]] + [math.ceil(q) for q in cleaned[5:]]
    return ['{0:.2f}'.format(q) for q in rounded]


# One id -> forecast mapping per model file, built once up front.
arima_by_id = _forecast_by_id(predictions1)
arima_residuals_by_id = _forecast_by_id(predictions2)
benchmark_by_id = _forecast_by_id(predictions3)
case_by_id = _forecast_by_id(predictions4)
delphi_by_id = _forecast_by_id(predictions5)
imperial_by_id = _forecast_by_id(predictions7)
rnn_residuals_by_id = _forecast_by_id(predictions8)
svm_by_id = _forecast_by_id(predictions9)
# NOTE(review): predictions6 (the historian model) is loaded but none of the
# blends below has a historian term, so it is not looked up here.  Confirm
# that leaving it out of the blend for historian_fips is intentional.

lists = []
for row_id in sample_submission["id"].values:
    fips = int(extract_fips_from_id(row_id))
    date = extract_date_from_id(row_id)

    if date not in _forecast_dates or fips in _ZEROED_FIPS:
        lists.append([row_id] + _ZERO_QUANTILES)
        continue

    # The key is rebuilt from the parsed date and the integer FIPS, so zero
    # padding in the FIPS part of row_id (if any) is not carried into the lookup.
    key = date + "-" + str(fips)

    arima = arima_by_id[key]
    arima_residuals = arima_residuals_by_id[key]
    benchmark = benchmark_by_id[key]
    delphi = delphi_by_id[key]
    imperial = imperial_by_id[key]
    rnn_residuals = rnn_residuals_by_id[key]

    # Impose guards against extreme upper outliers: the case and svm models
    # are capped at 120 for counties in imperial_fips and at 20 elsewhere.
    in_imperial = fips in imperial_fips
    outlier_cap = 120 if in_imperial else 20
    case = _cap(case_by_id[key], outlier_cap)
    svm = _cap(svm_by_id[key], outlier_cap)

    quantiles = None

    if fips == 36061:
        # This county gets its own, imperial-heavy blend.
        median_pred = _cap(
            arima * 0.1 + arima_residuals * 0.1 + case * 0.15 + imperial * 0.6 + svm * 0.05,
            120,
        )
    elif in_imperial:
        median_pred = _cap(
            arima * 0.05 + arima_residuals * 0.1 + benchmark * 0.05 + case * 0.15 + delphi * 0.05 + imperial * 0.5 + rnn_residuals * 0.05 + svm * 0.05,
            120,
        )
    elif fips in historian_fips:
        median_pred = _cap(
            arima * 0.15 + arima_residuals * 0.2 + benchmark * 0.1 + case * 0.2 + delphi * 0.15 + rnn_residuals * 0.1 + svm * 0.1,
            20,
        )
    else:
        # Apply manual heuristics for very small counties, or determine
        # predictions as a combination of the remaining predictions files.
        recent_deaths = list(imperial_model.imperial_util.get_deaths_list(fips, endDate="2020-05-24"))[-14:]
        quantiles = _small_county_quantiles(recent_deaths)
        if quantiles is None:
            median_pred = _cap(
                arima * 0.2 + arima_residuals * 0.2 + benchmark * 0.1 + delphi * 0.2 + rnn_residuals * 0.1 + svm * 0.2,
                10,
            )

    if quantiles is None:
        # A negative or NaN median gives NaN quantiles here; they are zeroed
        # in _finalize_quantiles.
        quantiles = generate_quantiles(median_pred, median_pred * _RELATIVE_ERR)

    lists.append([row_id] + _finalize_quantiles(quantiles))

df = pd.DataFrame(lists, columns=sample_submission.columns)

In [None]:
# Write the assembled submission as a comma-separated file without the index column.
df.to_csv(
    "final_submission_2.csv",
    sep=',',
    index=False,
)

In [None]:
# Spot check: deciles for a median of 2 with a standard error of 0.6 times the median.
generate_quantiles(value=2, err=2 * 0.6)
[0.00,0.00,0.00,1.00,2.00,2.00,2.00,3.00,3.00]