## Brier score
Testing out a new metric of forecast skill with a binary outocme

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path
import glob
import seaborn as sns
import math
# import geopandas

In [2]:
google_root = "Q:"
data_path = r"\Shared drives\Pandemic Data"
model_name = "slf_model"
run_name = "slf_grid_broad"
commodity = "6802-6802"
years_before_firstRecord = 4
native_countries_list = ["China","Viet Nam"]

data_dir = f"{google_root}{data_path}\{model_name}"

os.chdir(data_dir)

In [3]:
stats_dir = f"{data_dir}/outputs/summary_stats/{run_name}"
# input_dir = "inputs"
input_dir = f"{data_dir}/inputs/noTWN"
out_dir = f"{data_dir}/outputs/slf_origin/{run_name}"

In [43]:
validation_df = pd.read_csv(
    input_dir + "/first_records_validation.csv",
    header=0,
    index_col=0,
)

In [173]:
param_samp = glob.glob(f"{out_dir}/*{commodity}*")
run_outputs = glob.glob(f"{sample}/run*/pandemic_output_aggregated.csv")

def compute_brier_score(run_outputs, validation_df, native_countries_list, years_before_firstRecord):

    model_output = pd.read_csv(run_outputs[0])
    
    presence_cols = [
    c
    for c in model_output.columns
    if c.startswith("Presence") and len(c.split(" ")[-1]) == 4]

    years = [
    c.split(" ")[-1]    
    for c in presence_cols
    ]
    
    # Remove native countries
    model_output.drop(model_output.loc[model_output['NAME'].isin(native_countries_list)].index, inplace=True)
    validation = model_output.merge(validation_df, how="left", on="ISO3")[['ISO3','ObsFirstIntro']]

    for year in years:
        validation[year] = 0
        validation.loc[validation['ObsFirstIntro'] <= int(year),year] = 1

    validation_w_lag = validation.copy() 
    for year in years:
        validation_w_lag.loc[validation_w_lag['ObsFirstIntro'] <= int(year) + years_before_firstRecord,year] = 1

    validation.drop(columns=["ISO3","ObsFirstIntro"], inplace=True)
    validation_w_lag.drop(columns=["ISO3","ObsFirstIntro"], inplace=True)

    total_intros = model_output[presence_cols].values
    
    for run in run_outputs:
        model_output = pd.read_csv(run)
        model_output.drop(model_output.loc[model_output['NAME'].isin(native_countries_list)].index, inplace=True)
        total_intros = np.dstack((total_intros, model_output[presence_cols].values))
    
    mean_intros = np.mean(total_intros, axis=2)

    # For each value that is in the window period, pick the score that does better (presence or absence)
    brier_scores = np.minimum((mean_intros - validation.values)**2, (mean_intros - validation_w_lag.values)**2)

    brier_score = np.mean(brier_scores)

    return brier_score
    

In [179]:
param_samp = glob.glob(f"{out_dir}/*{commodity}*")
run_outputs = glob.glob(f"{param_samp[1]}/run*/pandemic_output_aggregated.csv")

In [180]:
compute_brier_score(run_outputs, validation_df, native_countries_list, years_before_firstRecord)

0.0031904446576663014

# Remaking the summary stats by sample

In [6]:
coi = "USA"

validation_df = pd.read_csv(
        input_dir + "/first_records_validation.csv", header=0, index_col=0,
    )

# Set up probability by year dictionary keys (column names)
sim_years =[2014, 2020]
year_probs_dict_keys = []
for year in sim_years:
    year_probs_dict_keys.append(f"prob_by_{year}_{coi}")

# Set up difference by recorded country dictionary keys (column names)
countries_dict_keys = []
for ISO3 in validation_df.index:
    countries_dict_keys.append(f"diff_obs_pred_metric_{ISO3}")

data = pd.read_csv(
        r"C:\\Users\\asaffer\\OneDrive - North Carolina State University\Documents\\MobaXterm\slf_model\slf_origin\outputs\summary_stats\slf_grid_broad" + "/summary_stats_wPrecisionRecallF1FBetaAggProb.csv", header=0, index_col=0, usecols=list(range(0,21))
    )

In [7]:
def mse(x):
    return sum(x) / len(x)


def avg_std(x):
    """
    Compute average standard deviation when aggregating across runs
    of a parameter sample
    """
    return math.sqrt(sum(x ** 2) / len(x))


def mape(x):
    return (1 / len(x)) * sum(abs(x / 3))


def fbeta(precision, recall, weight):
    if (precision != 0) and (recall != 0):
        return ((1 + (weight ** 2)) * precision * recall) / (
            (weight ** 2) * precision + recall
        )
    else:
        return 0


def f1(precision, recall):
    if (precision != 0) and (recall != 0):
        return (2 * precision * recall) / (precision + recall)
    else:
        return 0


In [9]:
agg_dict = {
        "start": ["max"],
        "alpha": ["max"],
        "lamda": ["max"],
        "run_num": ["max"],
        "total_countries_intros_predicted": ["mean", "std"],
        "diff_total_countries": ["mean", "std"],
        "diff_total_countries_sqrd": [mse],
        "count_known_countries_time_window": ["mean", "std"],
        "diff_obs_pred_metric_mean": ["mean"],
        "diff_obs_pred_metric_stdev": [avg_std],
        "count_known_countries_time_window_recall": ["mean"],
        "count_known_countries_time_window_precision": ["mean"],
        "count_known_countries_time_window_f1": ["mean"],
        "count_known_countries_time_window_fbeta": ["mean"],
    }
prob_agg_dict = dict(
    zip(year_probs_dict_keys, ["mean" for i in range(len(year_probs_dict_keys))])
)
countries_agg_dict = dict(
    zip(
        countries_dict_keys,
        [["mean", "std"] for i in range(len(countries_dict_keys))],
    )
)

agg_dict = {**agg_dict, **prob_agg_dict, **countries_agg_dict}

agg_df = data.groupby("sample").agg(agg_dict)

agg_df.columns = ["_".join(x) for x in agg_df.columns.values]
# agg_df.to_csv(summary_stat_path + "/summary_stats_bySample.csv")

agg_df.to_csv(r"C:\Users\asaffer\OneDrive - North Carolina State University\Documents\MobaXterm\slf_model\slf_origin\outputs\summary_stats\slf_grid_broad" + "/summary_stats_bySample.csv", index=False)