# Compute metrics for different runs and plot them
##### author: Elizabeth A. Barnes, Randal J. Barnes and Mark DeMaria
##### version: v0.3.0

In [1]:
import datetime
import os
import pickle
import pprint
import time

import experiment_settings
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import shash
from build_data import build_hurricane_data
import build_model
import model_diagnostics
from silence_tensorflow import silence_tensorflow
import prediction
from sklearn.neighbors import KernelDensity
import pandas as pd
from tqdm import tqdm
import imp

import warnings
# Silence every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

# Third-party helper; presumably mutes TensorFlow's log output -- TODO confirm.
silence_tensorflow()
# Not referenced in the visible cells; presumably the dpi used when saving
# figures elsewhere in the notebook.
dpiFig = 400

# Figure defaults for anything drawn inline.
mpl.rcParams["figure.facecolor"] = "white"
mpl.rcParams["figure.dpi"] = 150
# Use the stdlib ``warnings`` module directly: ``np.warnings`` was never a
# public NumPy API and is missing from recent NumPy releases, where the
# original call raises AttributeError.
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

In [2]:
__author__ = "Randal J Barnes and Elizabeth A. Barnes"
__version__ = "18 March 2022"

# Experiments to evaluate. Un-comment an entry to include it in the run;
# the order here is the order in which they are processed.
EXP_NAME_LIST = (
    # -- EPCP experiments --
    # "intensity301_EPCP24",
    "intensity302_EPCP48",
    # "intensity303_EPCP72",
    # "intensity304_EPCP96",
    # "intensity305_EPCP120",
    #
    # -- AL experiments --
    # "intensity201_AL24",
    # "intensity202_AL48",
    # "intensity203_AL72",
    # "intensity204_AL96",
    # "intensity205_AL120",
    #
    # -- other EPCP48 variants --
    # "intensity412_EPCP48",
    # "intensity422_EPCP48",
    "intensity900_EPCP48",
)

# When False, runs whose metrics pickle already exists are skipped.
OVERWRITE_METRICS = False

# Directory prefixes (relative to the working directory, trailing slash kept
# because they are concatenated directly with file names).
DATA_PATH = "data/"
MODEL_PATH = "saved_models/"
METRIC_PATH = "saved_metrics/"

## Define get_metrics()

In [3]:
def get_metrics(x_test, onehot_test):
    """Compute evaluation metrics for the current model on one data split.

    This function reads the notebook-level globals ``settings``, ``model`` and
    ``network_seed``; they must be set (as the driver loop does) before it is
    called.

    The branch taken depends on ``settings["uncertainty_type"]``:

    * ``"mcdrop"``, ``"reg"`` or any name containing ``"bnn"``: the model is
      evaluated 5000 times on ``x_test`` (``model.predict`` for "bnn" types,
      ``model(x_test, training=True)`` otherwise). The per-sample mean and
      median are taken over those draws; the mode is the arg-max of a
      Gaussian kernel density estimate (bandwidth 4) evaluated on a
      1000-point grid spanning -160..160.
    * ``"shash"``, ``"shash2"``, ``"shash3"``, ``"shash4"``: distribution
      parameters are obtained per sample from ``prediction.params``; the
      density is evaluated on the integer grid -160..160, the mean/median
      come from the ``shash`` helpers, and the mode is the grid arg-max.

    Args:
        x_test: Model inputs; the first axis indexes samples.
        onehot_test: Targets for the same samples. Column 0 is compared
            against a zero correction to form ``cons_error``.

    Returns:
        dict with keys ``pit_D``, ``iqr_capture``, ``iqr_error_spearman``,
        ``iqr_error_pearson``, ``iqr_error_spearman_p``,
        ``iqr_error_pearson_p``, ``cons_error``, ``mean_error``,
        ``median_error``, ``mode_error`` and the three
        ``*_error_reduction`` entries (``cons_error`` minus each error).

    Raises:
        NotImplementedError: if the uncertainty type matches neither branch.
    """
    tf.random.set_seed(network_seed)
    shash_incs = np.arange(-160, 161, 1)

    uncertainty_type = settings["uncertainty_type"]
    n_samples = np.shape(x_test)[0]
    is_bnn = "bnn" in uncertainty_type

    if is_bnn or uncertainty_type in ("mcdrop", "reg"):
        # Repeated stochastic evaluations: one column of bnn_cpd per run.
        runs = 5_000
        bins_plot = np.linspace(np.min(shash_incs), np.max(shash_incs), 1000)
        bnn_cpd = np.zeros((n_samples, runs))
        bnn_mode = np.zeros((n_samples,))

        for run in tqdm(range(runs)):
            if is_bnn:
                draw = model.predict(x_test)
            else:
                # training=True is passed so the call is made in training mode
                # for the "mcdrop"/"reg" types.
                draw = model(x_test, training=True)
            bnn_cpd[:, run] = np.reshape(draw, n_samples)

        bnn_mean = np.mean(bnn_cpd, axis=1)
        bnn_median = np.median(bnn_cpd, axis=1)

        # Mode per sample: peak of a KDE fitted to that sample's draws.
        kde_grid = bins_plot.reshape(-1, 1)
        for j in tqdm(range(n_samples)):
            kde = KernelDensity(kernel="gaussian", bandwidth=4.0).fit(
                bnn_cpd[j, :].reshape(-1, 1)
            )
            log_dens = kde.score_samples(kde_grid)
            bnn_mode[j] = bins_plot[np.argmax(log_dens)]

        mean_error, median_error, mode_error = model_diagnostics.compute_errors(
            onehot_test, bnn_mean, bnn_median, bnn_mode
        )
        # Only the third returned value (pit_D) is reported.
        _, _, pit_D, _ = model_diagnostics.compute_pit("bnn", onehot_test, bnn_cpd)
        iqr_capture = model_diagnostics.compute_interquartile_capture(
            "bnn", onehot_test, bnn_cpd
        )
        iqr_error_spearman, iqr_error_pearson = model_diagnostics.compute_iqr_error_corr(
            "bnn",
            onehot_data=onehot_test,
            bnn_cpd=bnn_cpd,
            pred_median=bnn_median,
        )

    elif uncertainty_type in ("shash", "shash2", "shash3", "shash4"):
        shash_cpd = np.zeros((n_samples, len(shash_incs)))
        shash_mean = np.zeros((n_samples,))
        shash_med = np.zeros((n_samples,))
        shash_mode = np.zeros((n_samples,))

        # One sample at a time: predict the distribution parameters, then
        # evaluate the density on the fixed grid.
        for j in tqdm(range(n_samples)):
            mu_pred, sigma_pred, gamma_pred, tau_pred = prediction.params(
                x_test[np.newaxis, j], model
            )
            shash_cpd[j, :] = shash.prob(
                shash_incs, mu_pred, sigma_pred, gamma_pred, tau_pred
            )
            shash_mean[j] = shash.mean(mu_pred, sigma_pred, gamma_pred, tau_pred)
            shash_med[j] = shash.median(mu_pred, sigma_pred, gamma_pred, tau_pred)
            shash_mode[j] = shash_incs[np.argmax(shash_cpd[j, :])]

        mean_error, median_error, mode_error = model_diagnostics.compute_errors(
            onehot_test, shash_mean, shash_med, shash_mode
        )
        # Only the third returned value (pit_D) is reported.
        _, _, pit_D, _ = model_diagnostics.compute_pit(
            "shash", onehot_test, x_data=x_test, model_shash=model
        )
        iqr_capture = model_diagnostics.compute_interquartile_capture(
            "shash", onehot_test, x_data=x_test, model_shash=model
        )
        iqr_error_spearman, iqr_error_pearson = model_diagnostics.compute_iqr_error_corr(
            "shash",
            onehot_data=onehot_test,
            pred_median=shash_med,
            x_data=x_test,
            model_shash=model,
        )
    else:
        raise NotImplementedError(
            f"unsupported uncertainty_type: {uncertainty_type!r}"
        )

    # by definition Consensus is a correction of zero
    cons_error = np.mean(np.abs(0.0 - onehot_test[:, 0]))

    # Each correlation result is indexed as [0] = statistic, [1] = p-value.
    metrics = {
        'pit_D': pit_D,
        'iqr_capture': iqr_capture,

        'iqr_error_spearman': iqr_error_spearman[0],
        'iqr_error_pearson': iqr_error_pearson[0],
        'iqr_error_spearman_p': iqr_error_spearman[1],
        'iqr_error_pearson_p': iqr_error_pearson[1],

        'cons_error': cons_error,
        'mean_error': mean_error,
        'median_error': median_error,
        'mode_error': mode_error,

        'mean_error_reduction': cons_error - mean_error,
        'median_error_reduction': cons_error - median_error,
        'mode_error_reduction': cons_error - mode_error,
    }

    return metrics


## Compute Metrics

In [None]:
# Driver cell: for every experiment / testing year / rng seed, rebuild the
# data and model, load the saved weights, compute metrics on the test, val,
# train and val+test splits, and pickle them as a one-row DataFrame.
# get_metrics() reads the globals `settings`, `model` and `network_seed`
# assigned inside this loop.
import importlib  # `imp` is deprecated; importlib.reload is its replacement

importlib.reload(model_diagnostics)

for exp_name in EXP_NAME_LIST:
    settings = experiment_settings.get_settings(exp_name)

    # set testing data
    if settings["test_condition"] == "leave-one-out":
        TESTING_YEARS_LIST = np.arange(2013, 2022)
    elif settings["test_condition"] == "years":
        TESTING_YEARS_LIST = np.copy(settings["years_test"])
    else:
        raise NotImplementedError('no such testing condition')

    for testing_years in TESTING_YEARS_LIST:
        # set testing year (one year per pass, stored as a 1-tuple)
        settings["years_test"] = (testing_years,)

        for rng_seed in settings['rng_seed_list']:
            settings['rng_seed'] = rng_seed
            NETWORK_SEED_LIST = [settings["rng_seed"]]
            network_seed = NETWORK_SEED_LIST[0]
            tf.random.set_seed(network_seed)  # This sets the global random seed.

            #----------------------------------------------------------------------------------------------------
            # get the data
            (
                data_summary,
                x_train,
                onehot_train,
                x_val,
                onehot_val,
                x_test,
                onehot_test,
                x_valtest,
                onehot_valtest,
                df_train,
                df_val,
                df_test,
                df_valtest,
            ) = build_hurricane_data(DATA_PATH, settings, verbose=0)

            #----------------------------------------------------------------------------------------------------
            # get the model
            # Rebuild the architecture (uncompiled) and load previously saved weights.
            tf.keras.backend.clear_session()
            model = build_model.make_model(
                settings,
                x_train,
                onehot_train,
                model_compile=False,
            )
            model_name = (
                exp_name + "_" +
                str(testing_years) + '_' +
                settings["uncertainty_type"] + '_' +
                f"network_seed_{network_seed}_rng_seed_{settings['rng_seed']}"
            )

            try:
                model.load_weights(MODEL_PATH + model_name + "_weights.h5")
            except Exception:
                # Best-effort skip for any load failure; unlike a bare
                # `except:` this still lets KeyboardInterrupt/SystemExit
                # stop the run.
                print(model_name + ': model does not exist. skipping...')
                continue

            #----------------------------------------------------------------------------------------------------
            # check if the metric filename exists already
            # NOTE(review): this check runs only after the data and model are
            # built; it could be moved earlier if those steps have no needed
            # side effects.
            metric_filename = METRIC_PATH + model_name + '_metrics.pickle'
            if os.path.exists(metric_filename) and not OVERWRITE_METRICS:
                # print(metric_filename + ' exists. Skipping...')
                continue

            # get metrics and put into a dictionary
            pprint.pprint(model_name)

            # compute the metrics
            metrics_test = get_metrics(x_test, onehot_test)
            metrics_val = get_metrics(x_val, onehot_val)
            metrics_train = get_metrics(x_train, onehot_train)
            metrics_valtest = get_metrics(x_valtest, onehot_valtest)

            # create the metrics dataframe
            d = {}
            d['uncertainty_type'] = settings["uncertainty_type"]
            d['network_seed'] = network_seed
            d['rng_seed'] = settings['rng_seed']
            d['exp_name'] = exp_name
            # text after the last underscore of the experiment name, e.g. "EPCP48"
            d['basin_lead'] = exp_name[exp_name.rfind('_')+1:]
            d['testing_years'] = settings["years_test"]

            # Flatten the per-split metrics into suffixed columns; the order
            # (test, val, train, valtest) fixes the column order of the frame.
            for suffix, split_metrics in (
                ('_test', metrics_test),
                ('_val', metrics_val),
                ('_train', metrics_train),
                ('_valtest', metrics_valtest),
            ):
                for k, value in split_metrics.items():
                    d[k + suffix] = value

            # save the dataframe
            # pprint.pprint(d, width=80)
            df = pd.DataFrame(data=d, index=[0])
            df.to_pickle(metric_filename)

'intensity900_EPCP48_2020_bnnshash2_network_seed_222_rng_seed_222'


100%|██████████████████████████████████████████████████████████████████████████| 5000/5000 [03:54<00:00, 21.29it/s]
100%|████████████████████████████████████████████████████████████████████████████| 124/124 [00:14<00:00,  8.79it/s]
100%|██████████████████████████████████████████████████████████████████████████| 5000/5000 [05:05<00:00, 16.37it/s]
100%|████████████████████████████████████████████████████████████████████████████| 200/200 [00:22<00:00,  8.87it/s]
  2%|█▏                                                                          | 82/5000 [00:09<08:55,  9.18it/s]

In [None]:
# Scratch cell: a trivial expression; it does not touch any notebook state.
2+2