# Compute metrics for different runs and plot them
##### author: Elizabeth A. Barnes, Randal J. Barnes and Mark DeMaria
##### version: v0.2.0

In [1]:
import datetime
import os
import pickle
import pprint
import time

import experiment_settings
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import shash_tfp
from build_data import build_hurricane_data
import build_model
import model_diagnostics
from silence_tensorflow import silence_tensorflow
import prediction
from sklearn.neighbors import KernelDensity
import pandas as pd
from tqdm import tqdm
import imp

import warnings
warnings.filterwarnings("ignore")

# Suppress TensorFlow's log output via the silence_tensorflow package.
silence_tensorflow()

# Dots-per-inch value for figures; not referenced in the cells visible here.
dpiFig = 400

# Inline figure defaults: white background, 150 dpi.
mpl.rcParams["figure.facecolor"] = "white"
mpl.rcParams["figure.dpi"] = 150

# `np.warnings` was only an accidental alias of the stdlib module and is no
# longer exposed by recent NumPy releases, so call `warnings` directly.
# VisibleDeprecationWarning moved to `np.exceptions`; look it up in whichever
# namespace this NumPy version provides.
_visible_deprecation = getattr(np, "VisibleDeprecationWarning", None)
if _visible_deprecation is None:
    _visible_deprecation = np.exceptions.VisibleDeprecationWarning
warnings.filterwarnings("ignore", category=_visible_deprecation)

In [2]:
# Notebook provenance.
__author__ = "Randal J Barnes and Elizabeth A. Barnes"
__version__ = "03 June 2022"

# Where inputs are read from and outputs are written to (relative paths).
DATA_PATH = "data/"
MODEL_PATH = "saved_models/"
METRIC_PATH = "saved_metrics/"
PREDICTION_PATH = "saved_predictions/"

# When False, any experiment whose prediction CSV already exists is skipped.
OVERWRITE_METRICS = False

# Experiment names handed to experiment_settings.get_settings(), in the order
# they are processed. Presumably the suffix encodes basin (AL / EPCP) and lead
# time in hours -- confirm against experiment_settings.
_AL_EXPERIMENTS = (
    "intensity201_AL24",
    "intensity202_AL48",
    "intensity203_AL72",
    "intensity204_AL96",
    "intensity205_AL120",
)
_EPCP_EXPERIMENTS = (
    "intensity301_EPCP24",
    "intensity302_EPCP48",
    "intensity303_EPCP72",
    "intensity304_EPCP96",
    "intensity305_EPCP120",
)
EXP_NAME_LIST = _AL_EXPERIMENTS + _EPCP_EXPERIMENTS

In [3]:
# Intensity increase added to VMAX0 to form the RI threshold, keyed by
# settings["leadtime"]. None means no threshold is defined for that lead time.
_RI_LEADTIMES = (24, 48, 72, 96, 120)
_RI_INCREASES = (30, 55, 65, None, None)
RI_THRESH_DICT = dict(zip(_RI_LEADTIMES, _RI_INCREASES))

## Compute Predictions

In [4]:
# For every experiment / testing year / seed: rebuild the network, load its
# saved weights, evaluate the predicted SHASH distribution on each test
# sample, and write the per-sample statistics to
# PREDICTION_PATH + model_name + '_testingPredictions.csv'.
# Combinations whose CSV already exists are skipped unless OVERWRITE_METRICS
# is True; combinations without a saved weights file are skipped with a note.
import importlib  # `imp` is deprecated; importlib.reload is its replacement

# Re-import model_diagnostics so edits made since kernel start take effect.
importlib.reload(model_diagnostics)

# Grid on which each predicted distribution is evaluated (loop-invariant).
SHASH_INCS = np.arange(-160, 161, 1)

for exp_name in EXP_NAME_LIST:
    settings = experiment_settings.get_settings(exp_name)
    print(exp_name)

    # set testing data
    if settings["test_condition"] == "leave-one-out":
        TESTING_YEARS_LIST = np.arange(2013, 2022)
    elif settings["test_condition"] == "years":
        # copy, because settings["years_test"] is overwritten in the loop below
        TESTING_YEARS_LIST = np.copy(settings["years_test"])
    else:
        raise NotImplementedError('no such testing condition')

    for testing_years in TESTING_YEARS_LIST:
        # set testing year
        settings["years_test"] = (testing_years,)

        for rng_seed in settings['rng_seed_list']:
            settings['rng_seed'] = rng_seed
            NETWORK_SEED_LIST = [settings["rng_seed"]]
            network_seed = NETWORK_SEED_LIST[0]
            tf.random.set_seed(network_seed)  # This sets the global random seed.

            model_name = (
                f"{exp_name}_{testing_years}_{settings['uncertainty_type']}_"
                f"network_seed_{network_seed}_rng_seed_{settings['rng_seed']}"
            )

            # ------------------------------------------------------------
            # skip work that has already been done
            metric_filename = PREDICTION_PATH + model_name + '_testingPredictions.csv'
            if os.path.exists(metric_filename) and not OVERWRITE_METRICS:
                continue

            # ------------------------------------------------------------
            # get the data
            (
                data_summary,
                x_train,
                onehot_train,
                x_val,
                onehot_val,
                x_test,
                onehot_test,
                x_valtest,
                onehot_valtest,
                df_train,
                df_val,
                df_test,
                df_valtest,
            ) = build_hurricane_data(DATA_PATH, settings, verbose=0)

            # ------------------------------------------------------------
            # rebuild the (uncompiled) network; no training happens here,
            # the weights come from disk
            tf.keras.backend.clear_session()
            model = build_model.make_model(
                settings,
                x_train,
                onehot_train,
                model_compile=False,
            )

            # ------------------------------------------------------------
            # load the saved weights; only a missing file is a reason to skip,
            # any other load failure is a real error and is allowed to surface
            weights_filename = MODEL_PATH + model_name + "_weights.h5"
            if not os.path.exists(weights_filename):
                print(model_name + ': model does not exist. skipping...')
                continue
            model.load_weights(weights_filename)

            # compute the climatological errors from the train + validation labels
            obs_dev_cons_hist, OBS_DEV_BINS = model_diagnostics.compute_clim_errors(
                onehot=np.append(onehot_train[:, 0], onehot_val[:, 0]),
                smooth=True,
            )

            pprint.pprint(model_name)

            # per-sample outputs
            n_test = np.shape(x_test)[0]
            shash_cpd = np.zeros((n_test, len(SHASH_INCS)))
            shash_mean = np.zeros((n_test,))
            shash_med = np.zeros((n_test,))
            shash_mode = np.zeros((n_test,))
            shash_25p = np.zeros((n_test,))
            shash_75p = np.zeros((n_test,))
            shash_90p = np.zeros((n_test,))
            # RI probabilities stay NaN wherever they cannot be computed
            shash_pr_ri = np.full((n_test,), np.nan)
            clim_pr_ri = np.full((n_test,), np.nan)

            # None (or an unlisted lead time) means no RI threshold is defined
            ri_increment = RI_THRESH_DICT.get(settings["leadtime"])

            # loop through samples for shash calculation and get PDF for each sample
            for j in tqdm(range(n_test)):
                mu_pred, sigma_pred, gamma_pred, tau_pred = prediction.params(
                    x_test[np.newaxis, j], model
                )
                dist = shash_tfp.Shash(mu_pred, sigma_pred, gamma_pred, tau_pred)

                shash_cpd[j, :] = dist.prob(SHASH_INCS)
                shash_mean[j] = dist.mean()
                shash_med[j] = dist.median()

                shash_25p[j] = dist.quantile(.25)
                shash_75p[j] = dist.quantile(.75)
                shash_90p[j] = dist.quantile(.9)

                # mode = grid value at which the evaluated density is largest
                shash_mode[j] = SHASH_INCS[np.argmax(shash_cpd[j, :])]

                if ri_increment is None:
                    continue

                try:
                    # positional lookup so row j lines up with x_test[j]
                    # regardless of how df_test is indexed
                    cons_intensity = df_test["VMXC"].iloc[j]
                    ri_threshold = df_test["VMAX0"].iloc[j] + ri_increment
                    shash_pr_ri[j] = model_diagnostics.compute_pr_ri(
                        SHASH_INCS + cons_intensity, shash_cpd[j, :], ri_threshold
                    )
                    clim_pr_ri[j] = model_diagnostics.compute_pr_ri(
                        OBS_DEV_BINS + cons_intensity, obs_dev_cons_hist, ri_threshold
                    )
                except Exception:
                    # best effort per sample: record NaN and carry on, but do
                    # not swallow KeyboardInterrupt / SystemExit
                    shash_pr_ri[j] = np.nan
                    clim_pr_ri[j] = np.nan

            # add predictions to the data_frame
            df_predictions = df_test.copy()
            df_predictions["shash_median"] = shash_med
            df_predictions["shash_mean"] = shash_mean
            df_predictions["shash_mode"] = shash_mode
            df_predictions["shash_25p"] = shash_25p
            df_predictions["shash_75p"] = shash_75p
            df_predictions["shash_90p"] = shash_90p
            df_predictions["shash_pr_ri"] = shash_pr_ri
            df_predictions["clim_pr_ri"] = clim_pr_ri

            # errors relative to the first label column; "cons" is a zero prediction
            df_predictions["shash_error"] = shash_med - onehot_test[:, 0]
            df_predictions["cons_error"] = 0.0 - onehot_test[:, 0]
            df_predictions["shash_improvement"] = (
                df_predictions["cons_error"].abs() - df_predictions["shash_error"].abs()
            )

            # save the dataframe
            df_predictions.to_csv(metric_filename)

intensity201_AL24
intensity202_AL48
intensity203_AL72
intensity204_AL96
intensity205_AL120
intensity301_EPCP24
intensity302_EPCP48
intensity303_EPCP72
intensity304_EPCP96
intensity305_EPCP120


In [5]:
# NOTE(review): `error` is not defined in any cell shown here, so this call
# raises NameError (see the recorded output below). Presumably a deliberate
# stop so that "Run All" halts before the following cells -- confirm.
error('here')

NameError: name 'error' is not defined

## Create one prediction file

In [6]:
# Gather, for every experiment and testing year, the prediction CSV of the
# seed recorded in the best-validation table, stack them into one frame and
# write it to PREDICTION_PATH + "shash3_bestValTestingPredictions.csv".

# NOTE(review): unpickling can execute arbitrary code; only load this file
# when it was produced locally or comes from a trusted source.
df_bestval = pd.read_pickle(PREDICTION_PATH + "best_shash3_validation_seeds.pickle")

# collected per-model frames; concatenated once at the end
# (DataFrame.append was removed in pandas 2.0 and re-copies on every call)
best_frames = []
for exp_name in EXP_NAME_LIST:
    settings = experiment_settings.get_settings(exp_name)

    # set testing data
    if settings["test_condition"] == "leave-one-out":
        TESTING_YEARS_LIST = np.arange(2013, 2022)
    elif settings["test_condition"] == "years":
        # copy, because settings["years_test"] is overwritten in the loop below
        TESTING_YEARS_LIST = np.copy(settings["years_test"])
    else:
        raise NotImplementedError('no such testing condition')

    for testing_years in TESTING_YEARS_LIST:
        # set testing year
        settings["years_test"] = (testing_years,)

        # seed recorded for this experiment / testing year, if any
        is_match = (
            (df_bestval["exp_name"] == exp_name)
            & (df_bestval["testing_years"] == testing_years)
        )
        matching_seeds = df_bestval.loc[is_match, "rng_seed"]
        if matching_seeds.empty:
            BEST_SEED = None
            print(BEST_SEED)
            continue
        # positional: take the first matching row whatever its index label is
        BEST_SEED = matching_seeds.iloc[0]

        for rng_seed in settings['rng_seed_list']:

            if rng_seed != BEST_SEED:
                continue

            settings['rng_seed'] = rng_seed
            NETWORK_SEED_LIST = [settings["rng_seed"]]
            network_seed = NETWORK_SEED_LIST[0]
            tf.random.set_seed(network_seed)  # This sets the global random seed.

            model_name = (
                f"{exp_name}_{testing_years}_{settings['uncertainty_type']}_"
                f"network_seed_{network_seed}_rng_seed_{settings['rng_seed']}"
            )

            # ------------------------------------------------------------
            # nothing to collect if the predictions were never written
            metric_filename = PREDICTION_PATH + model_name + '_testingPredictions.csv'
            if not os.path.exists(metric_filename):
                continue
            pred_data = pd.read_csv(metric_filename)

            best_frames.append(pred_data)


df_bestpred = pd.concat(best_frames) if best_frames else pd.DataFrame()

df_bestpred.to_csv(PREDICTION_PATH + "shash3_bestValTestingPredictions.csv")
print('number of rows = ' + str(len(df_bestpred)))
df_bestpred.head()


number of rows = 16596


Unnamed: 0.1,Unnamed: 0,ATCF,Name,year,time,ftime(hr),VMAX0,NCI,OBDV,DSDV,...,shash_mean,shash_mode,shash_25p,shash_75p,shash_90p,shash_pr_ri,clim_pr_ri,shash_error,cons_error,shash_improvement
0,0,AL07,GABRIELL,2013,91112,24,40.0,4,-1.2,2.8,...,0.655948,-1.0,-2.615146,3.551698,6.857447,1.713578e-09,0.018106,1.407364,1.2,-0.207364
1,1,AL11,JERRY,2013,92906,24,30.0,4,1.5,1.5,...,0.862209,-1.0,-2.727088,4.03882,7.666942,1.911398e-05,0.045446,-1.131004,-1.5,0.368996
2,2,AL07,GABRIELL,2013,91200,24,30.0,4,6.0,1.0,...,1.522003,0.0,-1.464829,4.104378,7.182334,1.081089e-08,0.025745,-4.961375,-6.0,1.038625
3,3,AL03,CHANTAL,2013,70806,24,35.0,4,8.0,2.0,...,4.790596,1.0,-1.928901,10.71841,17.52899,0.04353928,0.061604,-4.155567,-8.0,3.844433
4,4,AL09,HUMBERTO,2013,91018,24,60.0,4,5.5,1.5,...,4.942814,1.0,-5.711915,14.665569,25.153158,0.2530958,0.13687,-1.671015,-5.5,3.828985
