# Compute metrics for different runs and plot them
##### author: Elizabeth A. Barnes, Randal J. Barnes and Mark DeMaria
##### version: v0.1.0
##### date: 14 December 2021

```
conda create --name env-hurr-tfp python=3.9
conda activate env-hurr-tfp
pip install tensorflow==2.7.0
pip install tensorflow-probability==0.15.0
pip install --upgrade numpy scipy pandas statsmodels matplotlib seaborn 
pip install --upgrade palettable progressbar2 tabulate icecream flake8
pip install --upgrade keras-tuner sklearn
pip install --upgrade jupyterlab black isort jupyterlab_code_formatter
pip install silence-tensorflow
pip install tqdm
```

Use the command
```python -m pip freeze > requirements.txt```
to make a pip installation list.

In [1]:
import datetime
import os
import pickle
import pprint
import time

import experiment_settings
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import shash
from build_data import build_hurricane_data
from build_model import build_shash_model, build_bnn_model
from custom_loss import compute_shash_NLL, compute_NLL
from custom_metrics import CustomMAE, InterquartileCapture, SignTest
from model_diagnostics import plot_history
import model_diagnostics
from save_model_run import save_model_run
from sklearn import preprocessing
from tensorflow.keras import optimizers
from training_instrumentation import TrainingInstrumentation
from silence_tensorflow import silence_tensorflow
import tensorflow_probability as tfp
from keras.utils.layer_utils import count_params  
import prediction

from sklearn.neighbors import KernelDensity

from tqdm import tqdm

silence_tensorflow()
dpiFig = 400

In [2]:
__author__ = "Randal J Barnes and Elizabeth A. Barnes"
__version__ = "14 January 2022"

EXP_NAME_LIST = ("intensity0_AL72","intensity1_AL72")

DATA_PATH = "data/"
MODEL_PATH = "saved_models/"

In [3]:
mpl.rcParams["figure.facecolor"] = "white"
mpl.rcParams["figure.dpi"] = 150
np.warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

## Load the models

In [4]:
for exp_name in EXP_NAME_LIST:
    settings = experiment_settings.get_settings(exp_name)
    # pprint.pprint(settings, width=80)
    
    NETWORK_SEED_LIST = [settings["rng_seed"]]
    network_seed = NETWORK_SEED_LIST[0]
    tf.random.set_seed(network_seed)  # This sets the global random seed.    
    
    (
        x_train,
        onehot_train,
        x_val,
        onehot_val,
        x_eval,
        onehot_eval,    
        data_summary,
        df_val,
        df_eval,
    ) = build_hurricane_data(DATA_PATH, settings, verbose=0)  

    if settings["uncertainty_type"] == "bnn":       
        model_name_bnn = (
            exp_name + "_" + settings["uncertainty_type"] + '_' + f"network_seed_{network_seed}_rng_seed_{settings['rng_seed']}"
        )
        pprint.pprint(model_name_bnn)
        
        model_bnn = build_bnn_model(
            x_train,
            onehot_train,
            hiddens=settings["hiddens"],
            output_shape=onehot_train.shape[1],
            ridge_penalty=settings["ridge_param"],
            act_fun=settings["act_fun"],
        )
        model_bnn.load_weights(MODEL_PATH + model_name_bnn + "_weights.h5")

    elif settings["uncertainty_type"][:5] == "shash": 
        model_name_shash = (
            exp_name + "_" + settings["uncertainty_type"] + '_' + f"network_seed_{network_seed}_rng_seed_{settings['rng_seed']}"
        )
        pprint.pprint(model_name_shash)
        
        model_shash = build_shash_model(
            x_train,
            onehot_train,
            hiddens=settings["hiddens"],
            output_shape=onehot_train.shape[1],
            ridge_penalty=settings["ridge_param"],
            act_fun=settings["act_fun"],
        )
        model_shash.load_weights(MODEL_PATH + model_name_shash + "_weights.h5")
        
    


settings["train_condition"] is undefined
'intensity0_AL72_shash3_network_seed_888_rng_seed_888'
settings["train_condition"] is undefined
'intensity1_AL72_bnn_network_seed_888_rng_seed_888'


  loc = add_variable_fn(
  untransformed_scale = add_variable_fn(


## Make Model Predictions

In [5]:
tf.random.set_seed(network_seed)

shash_incs = np.arange(-160,161,1)
shash_cpd = np.zeros((np.shape(x_eval)[0],len(shash_incs)))
shash_mean = np.zeros((np.shape(x_eval)[0],))
shash_med = np.zeros((np.shape(x_eval)[0],))
shash_mode = np.zeros((np.shape(x_eval)[0],))

# loop through samples for shash calculation and get PDF for each sample
for j in tqdm(range(0,np.shape(shash_cpd)[0])):
    mu_pred, sigma_pred, gamma_pred, tau_pred = prediction.params( x_eval[np.newaxis,j], model_shash )
    shash_cpd[j,:] = shash.prob(shash_incs, mu_pred, sigma_pred, gamma_pred, tau_pred)    
    shash_mean[j]  = shash.mean(mu_pred,sigma_pred,gamma_pred,tau_pred)#np.sum(shash_cpd[j,:]*shash_incs)
    shash_med[j]   = shash.median(mu_pred,sigma_pred,gamma_pred,tau_pred)
    
    i = np.argmax(shash_cpd[j,:])
    shash_mode[j]  = shash_incs[i]
    
mean_error_shash, median_error_shash, mode_error_shash = model_diagnostics.compute_errors(onehot_val, shash_mean, shash_med, shash_mode)    
bins, hist_shash, D_shash, EDp_shash = model_diagnostics.compute_pit('shash',onehot_val, x_val=x_val,model_shash=model_shash)
iqr_capture_shash = model_diagnostics.compute_interquartile_capture('shash',onehot_val, x_val=x_val,model_shash=model_shash)


# loop through runs for bnn calculation    
runs = 5_000
bins_plot = np.linspace(np.min(shash_incs), np.max(shash_incs), 1000)
bnn_cpd = np.zeros((np.shape(x_eval)[0],runs))
bnn_mode = np.zeros((np.shape(x_eval)[0],))

for i in tqdm(range(0,runs)):
    bnn_cpd[:,i] = np.reshape(model_bnn.predict(x_eval),np.shape(bnn_cpd)[0])
bnn_mean = np.mean(bnn_cpd,axis=1)
bnn_median = np.median(bnn_cpd,axis=1)

for j in tqdm(range(0,np.shape(bnn_mode)[0])):
    kde = KernelDensity(kernel="gaussian", bandwidth=4.).fit(bnn_cpd[j,:].reshape(-1,1))
    log_dens = kde.score_samples(bins_plot.reshape(-1,1))
    i = np.argmax(log_dens)
    bnn_mode[j] = bins_plot[i]
    
mean_error_bnn, median_error_bnn, mode_error_bnn = model_diagnostics.compute_errors(onehot_val, bnn_mean, bnn_median, bnn_mode)         
bins, hist_bnn, D_bnn, EDp_bnn = model_diagnostics.compute_pit('bnn',onehot_val, bnn_cpd)
iqr_capture_bnn = model_diagnostics.compute_interquartile_capture('bnn',onehot_val, bnn_cpd)

100%|█████████████████████████████████████████████████████████████████████████| 300/300 [00:12<00:00, 24.54it/s]
100%|███████████████████████████████████████████████████████████████████████| 5000/5000 [03:48<00:00, 21.89it/s]
  0%|                                                                                   | 0/300 [00:00<?, ?it/s]


NameError: name 'KernelDensity' is not defined

In [6]:
from sklearn.neighbors import KernelDensity
for j in tqdm(range(0,np.shape(bnn_mode)[0])):
    kde = KernelDensity(kernel="gaussian", bandwidth=4.).fit(bnn_cpd[j,:].reshape(-1,1))
    log_dens = kde.score_samples(bins_plot.reshape(-1,1))
    i = np.argmax(log_dens)
    bnn_mode[j] = bins_plot[i]
    
mean_error_bnn, median_error_bnn, mode_error_bnn = model_diagnostics.compute_errors(onehot_val, bnn_mean, bnn_median, bnn_mode)         
bins, hist_bnn, D_bnn, EDp_bnn = model_diagnostics.compute_pit('bnn',onehot_val, bnn_cpd)
iqr_capture_bnn = model_diagnostics.compute_interquartile_capture('bnn',onehot_val, bnn_cpd)

100%|█████████████████████████████████████████████████████████████████████████| 300/300 [00:29<00:00, 10.01it/s]


In [7]:
print(mean_error_shash,mean_error_bnn)
print(median_error_shash,median_error_bnn)
print(mode_error_shash,mode_error_bnn)
print(D_shash, D_bnn)
print(iqr_capture_shash,iqr_capture_bnn)

11.172096203486126 11.91726674749986
11.377947850704192 11.91974970754981
12.715666666666666 11.935565231898565
0.02494436533259028 0.03555902760825217
0.43 0.31666666666666665


In [8]:
exp_name

'intensity1_AL72'

In [9]:
settings

{'filename': 'nnfit_vlist_intensity_and_track_extended.dat',
 'uncertainty_type': 'bnn',
 'leadtime': 72,
 'basin': 'AL',
 'target': 'intensity',
 'undersample': False,
 'hiddens': [15, 10],
 'learning_rate': 0.0001,
 'momentum': 0.9,
 'nesterov': True,
 'batch_size': 64,
 'rng_seed': 888,
 'act_fun': 'relu',
 'n_epochs': 25000,
 'patience': 300,
 'ridge_param': 0.0,
 'n_val': 300,
 'n_train': 'max'}

In [10]:
import pandas as pd

d = {'uncertainty_type': settings["uncertainty_type"],
     'exp_name': exp_name,
     'mean_error': mean_error,
     'median_error': median_error,
     'mode_error': mode_error,
     'pit_d': D,
     'iqr_capture': iqr_capture,
}

NameError: name 'mean_error' is not defined

In [None]:
from scipy import stats
from sklearn.neighbors import KernelDensity

bins = np.arange(np.min(shash_incs),np.max(shash_incs)+1,1)
hist = np.histogram(bnn_cpd[40,:],bins)

bins_plot = np.linspace(np.min(shash_incs), np.max(shash_incs), 1000)
kde = KernelDensity(kernel="gaussian", bandwidth=4.).fit(bnn_cpd[40,:].reshape(-1,1))
log_dens = kde.score_samples(bins_plot.reshape(-1,1))
i = np.argmax(log_dens)
print(bins_plot[i])

plt.plot(hist[1][1:],hist[0]/np.sum(hist[0]))
plt.plot(bins_plot,np.exp(log_dens))
plt.axvline(x=bins_plot[i],color='k')
