In [136]:
import sys
sys.path.append("../")
from src import prepro, metrics, run, setup
import src.models.factory as model_factory
import config
import torch

import numpy as np
from src.utils import models
import os.path as osp
import pandas as pd

from xgboost import XGBClassifier
from xgboost import XGBRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error
from src.utils.data_processing import *

window_size = 8
predict_at = 16
full = True

tgt_intensity_cat_train = torch.LongTensor(np.load('../data/y_train_intensity_cat_1980_34_20_120_w' + str(window_size) + '_at_' + str(predict_at) + '.npy',
                                      allow_pickle=True))
tgt_intensity_cat_test = torch.LongTensor(np.load('../data/y_test_intensity_cat_1980_34_20_120_w' + str(window_size) + '_at_' + str(predict_at) + '.npy',
                                     allow_pickle=True))

tgt_intensity_train = torch.Tensor(np.load('../data/y_train_intensity_1980_34_20_120_w' + str(window_size) + '_at_' + str(predict_at) + '.npy',
                                  allow_pickle=True))
tgt_intensity_test = torch.Tensor(np.load('../data/y_test_intensity_1980_34_20_120_w' + str(window_size) + '_at_' + str(predict_at) + '.npy',
                                 allow_pickle=True))

tgt_intensity_cat_baseline_train = torch.LongTensor(np.load('../data/y_train_intensity_cat_baseline_1980_34_20_120_w' + str(window_size) + '_at_' + str(predict_at) + '.npy',  allow_pickle = True))
tgt_intensity_cat_baseline_test = torch.LongTensor(np.load('../data/y_test_intensity_cat_baseline_1980_34_20_120_w' + str(window_size) + '_at_' + str(predict_at) + '.npy', allow_pickle=True))

def standardize(tgt_displacement_train, tgt_displacement_test):
    mean_dx = tgt_displacement_train[:, 0].mean()
    std_dx = tgt_displacement_train[:, 0].std()
    tgt_displacement_train[:, 0] = (tgt_displacement_train[:, 0] - mean_dx) / std_dx
    tgt_displacement_test[:, 0] = (tgt_displacement_test[:, 0] - mean_dx) / std_dx
    std_dx = float(std_dx)
    mean_dx = float(mean_dx)
    mean_dy = tgt_displacement_train[:, 1].mean()
    std_dy = tgt_displacement_train[:, 1].std()
    tgt_displacement_train[:, 1] = (tgt_displacement_train[:, 1] - mean_dy) / std_dy
    tgt_displacement_test[:, 1] = (tgt_displacement_test[:, 1] - mean_dy) / std_dy
    std_dy = float(std_dy)
    mean_dy = float(mean_dy)
    return tgt_displacement_train, tgt_displacement_test, std_dx, mean_dx, std_dy, mean_dy

def unstandardize(tgt_displacement_train, tgt_displacement_test, std_dx, mean_dx, std_dy, mean_dy):
    tgt_displacement_train[:, 0] = tgt_displacement_train[:, 0] *  std_dx + mean_dx
    tgt_displacement_test[:, 0] = tgt_displacement_test[:, 0] *  std_dx + mean_dx
    tgt_displacement_train[:, 1] = tgt_displacement_train[:, 1] * std_dy + mean_dy
    tgt_displacement_test[:, 1] = tgt_displacement_test[:, 1] * std_dy + mean_dy
    return tgt_displacement_train, tgt_displacement_test


##########
##########
########## PREPARING DATA FOR XGB

X_train = np.load('../data/X_train_stat_1980_34_20_120_w' + str(window_size) + '_at_' + str(predict_at) + '.npy',
            allow_pickle=True)
X_test = np.load('../data/X_test_stat_1980_34_20_120_w' + str(window_size) + '_at_' + str(predict_at) + '.npy',
            allow_pickle=True)


names = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'DIST2LAND',
         'STORM_SPEED', 'cat_cos_day', 'cat_sign_day', 'COS_STORM_DIR', 'SIN_STORM_DIR',
         'COS_LAT', 'SIN_LAT', 'COS_LON', 'SIN_LON', 'cat_storm_category', 'cat_basin_AN',
         'cat_basin_EP', 'cat_basin_NI', 'cat_basin_SA',
         'cat_basin_SI', 'cat_basin_SP', 'cat_basin_WP', 'cat_nature_DS', 'cat_nature_ET',
         'cat_nature_MX', 'cat_nature_NR', 'cat_nature_SS', 'cat_nature_TS',
         'STORM_DISPLACEMENT_X', 'STORM_DISPLACEMENT_Y']

names_all = names * window_size

for i in range(len(names_all)):
    names_all[i] += '_' + str(i // 30)

X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)
X_train.columns = names_all
X_test.columns = names_all

cols = [c for c in X_train.columns if c.lower()[-2:] == '_0' or c.lower()[:3] != 'cat']

X_train = X_train[cols]
X_test = X_test[cols]

In [137]:
#X_train_embed = np.load('../data/embeddings/X_train_embeds_1980_34_20_120_results8_16_20_44_28.npy', allow_pickle = True)
#X_test_embed = np.load('../data/embeddings/X_test_embeds_1980_34_20_120_results8_16_20_44_28.npy', allow_pickle = True)
#X_train_embed = np.load('../data/embeddings/X_train_embed_1980_34_20_120_intensity.npy', allow_pickle = True)
#X_test_embed = np.load('../data/embeddings/X_test_embed_1980_34_20_120_intensity.npy', allow_pickle = True)

#48
#X_train_embed = np.load('../data/embeddings/X_train_embed_1980_34_20_120_intensity_48.npy', allow_pickle = True)
#X_test_embed = np.load('../data/embeddings/X_test_embed_1980_34_20_120_intensity_48.npy', allow_pickle = True)

X_train_embed = np.load('../data/embeddings/X_train_embed_1980_34_20_120_track_48.npy', allow_pickle = True)
X_test_embed = np.load('../data/embeddings/X_test_embed_1980_34_20_120_track_48.npy', allow_pickle = True)

X_train_total = np.concatenate((X_train, X_train_embed), axis = 1)
X_test_total = np.concatenate((X_test, X_test_embed), axis = 1)

In [138]:
#X_test_baseline = pd.DataFrame(np.load('../data/X_test_stat_1980_34_20_120_forecast_24_2012_v2_w' + str(window_size) + '_at_' + str(predict_at) + '.npy', allow_pickle=True))
X_test_baseline = pd.DataFrame(np.load('../data/X_test_stat_1980_34_20_120_forecast_48_2012_v2_w' + str(window_size) + '_at_' + str(predict_at) + '.npy', allow_pickle=True))


#names_baselines = ['YEAR', 'MONTH', 'DAY', 'HOUR', 'LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'DIST2LAND', 'STORM_SPEED', 'cos_day', 'sin_day', 'COS_STORM_DIR', 'SIN_STORM_DIR', 'COS_LAT', 'SIN_LAT', 'COS_LON', 'SIN_LON', 'wind_category', 'GFDL_24_lat', 'GFDL_24_lon', 'GFDL_24_vmax', 'GFDL_24_mslp', 'GFDL_24_COS_LAT', 'GFDL_24_SIN_LAT', 'GFDL_24_COS_LON', 'GFDL_24_SIN_LON', 'CMC_24_lat', 'CMC_24_lon', 'CMC_24_vmax', 'CMC_24_mslp', 'CMC_24_COS_LAT', 'CMC_24_SIN_LAT', 'CMC_24_COS_LON', 'CMC_24_SIN_LON', 'FSSE_24_lat', 'FSSE_24_lon', 'FSSE_24_vmax', 'FSSE_24_mslp', 'FSSE_24_COS_LAT', 'FSSE_24_SIN_LAT', 'FSSE_24_COS_LON', 'FSSE_24_SIN_LON', 'OFCL_24_lat', 'OFCL_24_lon', 'OFCL_24_vmax', 'OFCL_24_mslp', 'OFCL_24_COS_LAT', 'OFCL_24_SIN_LAT', 'OFCL_24_COS_LON', 'OFCL_24_SIN_LON', 'NGPS_24_lat', 'NGPS_24_lon', 'NGPS_24_vmax', 'NGPS_24_mslp', 'NGPS_24_COS_LAT', 'NGPS_24_SIN_LAT', 'NGPS_24_COS_LON', 'NGPS_24_SIN_LON', 'DSHP_24_lat', 'DSHP_24_lon', 'DSHP_24_vmax', 'DSHP_24_mslp', 'DSHP_24_COS_LAT', 'DSHP_24_SIN_LAT', 'DSHP_24_COS_LON', 'DSHP_24_SIN_LON', 'SHIP_24_lat', 'SHIP_24_lon', 'SHIP_24_vmax', 'SHIP_24_mslp', 'SHIP_24_COS_LAT', 'SHIP_24_SIN_LAT', 'SHIP_24_COS_LON', 'SHIP_24_SIN_LON', 'CLP5_24_lat', 'CLP5_24_lon', 'CLP5_24_vmax', 'CLP5_24_mslp', 'CLP5_24_COS_LAT', 'CLP5_24_SIN_LAT', 'CLP5_24_COS_LON', 'CLP5_24_SIN_LON', 'HWRF_24_lat', 'HWRF_24_lon', 'HWRF_24_vmax', 'HWRF_24_mslp', 'HWRF_24_COS_LAT', 'HWRF_24_SIN_LAT', 'HWRF_24_COS_LON', 'HWRF_24_SIN_LON', 'UKXI_24_lat', 'UKXI_24_lon', 'UKXI_24_vmax', 'UKXI_24_mslp', 'UKXI_24_COS_LAT', 'UKXI_24_SIN_LAT', 'UKXI_24_COS_LON', 'UKXI_24_SIN_LON', 'LBAR_24_lat', 'LBAR_24_lon', 'LBAR_24_vmax', 'LBAR_24_mslp', 'LBAR_24_COS_LAT', 'LBAR_24_SIN_LAT', 'LBAR_24_COS_LON', 'LBAR_24_SIN_LON', 'AEMN_24_lat', 'AEMN_24_lon', 'AEMN_24_vmax', 'AEMN_24_mslp', 'AEMN_24_COS_LAT', 'AEMN_24_SIN_LAT', 'AEMN_24_COS_LON', 'AEMN_24_SIN_LON', 'DISPLACEMENT_LAT_CLP5_24', 'DISPLACEMENT_LON_CLP5_24', 'DISPLACEMENT_LAT_SHIP_24', 'DISPLACEMENT_LON_SHIP_24', 'DISPLACEMENT_LAT_DSHP_24', 'DISPLACEMENT_LON_DSHP_24', 'DISPLACEMENT_LAT_LBAR_24', 'DISPLACEMENT_LON_LBAR_24', 'DISPLACEMENT_LAT_CMC_24', 'DISPLACEMENT_LON_CMC_24', 'DISPLACEMENT_LAT_NGPS_24', 'DISPLACEMENT_LON_NGPS_24', 'DISPLACEMENT_LAT_GFDL_24', 'DISPLACEMENT_LON_GFDL_24', 'DISPLACEMENT_LAT_HWRF_24', 'DISPLACEMENT_LON_HWRF_24', 'DISPLACEMENT_LAT_UKXI_24', 'DISPLACEMENT_LON_UKXI_24', 'DISPLACEMENT_LAT_FSSE_24', 'DISPLACEMENT_LON_FSSE_24', 'DISPLACEMENT_LAT_AEMN_24', 'DISPLACEMENT_LON_AEMN_24', 'DISPLACEMENT_LAT_OFCL_24', 'DISPLACEMENT_LON_OFCL_24', 'EMXI_24_lat', 'EMXI_24_lon', 'EMXI_24_vmax', 'EMXI_24_mslp', 'EMXI_24_COS_LAT', 'EMXI_24_SIN_LAT', 'EMXI_24_COS_LON', 'EMXI_24_SIN_LON', 'DISPLACEMENT_LAT_EMXI_24', 'DISPLACEMENT_LON_EMXI_24', 'GFSO_24_lat', 'GFSO_24_lon', 'GFSO_24_vmax', 'GFSO_24_mslp', 'GFSO_24_COS_LAT', 'GFSO_24_SIN_LAT', 'GFSO_24_COS_LON', 'GFSO_24_SIN_LON', 'DISPLACEMENT_LAT_GFSO_24', 'DISPLACEMENT_LON_GFSO_24', 'cat_basin_AN', 'cat_basin_EP', 'basin_NI', 'basin_SI', 'basin_SP', 'basin_WP', 'DISPLACEMENT_LAT', 'DISPLACEMENT_LON']
names_baselines = ['YEAR', 'MONTH', 'DAY', 'HOUR', 'LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'DIST2LAND', 'STORM_SPEED', 'cos_day', 'sin_day', 'COS_STORM_DIR', 'SIN_STORM_DIR', 'COS_LAT', 'SIN_LAT', 'COS_LON', 'SIN_LON', 'wind_category', 'DSHP_24_lat', 'DSHP_24_lon', 'DSHP_24_vmax', 'DSHP_24_mslp', 'DSHP_24_COS_LAT', 'DSHP_24_SIN_LAT', 'DSHP_24_COS_LON', 'DSHP_24_SIN_LON', 'OFCL_24_lat', 'OFCL_24_lon', 'OFCL_24_vmax', 'OFCL_24_mslp', 'OFCL_24_COS_LAT', 'OFCL_24_SIN_LAT', 'OFCL_24_COS_LON', 'OFCL_24_SIN_LON', 'UKXI_24_lat', 'UKXI_24_lon', 'UKXI_24_vmax', 'UKXI_24_mslp', 'UKXI_24_COS_LAT', 'UKXI_24_SIN_LAT', 'UKXI_24_COS_LON', 'UKXI_24_SIN_LON', 'CMC_24_lat', 'CMC_24_lon', 'CMC_24_vmax', 'CMC_24_mslp', 'CMC_24_COS_LAT', 'CMC_24_SIN_LAT', 'CMC_24_COS_LON', 'CMC_24_SIN_LON', 'SHIP_24_lat', 'SHIP_24_lon', 'SHIP_24_vmax', 'SHIP_24_mslp', 'SHIP_24_COS_LAT', 'SHIP_24_SIN_LAT', 'SHIP_24_COS_LON', 'SHIP_24_SIN_LON', 'FSSE_24_lat', 'FSSE_24_lon', 'FSSE_24_vmax', 'FSSE_24_mslp', 'FSSE_24_COS_LAT', 'FSSE_24_SIN_LAT', 'FSSE_24_COS_LON', 'FSSE_24_SIN_LON', 'CLP5_24_lat', 'CLP5_24_lon', 'CLP5_24_vmax', 'CLP5_24_mslp', 'CLP5_24_COS_LAT', 'CLP5_24_SIN_LAT', 'CLP5_24_COS_LON', 'CLP5_24_SIN_LON', 'AEMN_24_lat', 'AEMN_24_lon', 'AEMN_24_vmax', 'AEMN_24_mslp', 'AEMN_24_COS_LAT', 'AEMN_24_SIN_LAT', 'AEMN_24_COS_LON', 'AEMN_24_SIN_LON', 'LBAR_24_lat', 'LBAR_24_lon', 'LBAR_24_vmax', 'LBAR_24_mslp', 'LBAR_24_COS_LAT', 'LBAR_24_SIN_LAT', 'LBAR_24_COS_LON', 'LBAR_24_SIN_LON', 'GFDL_24_lat', 'GFDL_24_lon', 'GFDL_24_vmax', 'GFDL_24_mslp', 'GFDL_24_COS_LAT', 'GFDL_24_SIN_LAT', 'GFDL_24_COS_LON', 'GFDL_24_SIN_LON', 'HWRF_24_lat', 'HWRF_24_lon', 'HWRF_24_vmax', 'HWRF_24_mslp', 'HWRF_24_COS_LAT', 'HWRF_24_SIN_LAT', 'HWRF_24_COS_LON', 'HWRF_24_SIN_LON', 'NGPS_24_lat', 'NGPS_24_lon', 'NGPS_24_vmax', 'NGPS_24_mslp', 'NGPS_24_COS_LAT', 'NGPS_24_SIN_LAT', 'NGPS_24_COS_LON', 'NGPS_24_SIN_LON', 'DISPLACEMENT_LAT_CLP5_24', 'DISPLACEMENT_LON_CLP5_24', 'DISPLACEMENT_LAT_SHIP_24', 'DISPLACEMENT_LON_SHIP_24', 'DISPLACEMENT_LAT_DSHP_24', 'DISPLACEMENT_LON_DSHP_24', 'DISPLACEMENT_LAT_LBAR_24', 'DISPLACEMENT_LON_LBAR_24', 'DISPLACEMENT_LAT_CMC_24', 'DISPLACEMENT_LON_CMC_24', 'DISPLACEMENT_LAT_NGPS_24', 'DISPLACEMENT_LON_NGPS_24', 'DISPLACEMENT_LAT_GFDL_24', 'DISPLACEMENT_LON_GFDL_24', 'DISPLACEMENT_LAT_HWRF_24', 'DISPLACEMENT_LON_HWRF_24', 'DISPLACEMENT_LAT_UKXI_24', 'DISPLACEMENT_LON_UKXI_24', 'DISPLACEMENT_LAT_FSSE_24', 'DISPLACEMENT_LON_FSSE_24', 'DISPLACEMENT_LAT_AEMN_24', 'DISPLACEMENT_LON_AEMN_24', 'DISPLACEMENT_LAT_OFCL_24', 'DISPLACEMENT_LON_OFCL_24', 'EMXI_24_lat', 'EMXI_24_lon', 'EMXI_24_vmax', 'EMXI_24_mslp', 'EMXI_24_COS_LAT', 'EMXI_24_SIN_LAT', 'EMXI_24_COS_LON', 'EMXI_24_SIN_LON', 'DISPLACEMENT_LAT_EMXI_24', 'DISPLACEMENT_LON_EMXI_24', 'GFSO_24_lat', 'GFSO_24_lon', 'GFSO_24_vmax', 'GFSO_24_mslp', 'GFSO_24_COS_LAT', 'GFSO_24_SIN_LAT', 'GFSO_24_COS_LON', 'GFSO_24_SIN_LON', 'DISPLACEMENT_LAT_GFSO_24', 'DISPLACEMENT_LON_GFSO_24', 'cat_basin_AN', 'cat_basin_EP', 'basin_NI', 'basin_SI', 'basin_SP', 'basin_WP', 'DISPLACEMENT_LAT', 'DISPLACEMENT_LON']
names_all_baselines = names_baselines * 8#args.window_size

for i in range(len(names_all_baselines)):
    names_all_baselines[i] += '_' + str(i // 167)

X_test_baseline.columns = names_all_baselines

In [139]:
n = X_test_baseline.shape[0]
X_test_total = X_test_total[-n:]
tgt_intensity_test = tgt_intensity_test[-n:]
X_test = X_test[-n:]
X_test_embed = X_test_embed[-n:]
m = len(X_test_baseline[X_test_baseline['YEAR_0'] < 2017])

if full:
    tgt_intensity_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[:m]), axis = 0)
    X_train = pd.concat((X_train, X_test[:m]), axis = 0)
    X_train_embed = np.concatenate((X_train_embed, X_test_embed[:m]), axis = 0)
    X_train_total = np.concatenate((X_train_total, X_test_total[:m]), axis = 0)
    
mean_intensity = tgt_intensity_train.mean()
std_intensity = tgt_intensity_train.std()
tgt_intensity_train = (tgt_intensity_train - mean_intensity)/std_intensity
tgt_intensity_test = (tgt_intensity_test - mean_intensity)/std_intensity

std_ = float(std_intensity)
mean_ = float(mean_intensity)

In [56]:
index = X_train[np.round(X_train['WMO_WIND_7']*1000%10, decimals = 2) == 0].index

In [57]:
X_train = X_train[np.round(X_train['WMO_WIND_7']*1000%10, decimals = 2) == 0].reset_index(drop = True)

In [58]:
X_train_total = X_train_total[index]
tgt_intensity_train = tgt_intensity_train[index]

In [59]:
X_train_embed = X_train_embed[index]

In [3]:
xgb2 = XGBRegressor(max_depth=6, n_estimators=140, learning_rate = 0.07, subsample = 0.7, min_child_weight = 5)
xgb2.fit(X_train, tgt_intensity_train)
print("MAE intensity: ", mean_absolute_error(np.array(tgt_intensity_test)*std_+mean_, np.array(xgb2.predict(X_test))*std_+mean_))

xgb = XGBRegressor(max_depth=8, n_estimators = 150, learning_rate = 0.07, subsample = 0.7, min_child_weight = 1)
xgb.fit(X_train_total, tgt_intensity_train)
print("MAE intensity: ", mean_absolute_error(np.array(tgt_intensity_test)*std_+mean_, np.array(xgb.predict(X_test_total))*std_+mean_))

MAE intensity:  12.559202
MAE intensity:  11.910126


In [12]:
def train_xgb_track(basin_only = False, sparse = False, max_depth = 8, n_estimators = 140, learning_rate = 0.15, subsample = 0.7, min_child_weight=5, basin = 'AN', forecast = 'SHIP', forecast2 = None):
    train_x = X_train_total
    train_y = X_train_total
    test_x = X_test_total
    test_y = X_test_total
    tgt_train = tgt_displacement_train
    if sparse:
        train_x, train_y = X_train_total_sparse_x, X_train_total_sparse_y
        test_x, test_y = X_test_total_sparse_x, X_test_total_sparse_y
    if basin_only:
        train_x = X_train_total[X_train['cat_basin_'+basin+'_0'] == 1]
        train_y = train_x
        tgt_train = tgt_displacement_train[X_train['cat_basin_'+basin+'_0'] == 1]
    xgb_x = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate, subsample=subsample, min_child_weight=min_child_weight)
    xgb_x.fit(train_x, tgt_train[:, 0])
    xgb_y = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate, subsample=subsample, min_child_weight=min_child_weight)
    xgb_y.fit(train_y, tgt_train[:, 1])
    DLATS_PRED = np.array(xgb_x.predict(test_x)) * std_dx + mean_dx
    DLONS_PRED = np.array(xgb_y.predict(test_y)) * std_dy + mean_dy
    LATS_PRED_ = X_test['LAT_7'] + DLATS_PRED
    LONS_PRED_ = X_test['LON_7'] + DLONS_PRED
    compare_perf_track(basin=basin, forecast=forecast, forecast2 = forecast2, LATS_PRED_=LATS_PRED_, LONS_PRED_=LONS_PRED_)

def train_xgb_track_all_years(use_forecast = False, basin_only = False, sparse = False, max_depth = 8, n_estimators = 140, learning_rate = 0.15, subsample = 0.7, min_child_weight=5, basin = 'AN', forecast = 'SHIP', forecast2 = None):
    train_x = X_train_total
    train_y = X_train_total
    test_x = X_test_total
    test_y = X_test_total
    tgt_train = tgt_displacement_train
    if sparse:
        train_x, train_y = X_train_total_sparse_x, X_train_total_sparse_y
        test_x, test_y = X_test_total_sparse_x, X_test_total_sparse_y
    if basin_only:
        train_x = X_train_total[X_train['cat_basin_'+basin+'_0'] == 1]
        train_y = train_x
        tgt_train = tgt_displacement_train[X_train['cat_basin_'+basin+'_0'] == 1]
    if use_forecast:
        train_for = X_train_forecasts
        tgt_train_for = tgt_train_dis_forecasts
        test_for = X_test_forecasts
        xgb_x = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate, subsample=subsample, min_child_weight=min_child_weight)
        xgb_x.fit(train_for, tgt_train_for[:, 0])
        xgb_y = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate, subsample=subsample, min_child_weight=min_child_weight)
        xgb_y.fit(train_for, tgt_train_for[:, 1])
        DLATS_PRED = np.array(xgb_x.predict(X_new)) * std_dx + mean_dx
        DLONS_PRED = np.array(xgb_y.predict(X_new)) * std_dy + mean_dy
    else:
        xgb_x = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                             subsample=subsample, min_child_weight=min_child_weight)
        xgb_x.fit(train_x, tgt_train[:, 0])
        xgb_y = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                             subsample=subsample, min_child_weight=min_child_weight)
        xgb_y.fit(train_y, tgt_train[:, 1])
        DLATS_PRED = np.array(xgb_x.predict(test_x)) * std_dx + mean_dx
        DLONS_PRED = np.array(xgb_y.predict(test_y)) * std_dy + mean_dy
    LATS_PRED_2012 = X_test['LAT_7'] + DLATS_PRED
    LONS_PRED_2012 = X_test['LON_7'] + DLONS_PRED
    compare_perf_track(basin=basin, forecast=forecast, forecast2 = forecast2, LATS_PRED_=LATS_PRED_2012, LONS_PRED_=LONS_PRED_2012)
    dict = {'year': [], 'num_samples': [], 'MAEs_full': [], 'std_full': [], 'MAES_2012': [], 'std_2012': [],
            'MAES_SHIP': [], 'std_SHIP': [], 'MAES_HWRF': [], 'std_HWRF': []}
    for year in range(2012, 2020):
        try:
            index = X_test_baseline.loc[
                X_test_baseline['YEAR'] < year].index
            X_test_to_train = X_test_total[index]
            train = np.concatenate((X_train_total, X_test_to_train), axis=0)
            tgt_train = np.concatenate((tgt_displacement_train, tgt_displacement_test[index]), axis=0)
            xgb_x = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                                 subsample=subsample, min_child_weight=min_child_weight)
            xgb_x.fit(train, tgt_train[:, 0])
            xgb_y = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                                 subsample=subsample, min_child_weight=min_child_weight)
            xgb_y.fit(train, tgt_train[:, 1])
            DLATS_PRED = np.array(xgb_x.predict(test_x)) * std_dx + mean_dx
            DLONS_PRED = np.array(xgb_y.predict(test_y)) * std_dy + mean_dy
            LATS_PRED_ = X_test['LAT_7'] + DLATS_PRED
            LONS_PRED_ = X_test['LON_7'] + DLONS_PRED
            compare_perf_track_per_year(dict, LATS_PRED_, LONS_PRED_, LATS_PRED_2012, LONS_PRED_2012, forecast=forecast,
                                            forecast2=forecast2, basin=basin,  year=year)
            print("\n")
        except:
            print("\n No forecasts for year ", year)
    return dict




def train_xgb_intensity(forecast = 'SHIP', basin_only = False, sparse = False, max_depth = 8, n_estimators = 140, learning_rate = 0.15, subsample = 0.7, min_child_weight=5, basin = 'AN', forecast2 = None):
    train = X_train_total
    #test = X_test_total
    tgt_train = tgt_intensity_train
    if sparse:
        train = X_train_total_sparse_x
        test = X_test_total_sparse_x
    if basin_only:
        train = X_train_total[X_train['cat_basin_'+basin+'_0'] == 1]
        tgt_train = tgt_intensity_train[X_train['cat_basin_'+basin+'_0'] == 1]
    xgb_total = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate, subsample=subsample, min_child_weight=min_child_weight)
    xgb_total.fit(train, tgt_train)
    compare_perf_intensity(xgb_total = xgb_total, basin=basin, forecast=forecast, mode='vmax', forecast2 = forecast2)




def compare_perf_intensity(xgb_total, basin = 'AN', forecast = 'SHIP', last_storms = 1000, mode = 'vmax', forecast2 = 'HWRF'):
    if forecast2 != None:
        index = X_test_baseline.loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline[
                                                                             'cat_basin_' + basin + '_0'] == 1].index  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
        # X_test_withBASELINE = X_test.loc[X_test_baseline[forecast + '_24_'+mode+'_7'] > -320].loc[X_test_baseline['cat_basin_'+basin+'_0'] == 1]#.loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0]
        baseline_ = X_test_baseline.loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline['cat_basin_' + basin + '_0'] == 1]  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0]
        baseline_2 = baseline_[forecast2 + '_24_' + mode + '_7']
    else:
        index = X_test_baseline.loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline[
                                                                             'cat_basin_' + basin + '_0'] == 1].index  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
        # X_test_withBASELINE = X_test.loc[X_test_baseline[forecast + '_24_'+mode+'_7'] > -320].loc[X_test_baseline['cat_basin_'+basin+'_0'] == 1]#.loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0]
        baseline_ = X_test_baseline.loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline['cat_basin_' + basin + '_0'] == 1]  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0]
    X_test_withBASELINE_total = X_test_total[index]
    baseline_1 = baseline_[forecast + '_24_' + mode + '_7']
    if mode == 'vmax':
        tgt_ = np.array(tgt_intensity_test[index] * std_ + mean_)
        preds = xgb_total.predict(X_test_withBASELINE_total) * std_ + mean_
        #print("MAE intensity basin " + basin + " X stat vs "+ forecast + " : ", mean_absolute_error(tgt_intensity_test_withBASELINE * std_ + mean_,
                                                     #xgb.predict(X_test_withBASELINE) * std_ + mean_))
        print("Timesteps", len(tgt_))
        print("MAE intensity basin " + basin + " Hurricast : ", np.around(mean_absolute_error(tgt_, preds), decimals = 2), "with std ", np.around(np.std(tgt_ - preds), decimals=2))
        print("MAE intensity basin " + basin + " Official Forecast "+ forecast + " : ",
              np.around(mean_absolute_error(tgt_, baseline_1), decimals = 2), "with std ", np.around(np.std(tgt_ - baseline_1), decimals = 2))
        print("MAE intensity basin " + basin + " Official Forecast " + str(forecast2) + " : ",
              np.around(mean_absolute_error(tgt_, baseline_2), decimals=2), "with std ",
              np.around(np.std(tgt_ - baseline_2), decimals=2))
        print("Percentage of missed intensification > 20kn Hurricast: ", np.around(sum(abs(tgt_ - preds) > 20)/len(preds) * 100, decimals = 2))
        print("Percentage of missed intensification > 20kn Official Forecast"+ forecast + " : ", np.around(sum(abs(tgt_ - baseline_1) > 20) / len(baseline_1) * 100, decimals =2))
        print("Percentage of missed intensification > 20kn Official Forecast 2"+ str(forecast2) + " : ", np.around(sum(abs(tgt_ - baseline_2) > 20) / len(baseline_2) * 100, decimals =2))
        print("\nMAE intensity basin " + basin + " Hurricast last", last_storms, ": ", np.around(mean_absolute_error(tgt_[-last_storms:], preds[-last_storms:]), decimals=2))
        print("MAE intensity basin " + basin + " Official Forecast " + forecast + " : ",
              np.around(mean_absolute_error(tgt_[-last_storms:], baseline_1[-last_storms:]), decimals=2))


#train_xgb_intensity(forecast = 'SHIP', basin = 'EP', max_depth=8, n_estimators = 120, learning_rate = 0.07, subsample = 0.8, min_child_weight = 1)
#train_xgb_intensity(forecast = 'SHIP', basin = 'AN', max_depth=8, n_estimators = 150, learning_rate = 0.07, subsample = 0.8, min_child_weight = 1, forecast2 = 'HWRF')


def compare_perf_intensity_per_year(dict, xgb_tot, xgb_total, year, forecast2, basin = 'AN', forecast = 'HWRF', mode = 'vmax'):
    if forecast2 != None:
        index = X_test_baseline.loc[X_test_baseline['YEAR_0'] == year].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline['cat_basin_' + basin + '_0'] == 1].index  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
        # X_test_withBASELINE = X_test.loc[X_test_baseline[forecast + '_24_'+mode+'_7'] > -320].loc[X_test_baseline['cat_basin_'+basin+'_0'] == 1]#.loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0]
        baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] == year].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline['cat_basin_' + basin + '_0'] == 1]  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0]
        baseline_2 = baseline_[forecast2 + '_24_' + mode + '_7']
    else:
        index = X_test_baseline.loc[X_test_baseline['YEAR_0'] == year].loc[X_test_baseline[forecast + '_24_'+mode+'_7'] > -320].loc[X_test_baseline['cat_basin_'+basin+'_0'] == 1].index#.loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
        #X_test_withBASELINE = X_test.loc[X_test_baseline[forecast + '_24_'+mode+'_7'] > -320].loc[X_test_baseline['cat_basin_'+basin+'_0'] == 1]#.loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0]
        baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] == year].loc[X_test_baseline[forecast + '_24_'+mode+'_7'] > -320].loc[X_test_baseline['cat_basin_'+basin+'_0'] == 1]#.loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0]
    X_test_withBASELINE_total = X_test_total[index]
    baseline_1 = baseline_[forecast + '_24_'+mode+'_7']
    if mode == 'vmax':
        tgt_ = np.array(tgt_intensity_test[index] * std_ + mean_)
        print("Total number of steps for comparison: ", len(tgt_))
        preds_1 = xgb_tot.predict(X_test_withBASELINE_total) * std_ + mean_
        preds = xgb_total.predict(X_test_withBASELINE_total) * std_ + mean_
        #print("MAE intensity basin " + basin + " X stat vs "+ forecast + " : ", mean_absolute_error(tgt_intensity_test_withBASELINE * std_ + mean_,
                                                     #xgb.predict(X_test_withBASELINE) * std_ + mean_))
        print("Year ", year, " MAE intensity basin " + basin + " Hurricast trained full: ", np.around(mean_absolute_error(tgt_, preds), decimals = 2), "with std ", np.around(np.std(tgt_ - preds), decimals=2))
        print("Year ", year, " MAE intensity basin " + basin + " Hurricast trained until 2012: ", np.around(mean_absolute_error(tgt_, preds_1), decimals = 2), "with std ", np.around(np.std(tgt_ - preds_1), decimals=2))
        print("Year ", year, " MAE intensity basin " + basin + " Official Forecast "+ forecast + " : ",
              np.around(mean_absolute_error(tgt_, baseline_1), decimals = 2), "with std ", np.around(np.std(tgt_ - baseline_1), decimals = 2))
        if forecast2 != None:
            print("Year ", year, " MAE intensity basin " + basin + " Official Forecast " + forecast2 + " : ",
                np.around(mean_absolute_error(tgt_, baseline_2), decimals=2), "with std ",
                np.around(np.std(tgt_ - baseline_2), decimals=2))
        append_dict_intensity(dict, tgt_, preds, preds_1, baseline_1, baseline_2, year)
        #print("Year ", year, " Percentage of missed intensification > 20kn Hurricast: ", np.around(sum(tgt_ - preds > 20)/len(preds) * 100, decimals = 2))
        #print("Year ", year, " Percentage of missed intensification > 20kn Official Forecast: ", np.around(sum(tgt_ - baseline_1 > 20) / len(baseline_1) * 100, decimals =2))



def append_dict_intensity(dict, tgt_, preds, preds_1, baseline_1, baseline_2, year):
    dict['year'].append(year)
    dict['num_samples'].append(len(tgt_))
    dict['MAEs_full'].append(np.around(mean_absolute_error(tgt_, preds), decimals = 2))
    dict['std_full'].append(np.around(np.std(tgt_ - preds), decimals = 2))
    dict['MAES_2012'].append(np.around(mean_absolute_error(tgt_, preds_1), decimals = 2))
    dict['std_2012'].append(np.around(np.std(tgt_ - preds_1), decimals=2))
    dict['MAES_SHIP'].append(np.around(mean_absolute_error(tgt_, baseline_1), decimals = 2))
    dict['std_SHIP'].append(np.around(np.std(tgt_ - baseline_1), decimals = 2))
    dict['MAES_HWRF'].append(np.around(mean_absolute_error(tgt_, baseline_2), decimals=2))
    dict['std_HWRF'].append(np.around(np.std(tgt_ - baseline_2), decimals=2))


def train_xgb_intensity_all_years(forecast2 = None, basin_only = False, sparse = False, max_depth = 8, n_estimators = 140, learning_rate = 0.15, subsample = 0.7, min_child_weight=5, basin = 'AN', forecast = 'HWRF'):
    train = X_train_total
    #test = X_test_total
    tgt_train = tgt_intensity_train
    if sparse:
        train = X_train_total_sparse_x
        #test = X_test_total_sparse_x
    if basin_only:
        train = X_train_total[X_train['cat_basin_'+basin+'_0'] == 1]
        tgt_train = tgt_intensity_train[X_train['cat_basin_'+basin+'_0'] == 1]
    xgb_total = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate, subsample=subsample, min_child_weight=min_child_weight)
    xgb_total.fit(train, tgt_train)
    for year in range(2012, 2020):
        try:
            compare_perf_intensity_per_year(forecast2 = forecast2, xgb_total = xgb_total, basin=basin, forecast=forecast, mode='vmax', year = year)
            print("\n")
        except:
            print("\n No forecasts for year ", year)

def train_xgb_intensity_all_years_full_train(forecast2 = None, basin_only = False, sparse = False, max_depth = 8, n_estimators = 140, learning_rate = 0.15, subsample = 0.7, min_child_weight=5, basin = 'AN', forecast = 'HWRF'):
    train = X_train_total
    #test = X_test_total
    tgt_train = tgt_intensity_train
    if sparse:
        train = X_train_total_sparse_x
        #test = X_test_total_sparse_x
    if basin_only:
        train = X_train_total[X_train['cat_basin_'+basin+'_0'] == 1]
        tgt_train = tgt_intensity_train[X_train['cat_basin_'+basin+'_0'] == 1]
    xgb_total_1 = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                             subsample=subsample, min_child_weight=min_child_weight)
    xgb_total_1.fit(train, tgt_train)
    dict = {'year':[], 'num_samples':[], 'MAEs_full':[], 'std_full':[], 'MAES_2012':[], 'std_2012':[], 'MAES_SHIP':[], 'std_SHIP':[], 'MAES_HWRF':[], 'std_HWRF':[]}
    for year in range(2012, 2020):
        try:
            index = X_test_baseline.loc[X_test_baseline['YEAR_0'] < year].index  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
            X_test_to_train = X_test_total[index]
            train = np.concatenate((X_train_total, X_test_to_train), axis = 0)
            tgt_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[index]), axis = 0)
            xgb_total = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                                     subsample=subsample, min_child_weight=min_child_weight)
            xgb_total.fit(train, tgt_train)
            compare_perf_intensity_per_year(dict = dict, xgb_tot = xgb_total_1, forecast2 = forecast2, xgb_total = xgb_total, basin=basin, forecast=forecast, mode='vmax', year = year)
            print("\n")
        except:
            print("\n No forecasts for year ", year)
    return dict


In [71]:
params = {
 'min_child_weight':[1,5],
 'n_estimators':[100, 120, 150],
 'subsample':[0.6,0.7,0.8,],
 'learning_rate':[0.07, 0.1, 0.15],
}
grid = GridSearchCV(estimator = XGBRegressor(learning_rate = 0.07, n_estimators=140, max_depth=8,
min_child_weight=1, subsample=0.8, seed=1),
param_grid = params, n_jobs=4, scoring = 'neg_mean_absolute_error')

grid.fit(X_train_total, np.array(tgt_intensity_train))

grid.best_params_, grid.best_score_

({'learning_rate': 0.07,
  'min_child_weight': 5,
  'n_estimators': 100,
  'subsample': 0.7},
 -0.2854653795560201)

In [13]:
def train_xgb_intensity_all_years_full_train_4cast(forecast2 = None, forecast3 = None, forecast4 = None, max_depth = 8, n_estimators = 140, learning_rate = 0.15, subsample = 0.7, min_child_weight=5, basin = 'AN', forecast = 'HWRF'):
    train = X_train_total
    test = X_test_total
    tgt_train = tgt_intensity_train
    xgb_total_1 = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                             subsample=subsample, min_child_weight=min_child_weight)
    xgb_total_1.fit(train, tgt_train)
    dict = {'year':[], 'num_samples':[], 'MAEs_full':[], 'std_full':[], 'MAES_2012':[], 'std_2012':[], 'MAES_SHIP':[], 'std_SHIP':[], 'MAES_HWRF':[], 'std_HWRF':[], 'MAES_FSSE':[], 'std_FSSE':[], 'MAES_OFCL':[], 'std_OFCL':[]}
    for year in range(2012, 2020):
        try:
            index = X_test_baseline.loc[X_test_baseline['YEAR_0'] < year].index  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
            X_test_to_train = X_test_total[index]
            train = np.concatenate((X_train_total, X_test_to_train), axis = 0)
            tgt_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[index]), axis = 0)
            xgb_total = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                                     subsample=subsample, min_child_weight=min_child_weight)
            xgb_total.fit(train, tgt_train)
            compare_perf_intensity_per_year_4cast(test, dict = dict, xgb_tot = xgb_total_1, forecast2 = forecast2, xgb_total = xgb_total, basin=basin, forecast=forecast, mode='vmax', year = year, forecast3= forecast3, forecast4 = forecast4)
            print("\n")
        except:
            print("\n No forecasts for year ", year)
    return dict

def train_xgb_intensity_all_years_full_train_6cast(forecast2 = None, forecast3 = None, forecast4 = None, forecast5 = None, forecast6 = None, max_depth = 8, n_estimators = 140, learning_rate = 0.07, subsample = 0.7, min_child_weight=5, basin = 'AN', forecast = 'SHIP'):
    train = X_train_total
    test = X_test_total
    tgt_train = tgt_intensity_train
    xgb_total_1 = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                             subsample=subsample, min_child_weight=min_child_weight)
    xgb_total_1.fit(train, tgt_train)
    dict6 = {'year':[], 'num_samples':[], 'MAEs_full':[], 'std_full':[], 'MAES_2012':[], 'std_2012':[], 'MAES_SHIP':[], 'std_SHIP':[], 'MAES_HWRF':[], 'std_HWRF':[], 'MAES_FSSE':[], 'std_FSSE':[], 'MAES_OFCL':[], 'std_OFCL':[], 'MAES_GFSO':[], 'std_GFSO':[], 'MAES_DSHP':[], 'std_DSHP':[]}
    for year in range(2012, 2020):
        try:
            index = X_test_baseline.loc[X_test_baseline['YEAR_0'] < year].index  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
            X_test_to_train = X_test_total[index]
            train = np.concatenate((X_train_total, X_test_to_train), axis = 0)
            tgt_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[index]), axis = 0)
            xgb_total = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate,
                                     subsample=subsample, min_child_weight=min_child_weight)
            xgb_total.fit(train, tgt_train)
            compare_perf_intensity_per_year_6cast(test, dict6, xgb_tot = xgb_total_1, forecast2 = forecast2, xgb_total = xgb_total, basin=basin, forecast=forecast, year = year, forecast3= forecast3, forecast4 = forecast4, forecast5 = forecast5, forecast6 = forecast6)
            print("\n")
        except:
            print("\n No forecasts for year ", year)
    return dict6

def compare_perf_intensity_per_year_4cast(test, dict, xgb_tot, xgb_total, year, forecast2, forecast3, forecast4, basin = 'AN', forecast = 'SHIP', mode = 'vmax'):
    baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] >= year].loc[
            X_test_baseline[forecast4 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast3 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline['cat_basin_' + basin + '_0'] == 1]  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
    if basin == 'EP':
        baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] < 2019].loc[X_test_baseline['YEAR_0'] >= year].loc[
            X_test_baseline[forecast4 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast3 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline['cat_basin_' + basin + '_0'] == 1]
    index = baseline_.index
    baseline_1 = baseline_[forecast + '_24_'+mode+'_7']
    baseline_2 = baseline_[forecast2 + '_24_' + mode + '_7']
    baseline_3 = baseline_[forecast3 + '_24_' + mode + '_7']
    baseline_4 = baseline_[forecast4 + '_24_' + mode + '_7']
    X_test_withBASELINE_total = np.array(test)[index]
    if mode == 'vmax':
        tgt_ = np.array(tgt_intensity_test[index] * std_ + mean_)
        print("Total number of steps for comparison: ", len(tgt_))
        print("Year", year, "Basin", basin, "MAE intensity | std | busts > 20kn %")
        preds_1 = xgb_tot.predict(X_test_withBASELINE_total) * std_ + mean_
        preds = xgb_total.predict(X_test_withBASELINE_total) * std_ + mean_
        #print("MAE intensity basin " + basin + " X stat vs "+ forecast + " : ", mean_absolute_error(tgt_intensity_test_withBASELINE * std_ + mean_,
                                                     #xgb.predict(X_test_withBASELINE) * std_ + mean_))
        print("Hurr full: ", np.around(mean_absolute_error(tgt_, preds), decimals = 2), np.around(np.std(tgt_ - preds), decimals=2), np.around(sum(abs(tgt_ - preds) > 20)/len(preds) * 100, decimals = 2))
        print("Hurr until 2012: ", np.around(mean_absolute_error(tgt_, preds_1), decimals = 2), np.around(np.std(tgt_ - preds_1), decimals=2), np.around(sum(abs(tgt_ - preds_1) > 20)/len(preds) * 100, decimals = 2))
        print(forecast,
              np.around(mean_absolute_error(tgt_, baseline_1), decimals = 2), np.around(np.std(tgt_ - baseline_1), decimals = 2), np.around(sum(abs(tgt_ - baseline_1) > 20)/len(preds) * 100, decimals = 2))
        if forecast2 != None:
            print(forecast2,
                np.around(mean_absolute_error(tgt_, baseline_2), decimals=2),
                np.around(np.std(tgt_ - baseline_2), decimals=2),
                np.around(sum(abs(tgt_ - baseline_2) > 20)/len(preds) * 100, decimals = 2))
            print(forecast3,
                np.around(mean_absolute_error(tgt_, baseline_3), decimals=2),
                np.around(np.std(tgt_ - baseline_3), decimals=2),
                np.around(sum(abs(tgt_ - baseline_3) > 20)/len(preds) * 100, decimals = 2))
            print(forecast4,
                np.around(mean_absolute_error(tgt_, baseline_4), decimals=2),
                np.around(np.std(tgt_ - baseline_4), decimals=2),
                np.around(sum(abs(tgt_ - baseline_4) > 20)/len(preds) * 100, decimals = 2))

        append_dict_intensity_4cast(dict, tgt_, preds, preds_1, baseline_1, baseline_2, baseline_3, baseline_4, year)
        
        
def append_dict_intensity_4cast(dict, tgt_, preds, preds_1, baseline_1, baseline_2, baseline_3, baseline_4, year):
    dict['year'].append(year)
    dict['num_samples'].append(len(tgt_))
    dict['MAEs_full'].append(np.around(mean_absolute_error(tgt_, preds), decimals = 2))
    dict['std_full'].append(np.around(np.std(tgt_ - preds), decimals = 2))
    dict['MAES_2012'].append(np.around(mean_absolute_error(tgt_, preds_1), decimals = 2))
    dict['std_2012'].append(np.around(np.std(tgt_ - preds_1), decimals=2))
    dict['MAES_SHIP'].append(np.around(mean_absolute_error(tgt_, baseline_1), decimals = 2))
    dict['std_SHIP'].append(np.around(np.std(tgt_ - baseline_1), decimals = 2))
    dict['MAES_HWRF'].append(np.around(mean_absolute_error(tgt_, baseline_2), decimals=2))
    dict['std_HWRF'].append(np.around(np.std(tgt_ - baseline_2), decimals=2))
    dict['MAES_OFCL'].append(np.around(mean_absolute_error(tgt_, baseline_3), decimals=2))
    dict['std_OFCL'].append(np.around(np.std(tgt_ - baseline_3), decimals=2))
    dict['MAES_FSSE'].append(np.around(mean_absolute_error(tgt_, baseline_4), decimals=2))
    dict['std_FSSE'].append(np.around(np.std(tgt_ - baseline_4), decimals=2))
    
def append_dict_intensity_6cast(dict, tgt_, preds, preds_1, baseline_1, baseline_2, baseline_3, baseline_4, baseline_5, baseline_6, year):
    dict['year'].append(year)
    dict['num_samples'].append(len(tgt_))
    dict['MAEs_full'].append(np.around(mean_absolute_error(tgt_, preds), decimals = 2))
    dict['std_full'].append(np.around(np.std(tgt_ - preds), decimals = 2))
    dict['MAES_2012'].append(np.around(mean_absolute_error(tgt_, preds_1), decimals = 2))
    dict['std_2012'].append(np.around(np.std(tgt_ - preds_1), decimals=2))
    dict['MAES_SHIP'].append(np.around(mean_absolute_error(tgt_, baseline_1), decimals = 2))
    dict['std_SHIP'].append(np.around(np.std(tgt_ - baseline_1), decimals = 2))
    dict['MAES_HWRF'].append(np.around(mean_absolute_error(tgt_, baseline_2), decimals=2))
    dict['std_HWRF'].append(np.around(np.std(tgt_ - baseline_2), decimals=2))
    dict['MAES_OFCL'].append(np.around(mean_absolute_error(tgt_, baseline_3), decimals=2))
    dict['std_OFCL'].append(np.around(np.std(tgt_ - baseline_3), decimals=2))
    dict['MAES_FSSE'].append(np.around(mean_absolute_error(tgt_, baseline_4), decimals=2))
    dict['std_FSSE'].append(np.around(np.std(tgt_ - baseline_4), decimals=2))
    dict['MAES_GFSO'].append(np.around(mean_absolute_error(tgt_, baseline_5), decimals=2))
    dict['std_GFSO'].append(np.around(np.std(tgt_ - baseline_5), decimals=2))
    dict['MAES_DSHP'].append(np.around(mean_absolute_error(tgt_, baseline_6), decimals=2))
    dict['std_DSHP'].append(np.around(np.std(tgt_ - baseline_6), decimals=2))
    
    #consensus_ofcl = (baseline_1+baseline_2+baseline_3+baseline_4+baseline_5+baseline_6)/6
    #consensus_hurr = (baseline_1+baseline_2+baseline_3+baseline_4+baseline_5+baseline_6+preds)/7
    consensus_ofcl = (baseline_1+baseline_2+baseline_3+baseline_4 + baseline_6)/5
    consensus_hurr = (baseline_1+baseline_2+baseline_3+baseline_4 + baseline_6+preds)/6
    print("Consensus ofcl", np.around(mean_absolute_error(tgt_, consensus_ofcl), decimals=2), np.around(np.std(tgt_ - consensus_ofcl), decimals = 2), np.around(sum(abs(tgt_ - consensus_ofcl) > 20)/len(preds) * 100, decimals = 2))
    print("Consensus Hurr", np.around(mean_absolute_error(tgt_, consensus_hurr), decimals=2), np.around(np.std(tgt_ - consensus_hurr), decimals = 2), np.around(sum(abs(tgt_ - consensus_hurr) > 20)/len(preds) * 100, decimals = 2))
    
    
def compare_perf_intensity_per_year_6cast(test, dict, xgb_tot, xgb_total, year, forecast2, forecast3, forecast4, forecast5, forecast6, basin = 'AN', forecast = 'SHIP'):
    mode = 'vmax'
    baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] >= year].loc[
            X_test_baseline[forecast6 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast5 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast4 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast3 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline['cat_basin_' + basin + '_0'] == 1]  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
    if basin == 'EP':
        baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] < 2019].loc[X_test_baseline['YEAR_0'] >= year].loc[
            X_test_baseline[forecast6 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast5 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast4 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast3 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline['cat_basin_' + basin + '_0'] == 1]
    index = baseline_.index
    baseline_1 = baseline_[forecast + '_24_'+mode+'_7']
    baseline_2 = baseline_[forecast2 + '_24_' + mode + '_7']
    baseline_3 = baseline_[forecast3 + '_24_' + mode + '_7']
    baseline_4 = baseline_[forecast4 + '_24_' + mode + '_7']
    baseline_5 = baseline_[forecast5 + '_24_' + mode + '_7']
    baseline_6 = baseline_[forecast6 + '_24_' + mode + '_7']
    X_test_withBASELINE_total = np.array(test)[index]
    
    tgt_ = np.array(tgt_intensity_test[index] * std_ + mean_)
    print("Total number of steps for comparison: ", len(tgt_))
    print("Year", year, "Basin", basin, "MAE intensity | std | busts > 20kn %")
    preds_1 = xgb_tot.predict(X_test_withBASELINE_total) * std_ + mean_
    preds = xgb_total.predict(X_test_withBASELINE_total) * std_ + mean_
        #print("MAE intensity basin " + basin + " X stat vs "+ forecast + " : ", mean_absolute_error(tgt_intensity_test_withBASELINE * std_ + mean_,
                                                     #xgb.predict(X_test_withBASELINE) * std_ + mean_))
    print("Hurr full: ", np.around(mean_absolute_error(tgt_, preds), decimals = 2), np.around(np.std(tgt_ - preds), decimals=2), np.around(sum(abs(tgt_ - preds) > 20)/len(preds) * 100, decimals = 2))
    print("Hurr until 2012: ", np.around(mean_absolute_error(tgt_, preds_1), decimals = 2), np.around(np.std(tgt_ - preds_1), decimals=2), np.around(sum(abs(tgt_ - preds_1) > 20)/len(preds) * 100, decimals = 2))
    print(forecast,
              np.around(mean_absolute_error(tgt_, baseline_1), decimals = 2), np.around(np.std(tgt_ - baseline_1), decimals = 2), np.around(sum(abs(tgt_ - baseline_1) > 20)/len(preds) * 100, decimals = 2))
    if forecast2 != None:
        print(forecast2,
                np.around(mean_absolute_error(tgt_, baseline_2), decimals=2),
                np.around(np.std(tgt_ - baseline_2), decimals=2),
                np.around(sum(abs(tgt_ - baseline_2) > 20)/len(preds) * 100, decimals = 2))
        print(forecast3,
                np.around(mean_absolute_error(tgt_, baseline_3), decimals=2),
                np.around(np.std(tgt_ - baseline_3), decimals=2),
                np.around(sum(abs(tgt_ - baseline_3) > 20)/len(preds) * 100, decimals = 2))
        print(forecast4,
                np.around(mean_absolute_error(tgt_, baseline_4), decimals=2),
                np.around(np.std(tgt_ - baseline_4), decimals=2),
                np.around(sum(abs(tgt_ - baseline_4) > 20)/len(preds) * 100, decimals = 2))
        print(forecast5,
                np.around(mean_absolute_error(tgt_, baseline_5), decimals=2),
                np.around(np.std(tgt_ - baseline_5), decimals=2),
                np.around(sum(abs(tgt_ - baseline_5) > 20)/len(preds) * 100, decimals = 2))
        print(forecast6,
                np.around(mean_absolute_error(tgt_, baseline_6), decimals=2),
                np.around(np.std(tgt_ - baseline_6), decimals=2),
                np.around(sum(abs(tgt_ - baseline_6) > 20)/len(preds) * 100, decimals = 2))

    append_dict_intensity_6cast(dict, tgt_, preds, preds_1, baseline_1, baseline_2, baseline_3, baseline_4, baseline_5, baseline_6, year)
        

In [None]:
X_train_baseline = pd.DataFrame(np.load('../data/X_test_stat_1980_34_20_120_forecast_48_2012_v2_w' + str(window_size) + '_at_' + str(predict_at) + '.npy', allow_pickle=True))

In [None]:
#48 EP
{'MAES_2012': [16.65, 17.68, 16.56, 11.43, 19.61, 4.58],
 'MAES_FSSE': [13.57, 11.64, 11.12, 10.34, 12.83, 20.76],
 'MAES_HWRF': [15.53, 14.44, 12.54, 11.16, 17.52, 23.69],
 'MAES_OFCL': [13.61, 11.26, 11.24, 9.75, 12.56, 21.23],
 'MAES_SHIP': [13.46, 12.75, 11.17, 8.35, 12.59, 16.01],
 'MAEs_full': [16.16, 15.76, 16.23, 10.34, 19.29, 5.3],
 'num_samples': [205, 221, 259, 140, 315, 37],
 'std_2012': [20.16, 19.65, 21.14, 13.71, 23.47, 5.77],
 'std_FSSE': [15.51, 15.72, 14.16, 12.57, 15.3, 19.16],
 'std_HWRF': [19.13, 18.8, 15.9, 13.02, 20.54, 19.81],
 'std_OFCL': [16.65, 16.08, 14.69, 11.23, 16.02, 19.8],
 'std_SHIP': [16.03, 15.94, 15.2, 10.66, 16.6, 17.82],
 'std_full': [19.51, 17.97, 20.97, 12.3, 23.67, 6.5],
 'year': [2014, 2015, 2016, 2017, 2018, 2019]}

#48 AN
{'MAES_2012': [11.17, 23.33, 12.2, 16.07, 16.78, 17.89, 17.38, 16.59],
 'MAES_FSSE': [10.53, 16.12, 10.41, 9.23, 10.76, 9.37, 12.62, 10.07],
 'MAES_HWRF': [10.39, 37.12, 10.97, 11.8, 12.48, 16.58, 16.25, 15.48],
 'MAES_OFCL': [7.23, 15.62, 7.62, 9.06, 9.77, 11.26, 11.65, 10.31],
 'MAES_SHIP': [8.2, 14.62, 12.7, 10.04, 11.38, 11.85, 11.81, 12.3],
 'MAEs_full': [11.05, 19.54, 12.24, 15.34, 17.2, 18.07, 17.68, 16.88],
 'num_samples': [37, 4, 61, 101, 200, 200, 200, 179],
 'std_2012': [13.25, 8.36, 14.72, 21.36, 20.42, 21.7, 22.88, 20.23],
 'std_FSSE': [9.93, 9.36, 10.75, 12.95, 14.56, 12.63, 17.08, 12.78],
 'std_HWRF': [12.46, 11.24, 12.74, 15.01, 17.19, 22.44, 20.24, 18.51],
 'std_OFCL': [9.64, 2.07, 9.51, 12.34, 13.33, 14.9, 17.13, 13.26],
 'std_SHIP': [9.99, 11.62, 14.58, 14.63, 16.17, 14.8, 17.3, 16.36],
 'std_full': [12.09, 9.04, 14.51, 20.63, 20.63, 21.95, 23.03, 20.49],
 'year': [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]}

# Table intensity AN

In [164]:
from sklearn.feature_selection import SelectFromModel

n = X_test_baseline.shape[0]
X_test_total = X_test_total[-n:]
tgt_intensity_test = tgt_intensity_test[-n:]
X_test = X_test[-n:]
X_test_embed = X_test_embed[-n:]
m = len(X_test_baseline[X_test_baseline['YEAR_0'] < 2017]) 

In [14]:
dict6 = {'year':[], 'num_samples':[], 'MAEs_full':[], 'std_full':[], 'MAES_2012':[], 'std_2012':[], 'MAES_SHIP':[], 'std_SHIP':[], 'MAES_HWRF':[], 'std_HWRF':[], 'MAES_OFCL':[], 'std_OFCL':[], 'MAES_FSSE':[], 'std_FSSE':[], 'MAES_GFSO':[], 'std_GFSO':[], 'MAES_DSHP':[], 'std_DSHP':[]}

In [166]:
tgt_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[:m]), axis = 0)
tgt_train5 = tgt_train*std_+mean_
mean_intensity2 = tgt_train5.mean()
std_intensity2 = tgt_train5.std()
tgt_train5 = (tgt_train5 - mean_intensity2)/std_intensity2
tgt_intensity_test = (tgt_intensity_test*std_+mean_ - mean_intensity2)/std_intensity2
std_ = float(std_intensity2)
mean_ = float(mean_intensity2)

In [None]:
tgt_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[:m]), axis = 0)
xgb_x = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate, subsample=subsample, min_child_weight=min_child_weight)
xgb_x.fit(train_x, tgt_train[:, 0])
xgb_y = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate, subsample=subsample, min_child_weight=min_child_weight)
xgb_y.fit(train_y, tgt_train[:, 1])
DLATS_PRED = np.array(xgb_x.predict(test_x)) * std_dx + mean_dx
DLONS_PRED = np.array(xgb_y.predict(test_y)) * std_dy + mean_dy
LATS_PRED_ = X_test['LAT_7'] + DLATS_PRED
LONS_PRED_ = X_test['LON_7'] + DLONS_PRED
compare_perf_track(basin=basin, forecast=forecast, forecast2 = forecast2, LATS_PRED_=LATS_PRED_, LONS_PRED_=LONS_PRED_)



In [167]:
#FULL TRAIN WITH CNN/GRU EMBEDS + ONLY AN EP in training + only more than. 40kn winds
index_t = X_train.loc[X_train['cat_basin_AN_0'] + X_train['cat_basin_EP_0'] == 1].loc[X_train['WMO_WIND_7'] > 34].index
index_te = X_test[:m].loc[X_test['cat_basin_AN_0'] + X_test['cat_basin_EP_0'] == 1].loc[X_test['WMO_WIND_7'] > 34].index
train = np.concatenate((X_train_total[index_t], X_test_total[index_te]), axis = 0)
tgt_train = np.concatenate((tgt_intensity_train[index_t], tgt_intensity_test[index_te]), axis = 0)

xgb_total = XGBRegressor(max_depth=6, n_estimators=150, learning_rate=0.03, subsample=0.8, min_child_weight=1)
xgb_total.fit(train, tgt_train)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  17.63 22.57 37.26
Hurr until 2012:  17.63 22.57 37.26
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.71 15.51 16.29
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  16.2 20.96 30.55
Hurr until 2012:  16.2 20.96 30.55
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 12.03 15.37 19.56


In [15]:
index_t = X_train.loc[X_train['cat_basin_AN_0'] + X_train['cat_basin_EP_0'] == 1].loc[X_train['WMO_WIND_7'] > 34].index
index_te = X_test[:m].loc[X_test['cat_basin_AN_0'] + X_test['cat_basin_EP_0'] == 1].loc[X_test['WMO_WIND_7'] > 34].index
train = np.concatenate((X_train_total[index_t], X_test_total[index_te]), axis = 0)
tgt_train = np.array(((np.concatenate((tgt_intensity_train[index_t], tgt_intensity_test[index_te]), axis = 0))*float(std_intensity)+float(mean_intensity)-mean_)/std_)
xgb_total = XGBRegressor(max_depth=8, n_estimators=150, learning_rate=0.03, subsample=0.8, min_child_weight=1)
xgb_total.fit(train, tgt_train)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

ValueError: cannot reindex from a duplicate axis

In [37]:
#FULL TRAIN WITH CNN/GRU EMBEDS
index_t = X_train.loc[X_train['WMO_WIND_7'] > 25].index
index_te = X_test[:m].loc[X_test['WMO_WIND_7'] > 25].index
train = np.concatenate((X_train_total[index_t], X_test_total[index_te]), axis = 0)
tgt_train = np.concatenate((tgt_intensity_train[index_t], tgt_intensity_test[index_te]), axis = 0)

xgb_total = XGBRegressor(max_depth=9, n_estimators=150, learning_rate=0.05, subsample=0.8, min_child_weight=5)
#EP 15.4
#xgb_total = XGBRegressor(max_depth=9, n_estimators=150, learning_rate=0.05, subsample=0.8, min_child_weight=5)
#AN 16.87: XGBRegressor(max_depth=9, n_estimators=150, learning_rate=0.03, subsample=0.8, min_child_weight=5)

xgb_total.fit(train, tgt_train)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  16.82 21.66 33.8
Hurr until 2012:  16.82 21.66 33.8
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.62 15.4 15.42
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  15.49 20.79 30.11
Hurr until 2012:  15.49 20.79 30.11
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 11.99 15.36 19.78


In [25]:
m = RandomForestRegressor(max_depth=8, n_estimators=150)
m.fit(train, tgt_train)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = m, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = m, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  17.05 21.97 35.53
Hurr until 2012:  18.85 23.93 37.26
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.67 15.43 15.6
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  15.8 21.05 30.11
Hurr until 2012:  17.85 22.87 35.82
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 12.0 15.38 19.78


In [187]:
#FULL TRAIN WITH CNN/GRU EMBEDS
train = np.concatenate((X_train_total, X_test_total[:m]), axis = 0)
#tgt_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[:17094]), axis = 0)

xgb_total = XGBRegressor(max_depth=9, n_estimators=150, learning_rate=0.03, subsample=0.8, min_child_weight=1)
xgb_total.fit(train, tgt_train5)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

TypeError: slice indices must be integers or None or have an __index__ method

In [92]:
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  669
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  10.54 13.79 13.0
Hurr until 2012:  10.54 13.79 13.0
SHIP 11.29 15.51 17.34
HWRF 9.59 12.59 11.21
OFCL 8.55 11.85 7.32
FSSE 8.44 11.29 7.32
GFSO 14.0 15.64 22.42
DSHP 10.28 13.59 14.8
Consensus ofcl 8.44 11.35 8.52
Consensus Hurr 8.33 11.21 8.37
Total number of steps for comparison:  522
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  11.27 15.24 16.28
Hurr until 2012:  11.27 15.24 16.28
SHIP 11.97 15.22 16.86
HWRF 9.95 14.02 14.18
OFCL 9.55 12.99 7.85
FSSE 9.22 12.38 9.96
GFSO 16.95 19.67 31.99
DSHP 11.85 15.13 16.86
Consensus ofcl 9.39 12.52 10.73
Consensus Hurr 9.35 12.54 10.73


In [121]:
year = 2017
forecast='SHIP'
forecast2 = 'HWRF'
forecast3 = 'OFCL'
forecast4 = 'FSSE'
forecast5 = 'GFSO'
forecast6 = 'DSHP'
mode = 'vmax'
basin = 'EP'
test = X_test_total
if True:
    baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] < year].loc[
            X_test_baseline[forecast6 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast5 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast4 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast3 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline['cat_basin_' + basin + '_0'] == 1]  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
    if basin == 'EP':
        baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] < year].loc[
            X_test_baseline[forecast6 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast5 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast4 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast3 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline['cat_basin_' + basin + '_0'] == 1]
    index = baseline_.index
    baseline_1 = baseline_[forecast + '_24_'+ mode + '_7']
    baseline_2 = baseline_[forecast2 + '_24_' + mode + '_7']
    baseline_3 = baseline_[forecast3 + '_24_' + mode + '_7']
    baseline_4 = baseline_[forecast4 + '_24_' + mode + '_7']
    baseline_5 = baseline_[forecast5 + '_24_' + mode + '_7']
    baseline_6 = baseline_[forecast6 + '_24_' + mode + '_7']
    X_test_withBASELINE_total = np.array(test)[index]
    
    tgt_ = np.array(tgt_intensity_test[index] * std_ + mean_)
    preds = xgb_total.predict(X_test_withBASELINE_total) * std_ + mean_
    train_consensus = np.stack((baseline_1, baseline_2, baseline_3, baseline_4, baseline_5, baseline_6), axis = 1)
    train_consensus_hurr = np.stack((baseline_1, baseline_2, baseline_3, baseline_4, baseline_5, baseline_6, preds), axis = 1)
   
if True:
    baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] >= year].loc[
            X_test_baseline[forecast6 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast5 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast4 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast3 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline['cat_basin_' + basin + '_0'] == 1]  # .loc[#X_test_baseline['SHIP_24_'+mode+'_7'] > 0].index
    if basin == 'EP':
        baseline_ = X_test_baseline.loc[X_test_baseline['YEAR_0'] < 2019].loc[X_test_baseline['YEAR_0'] >= year].loc[
            X_test_baseline[forecast6 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast5 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast4 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast3 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast2 + '_24_' + mode + '_7'] > -320].loc[
            X_test_baseline[forecast + '_24_' + mode + '_7'] > -320].loc[X_test_baseline['cat_basin_' + basin + '_0'] == 1]
    index = baseline_.index
    baseline_1 = baseline_[forecast + '_24_'+ mode + '_7']
    baseline_2 = baseline_[forecast2 + '_24_' + mode + '_7']
    baseline_3 = baseline_[forecast3 + '_24_' + mode + '_7']
    baseline_4 = baseline_[forecast4 + '_24_' + mode + '_7']
    baseline_5 = baseline_[forecast5 + '_24_' + mode + '_7']
    baseline_6 = baseline_[forecast6 + '_24_' + mode + '_7']
    X_test_withBASELINE_total = np.array(test)[index]
    
    tgt_test = np.array(tgt_intensity_test[index] * std_ + mean_)
    preds = xgb_total.predict(X_test_withBASELINE_total) * std_ + mean_
    test_consensus = np.stack((baseline_1, baseline_2, baseline_3, baseline_4, baseline_5, baseline_6), axis = 1)
    test_consensus_hurr = np.stack((baseline_1, baseline_2, baseline_3, baseline_4, baseline_5, baseline_6, preds), axis = 1)
        

In [22]:
from sklearn.linear_model import Lasso

In [21]:
from sklearn.linear_model import Ridge

In [39]:
#AN
m = Lasso(alpha = 10)
m.fit(train_consensus, tgt_)
preds = np.array(m.predict(test_consensus))

print("consensus without Hurricast",
                np.around(mean_absolute_error(np.array(tgt_test), preds), decimals=2),
                np.around(np.std(tgt_test - preds), decimals=2),
                np.around(sum(abs(tgt_test - preds) > 20)/len(preds) * 100, decimals = 2))print(m.coef_)

m = Lasso(alpha = 3)
m.fit(train_consensus_hurr, tgt_)
preds = np.array(m.predict(test_consensus_hurr))


print("consensus with Hurricast",
                np.around(mean_absolute_error(np.array(tgt_test), preds), decimals=2),
                np.around(np.std(tgt_test - preds), decimals=2),
                np.around(sum(abs(tgt_test - preds) > 20)/len(preds) * 100, decimals = 2))print(m.coef_)

SyntaxError: invalid syntax (<ipython-input-39-c169a7e7bf3e>, line 9)

In [115]:
#AN
m = Lasso(alpha = 30)
m.fit(train_consensus, tgt_)
preds = np.array(m.predict(test_consensus))

print("consensus without Hurricast",
                np.around(mean_absolute_error(np.array(tgt_test), preds), decimals=2),
                np.around(np.std(tgt_test - preds), decimals=2),
                np.around(sum(abs(tgt_test - preds) > 20)/len(preds) * 100, decimals = 2))

m2 = Lasso(alpha = 30)
m2.fit(train_consensus_hurr, tgt_)
preds = np.array(m2.predict(test_consensus_hurr))


print("consensus with Hurricast",
                np.around(mean_absolute_error(np.array(tgt_test), preds), decimals=2),
                np.around(np.std(tgt_test - preds), decimals=2),
                np.around(sum(abs(tgt_test - preds) > 20)/len(preds) * 100, decimals = 2))
print(m2.coef_)

consensus without Hurricast 10.62 14.51 14.38
consensus with Hurricast 10.62 14.51 14.38
[0.         0.223805   0.7595393  0.         0.12981753 0.
 0.        ]


In [135]:
#EP
m = Lasso(alpha = 50)
m.fit(train_consensus, tgt_)
#print("MAE intensity: ", mean_absolute_error(np.array(tgt_test), np.array(m.predict(test_consensus))))
#print(m.coef_)
preds = np.array(m.predict(test_consensus))
print("consensus without Hurricast",
                np.around(mean_absolute_error(np.array(tgt_test), preds), decimals=2),
                np.around(np.std(tgt_test - preds), decimals=2),
                np.around(sum(abs(tgt_test - preds) > 20)/len(preds) * 100, decimals = 2))

m = Lasso(alpha = 50)
m.fit(train_consensus_hurr, tgt_)
#print("MAE intensity: ", mean_absolute_error(np.array(tgt_test), np.array(m.predict(test_consensus_hurr))))
#print(m.coef_)
preds = np.array(m.predict(test_consensus_hurr))
print("consensus with Hurricast",
                np.around(mean_absolute_error(np.array(tgt_test), preds), decimals=2),
                np.around(np.std(tgt_test - preds), decimals=2),
                np.around(sum(abs(tgt_test - preds) > 20)/len(preds) * 100, decimals = 2))

consensus without Hurricast 10.15 13.71 16.92
consensus with Hurricast 10.15 13.71 16.92


In [46]:
xgb_total = RandomForestRegressor(max_depth=8, n_estimators=250, criterion = 'mae')
xgb_total.fit(train, tgt_train5)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

KeyboardInterrupt: 

In [39]:
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFDL', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  42
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  11.55 14.03 16.67
Hurr until 2012:  11.55 14.03 16.67
SHIP 10.74 12.94 11.9
HWRF 7.74 10.23 14.29
OFCL 8.45 10.79 2.38
FSSE 6.64 8.01 0.0
GFDL 17.38 18.83 33.33
DSHP 10.74 12.94 11.9


In [102]:
#TRAIN UNTIL 2012
#AN
#xgb_total = XGBRegressor(max_depth=8, n_estimators=150, learning_rate=0.07, subsample=0.8, min_child_weight=5, colsample_bytree = 0.7)
#EP
#xgb_total = XGBRegressor(max_depth=8, n_estimators=150, learning_rate=0.05, subsample=0.7, min_child_weight=5, colsample_bytree = 0.7)
xgb_total = XGBRegressor(max_depth=8, n_estimators=150, learning_rate=0.07, subsample=0.8, min_child_weight=5, colsample_bytree = 0.7)
xgb_total.fit(X_train_total, tgt_intensity_train)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_total, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  17.41 22.0 36.05
Hurr until 2012:  17.41 22.0 36.05
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.72 15.45 16.29
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  17.21 22.03 31.65
Hurr until 2012:  17.21 22.03 31.65
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 12.09 15.46 19.78


In [142]:
#TABULAR ONLY
#train2 = np.concatenate((X_train, X_test[:m]), axis = 0)
#tgt_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[:17094]), axis = 0)
xgb_total = XGBRegressor(max_depth=7, n_estimators=120, learning_rate=0.07, subsample=0.7, min_child_weight=5)
xgb_total.fit(np.array(X_train), tgt_intensity_train)
compare_perf_intensity_per_year_6cast(np.array(X_test), dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(np.array(X_test), dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  17.41 21.86 36.74
Hurr until 2012:  17.41 21.86 36.74
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.77 15.43 16.12
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  16.58 21.59 31.21
Hurr until 2012:  16.58 21.59 31.21
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 12.06 15.43 19.78


In [147]:
X_train.shape

(104651, 128)

In [150]:
#EMBED ONLY
#train2 = np.concatenate((X_train_embed, X_test_embed[:m]), axis = 0)
#tgt_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[:17094]), axis = 0)
xgb_total = XGBRegressor(max_depth=8, n_estimators=150, learning_rate=0.07, subsample=0.7, min_child_weight=5)
xgb_total.fit(np.array(X_train_embed), tgt_intensity_train)
compare_perf_intensity_per_year_6cast(np.array(X_test_embed), dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(np.array(X_test_embed), dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  21.03 26.67 40.38
Hurr until 2012:  21.03 26.67 40.38
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.91 15.81 16.64
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  20.17 24.91 45.05
Hurr until 2012:  20.17 24.91 45.05
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 12.35 15.7 21.32


In [165]:
#USING OFCL
train = np.concatenate((X_test_baseline[:m], X_test_embed[:m]), axis = 1)
tgt_train3 = tgt_intensity_test[:m]
test = np.concatenate((X_test_baseline, X_test_embed), axis = 1)
xgb_total = XGBRegressor(max_depth=4, n_estimators=300, learning_rate=0.03, subsample=0.8, min_child_weight=1)
xgb_total.fit(train, tgt_train3)
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  12.91 17.79 19.93
Hurr until 2012:  12.91 17.79 19.93
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.34 15.15 14.9
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  11.94 15.01 18.46
Hurr until 2012:  11.94 15.01 18.46
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 11.49 14.79 16.7


In [167]:
#OFCL ONLY
#train = np.array(X_test_baseline[:17094])
#tgt_train3 = tgt_intensity_test[:17094]
train = X_test_baseline[:m].loc[X_test_baseline['cat_basin_AN_0'] + X_test_baseline['cat_basin_EP_0'] == 1]
tgt_train3 = tgt_intensity_test[train.index]
xgb_total = XGBRegressor(max_depth=8, n_estimators=300, learning_rate=0.03, subsample=0.7, min_child_weight=1)
xgb_total.fit(np.array(train), tgt_train3)
#xgb_total = XGBRegressor(max_depth=7, n_estimators=300, learning_rate=0.03, subsample=0.55, min_child_weight=1)
#xgb_total.fit(np.array(train), tgt_train3)
compare_perf_intensity_per_year_6cast(X_test_baseline, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_baseline, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
#compare_perf_intensity_per_year_4cast(X_test_baseline, dict = {'year':[], 'num_samples':[], 'MAEs_full':[], 'std_full':[], 'MAES_2012':[], 'std_2012':[], 'MAES_SHIP':[], 'std_SHIP':[], 'MAES_HWRF':[], 'std_HWRF':[], 'MAES_OFCL':[], 'std_OFCL':[], 'MAES_FSSE':[], 'std_FSSE':[]}, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', mode='vmax', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', year = 2017)
#xgb_total = XGBRegressor(max_depth=9, n_estimators=150, learning_rate=0.05, subsample=0.7, min_child_weight=7)
#xgb_total.fit(train, tgt_train3)
#compare_perf_intensity_per_year_4cast(X_test_baseline, dict = {'year':[], 'num_samples':[], 'MAEs_full':[], 'std_full':[], 'MAES_2012':[], 'std_2012':[], 'MAES_SHIP':[], 'std_SHIP':[], 'MAES_HWRF':[], 'std_HWRF':[], 'MAES_OFCL':[], 'std_OFCL':[], 'MAES_FSSE':[], 'std_FSSE':[]}, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', mode='vmax', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  11.81 15.84 17.5
Hurr until 2012:  11.81 15.84 17.5
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.11 14.86 14.9
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  11.66 14.4 18.46
Hurr until 2012:  11.66 14.4 18.46
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 11.39 14.62 16.7


In [168]:
#OFCL with AN EP ONLY
train_0 = X_test_baseline[:m].loc[X_test_baseline['cat_basin_AN_0'] + X_test_baseline['cat_basin_EP_0'] == 1]
train = np.concatenate((train_0, X_test_total[train_0.index]), axis = 1)
tgt_train3 = tgt_intensity_test[train_0.index]
test = np.concatenate((X_test_baseline, X_test_total), axis = 1)
#xgb_total = XGBRegressor(max_depth=5, n_estimators=150, learning_rate=0.05, subsample=0.8, min_child_weight=1, colsample_bytree = 0.7)
#xgb_total.fit(train, tgt_train3)
xgb_total = XGBRegressor(max_depth=7, n_estimators=300, learning_rate=0.03, subsample=0.55, min_child_weight=1)
xgb_total.fit(np.array(train), tgt_train3)
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  12.13 16.64 16.29
Hurr until 2012:  12.13 16.64 16.29
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.23 15.03 14.9
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  12.02 15.09 18.68
Hurr until 2012:  12.02 15.09 18.68
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 11.53 14.77 17.8


In [169]:
#OFCL with AN EP ONLY
train_0 = X_test_baseline[:m].loc[X_test_baseline['cat_basin_AN_0'] + X_test_baseline['cat_basin_EP_0'] == 1]
train = np.concatenate((train_0, X_test_total[train_0.index]), axis = 1)
tgt_train3 = tgt_intensity_test[train_0.index]
test = np.concatenate((X_test_baseline, X_test_total), axis = 1)
#xgb_total = XGBRegressor(max_depth=5, n_estimators=150, learning_rate=0.05, subsample=0.8, min_child_weight=1, colsample_bytree = 0.7)
#xgb_total.fit(train, tgt_train3)
xgb_total = XGBRegressor(max_depth=6, n_estimators=200, learning_rate=0.03, subsample=0.8, min_child_weight=3, colsample_bytree = 1)
xgb_total.fit(train, tgt_train3)
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
#xgb_total = XGBRegressor(max_depth=7, n_estimators=200, learning_rate=0.03, subsample=0.8, min_child_weight=3, colsample_bytree = 1)
#xgb_total.fit(train, tgt_train3)
#reached 9.1§
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  12.12 16.46 17.16
Hurr until 2012:  12.12 16.46 17.16
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.22 15.0 14.56
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  11.31 14.52 16.48
Hurr until 2012:  11.31 14.52 16.48
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 11.47 14.72 17.36


In [170]:
#OFCL with AN EP ONLY
train_0 = X_test_baseline[:m].loc[X_test_baseline['cat_basin_AN_0'] + X_test_baseline['cat_basin_EP_0'] == 1]
train = np.concatenate((train_0, X_test_embed[train_0.index]), axis = 1)
tgt_train3 = tgt_intensity_test[train_0.index]
test = np.concatenate((X_test_baseline, X_test_embed), axis = 1)
xgb_total = XGBRegressor(max_depth=6, n_estimators=300, learning_rate=0.03, subsample=0.55, min_child_weight=1)
xgb_total.fit(np.array(train), tgt_train3)
#xgb_total = XGBRegressor(max_depth=6, n_estimators=150, learning_rate=0.03, subsample=0.8, min_child_weight=2)
#xgb_total.fit(train, tgt_train3)
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  12.18 16.7 17.33
Hurr until 2012:  12.18 16.7 17.33
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.23 15.02 14.56
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  12.12 15.25 20.0
Hurr until 2012:  12.12 15.25 20.0
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 11.56 14.82 18.24


In [100]:
train.shape

(97493, 640)

In [184]:
#problem with NaN
#xgb2 = XGBRegressor(max_depth=8, n_estimators=140, learning_rate = 0.07, subsample = 0.7, min_child_weight = 5)
#xgb2.fit(train, tgt_train3)
m = Lasso(alpha = 0.001)
m.fit(train_scaled, tgt_train5)
select = SelectFromModel(m, threshold = '0.25*mean', prefit=True)
X_train_sparse = select.transform(train_scaled)
X_test_sparse = select.transform(test_scaled)
print("Number of features kept", X_train_sparse.shape[1])
xgb_sparse2 = XGBRegressor(max_depth=8, n_estimators=150, learning_rate=0.05, subsample=0.7, min_child_weight=1, colsample_bytree = 0.7)
xgb_sparse2.fit(X_train_sparse, tgt_train5)
compare_perf_intensity_per_year_6cast(X_test_sparse, dict6, xgb_total = xgb_sparse2, xgb_tot = xgb_sparse2, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(X_test_sparse, dict6, xgb_total = xgb_sparse2, xgb_tot = xgb_sparse2, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Number of features kept 196
Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  17.44 22.17 34.49
Hurr until 2012:  17.44 22.17 34.49
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.7 15.45 16.12
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  16.62 21.67 31.87
Hurr until 2012:  16.62 21.67 31.87
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 12.06 15.47 20.0


In [181]:
select = SelectFromModel(m, threshold = '0.5mean', prefit=True)

In [170]:
from sklearn import preprocessing
scaler = preprocessing.StandardScaler().fit(train)
train_scaled = scaler.transform(train)
test_scaled = scaler.transform(X_test_total)

In [186]:
#FULL TRAIN WITH CNN/GRU EMBEDS
#train = np.concatenate((X_train_total, X_test_total[:m]), axis = 0)
#tgt_train = np.concatenate((tgt_intensity_train, tgt_intensity_test[:17094]), axis = 0)

xgb_total = XGBRegressor(max_depth=8, n_estimators=150, learning_rate=0.03, subsample=0.8, min_child_weight=5)
xgb_total.fit(train_scaled, tgt_train5)
compare_perf_intensity_per_year_6cast(test_scaled, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(test_scaled, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

Total number of steps for comparison:  577
Year 2017 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  17.37 22.23 35.7
Hurr until 2012:  17.37 22.23 35.7
SHIP 16.15 20.79 31.37
HWRF 12.0 16.24 17.16
OFCL 11.13 15.3 13.34
FSSE 10.73 14.44 14.56
GFSO 14.73 18.39 22.7
DSHP 14.24 18.38 25.3
Consensus ofcl 11.36 15.15 15.42
Consensus Hurr 11.69 15.48 15.77
Total number of steps for comparison:  455
Year 2017 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  16.9 21.81 32.09
Hurr until 2012:  16.9 21.81 32.09
SHIP 15.56 19.27 29.67
HWRF 11.28 15.48 18.9
OFCL 11.7 15.05 15.16
FSSE 12.06 15.52 21.1
GFSO 17.33 21.75 31.21
DSHP 15.44 19.17 29.23
Consensus ofcl 11.81 15.07 19.34
Consensus Hurr 12.14 15.53 20.22


In [185]:
#FULL TRAIN WITH CNN/GRU EMBEDS
train = np.concatenate((np.array(X_test_baseline[:m]), X_test_embed[:m]), axis = 1)
tgt_train3 = tgt_intensity_test[:m]
test = np.concatenate((X_test_baseline, X_test_embed), axis = 1)
xgb_total = XGBRegressor(max_depth=9, n_estimators=150, learning_rate=0.05, subsample=0.7, min_child_weight=7)
xgb_total.fit(train, tgt_train3)
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='AN', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)
compare_perf_intensity_per_year_6cast(test, dict6, xgb_total = xgb_total, xgb_tot = xgb_total, basin='EP', forecast='SHIP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP', year = 2017)

TypeError: cannot do slice indexing on <class 'pandas.core.indexes.range.RangeIndex'> with these indexers [Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)] of <class 'sklearn.linear_model.coordinate_descent.Lasso'>

In [32]:
train_xgb_intensity_all_years_full_train_4cast(forecast = 'SHIP', basin_only = False, sparse = False, max_depth = 8, n_estimators = 100, learning_rate = 0.07, subsample = 0.7, min_child_weight=5, basin = 'AN', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE')

Total number of steps for comparison:  41
Year  2012  MAE intensity basin AN Hurricast trained full:  9.13 with std  11.62
Year  2012  MAE intensity basin AN Hurricast trained until 2012:  9.19 with std  11.32
Year  2012  MAE intensity basin AN Official Forecast SHIP :  9.56 with std  10.79
Year  2012  MAE intensity basin AN Official Forecast HWRF :  8.1 with std  11.14
Year  2012  MAE intensity basin AN Official Forecast OFCL :  9.02 with std  11.36
Year  2012  MAE intensity basin AN Official Forecast FSSE :  10.37 with std  11.62


Total number of steps for comparison:  8
Year  2013  MAE intensity basin AN Hurricast trained full:  12.06 with std  11.75
Year  2013  MAE intensity basin AN Hurricast trained until 2012:  11.28 with std  10.57
Year  2013  MAE intensity basin AN Official Forecast SHIP :  19.31 with std  12.82
Year  2013  MAE intensity basin AN Official Forecast HWRF :  9.31 with std  9.22
Year  2013  MAE intensity basin AN Official Forecast OFCL :  14.06 with std  5.44
Yea

{'MAES_2012': [9.19, 11.28, 6.36, 8.48, 11.36, 12.42, 9.89, 10.52],
 'MAES_FSSE': [10.37, 8.56, 8.16, 8.0, 8.87, 7.72, 8.82, 8.73],
 'MAES_HWRF': [8.1, 9.31, 12.13, 9.44, 10.11, 10.05, 8.8, 9.89],
 'MAES_OFCL': [9.02, 14.06, 6.7, 7.93, 8.42, 8.59, 8.53, 8.55],
 'MAES_SHIP': [9.56, 19.31, 7.02, 9.52, 10.38, 12.32, 10.79, 10.89],
 'MAEs_full': [9.13, 12.06, 6.73, 8.29, 11.42, 12.37, 9.78, 10.36],
 'num_samples': [41, 8, 72, 122, 231, 228, 236, 208],
 'std_2012': [11.32, 10.57, 8.0, 11.76, 14.99, 15.8, 12.82, 13.46],
 'std_FSSE': [11.62, 8.28, 8.09, 11.84, 12.22, 10.07, 12.16, 11.42],
 'std_HWRF': [11.14, 9.22, 13.58, 13.77, 14.34, 12.65, 12.37, 12.68],
 'std_OFCL': [11.36, 5.44, 8.39, 11.75, 11.4, 11.83, 12.1, 11.53],
 'std_SHIP': [10.79, 12.82, 8.36, 12.95, 14.57, 17.27, 14.97, 14.22],
 'std_full': [11.62, 11.75, 8.32, 11.25, 14.84, 15.73, 12.7, 13.38],
 'year': [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]}

In [88]:
train_xgb_intensity_all_years_full_train_6cast(forecast = 'SHIP', max_depth = 8, n_estimators = 150, learning_rate = 0.07, subsample = 0.7, min_child_weight=5, basin = 'AN', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP')

Total number of steps for comparison:  0
Year 2012 Basin AN MAE intensity | std | busts > 20kn %

 No forecasts for year  2012
Total number of steps for comparison:  0
Year 2013 Basin AN MAE intensity | std | busts > 20kn %

 No forecasts for year  2013
Total number of steps for comparison:  0
Year 2014 Basin AN MAE intensity | std | busts > 20kn %

 No forecasts for year  2014
Total number of steps for comparison:  113
Year 2015 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  8.25 11.46 11.5
Hurr until 2012:  8.48 11.83 11.5
SHIP 9.88 13.33 11.5
HWRF 9.86 14.21 9.73
OFCL 8.5 12.2 8.85
FSSE 8.35 12.25 10.62
GFSO 13.36 16.04 22.12
DSHP 9.87 13.33 11.5


Total number of steps for comparison:  230
Year 2016 Basin AN MAE intensity | std | busts > 20kn %
Hurr full:  11.49 15.0 16.09
Hurr until 2012:  11.41 15.03 15.65
SHIP 10.36 14.57 11.74
HWRF 10.06 14.32 13.48
OFCL 8.37 11.36 4.78
FSSE 8.83 12.2 7.83
GFSO 14.74 18.35 23.48
DSHP 10.1 13.78 10.87


Total number of steps for compa

{'MAES_2012': [8.48, 11.41, 12.3, 9.82, 10.42],
 'MAES_DSHP': [9.87, 10.1, 10.02, 10.24, 10.6],
 'MAES_FSSE': [8.35, 8.83, 7.74, 8.87, 8.73],
 'MAES_GFSO': [13.36, 14.74, 17.29, 11.21, 13.54],
 'MAES_HWRF': [9.86, 10.06, 10.06, 8.85, 9.89],
 'MAES_OFCL': [8.5, 8.37, 8.56, 8.56, 8.55],
 'MAES_SHIP': [9.88, 10.36, 12.13, 10.84, 10.89],
 'MAEs_full': [8.25, 11.49, 12.17, 9.8, 10.35],
 'num_samples': [113, 230, 227, 234, 208],
 'std_2012': [11.83, 15.03, 15.56, 12.8, 13.34],
 'std_DSHP': [13.33, 13.78, 13.38, 13.42, 13.9],
 'std_FSSE': [12.25, 12.2, 10.09, 12.21, 11.42],
 'std_GFSO': [16.04, 18.35, 17.77, 12.87, 15.47],
 'std_HWRF': [14.21, 14.32, 12.67, 12.42, 12.68],
 'std_OFCL': [12.2, 11.36, 11.82, 12.13, 11.53],
 'std_SHIP': [13.33, 14.57, 16.94, 15.02, 14.22],
 'std_full': [11.46, 15.0, 15.41, 12.75, 13.28],
 'year': [2015, 2016, 2017, 2018, 2019]}

In [89]:
train_xgb_intensity_all_years_full_train_6cast(forecast = 'SHIP', max_depth = 8, n_estimators = 150, learning_rate = 0.07, subsample = 0.7, min_child_weight=5, basin = 'EP', forecast2 = 'HWRF', forecast3 = 'OFCL', forecast4 = 'FSSE', forecast5 = 'GFSO', forecast6 = 'DSHP')

Total number of steps for comparison:  0
Year 2012 Basin EP MAE intensity | std | busts > 20kn %

 No forecasts for year  2012
Total number of steps for comparison:  0
Year 2013 Basin EP MAE intensity | std | busts > 20kn %

 No forecasts for year  2013
Total number of steps for comparison:  0
Year 2014 Basin EP MAE intensity | std | busts > 20kn %

 No forecasts for year  2014
Total number of steps for comparison:  255
Year 2015 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  11.65 16.07 17.65
Hurr until 2012:  11.83 16.22 17.65
SHIP 12.75 17.89 17.65
HWRF 11.79 17.29 16.86
OFCL 10.42 15.35 11.37
FSSE 9.93 13.96 11.76
GFSO 15.62 20.14 25.49
DSHP 12.41 16.99 17.65


Total number of steps for comparison:  315
Year 2016 Basin EP MAE intensity | std | busts > 20kn %
Hurr full:  9.64 13.24 13.97
Hurr until 2012:  9.95 13.44 14.29
SHIP 9.87 12.84 10.79
HWRF 10.38 13.74 11.75
OFCL 8.72 11.89 6.67
FSSE 8.51 11.53 8.57
GFSO 13.02 16.38 21.27
DSHP 9.69 12.58 10.16


Total number of st

{'MAES_2012': [11.83, 9.95, 9.1, 12.4],
 'MAES_DSHP': [12.41, 9.69, 9.49, 12.96],
 'MAES_FSSE': [9.93, 8.51, 7.92, 9.83],
 'MAES_GFSO': [15.62, 13.02, 11.93, 19.32],
 'MAES_HWRF': [11.79, 10.38, 7.87, 10.94],
 'MAES_OFCL': [10.42, 8.72, 8.14, 10.22],
 'MAES_SHIP': [12.75, 9.87, 9.97, 12.91],
 'MAEs_full': [11.65, 9.64, 9.02, 12.09],
 'num_samples': [255, 315, 167, 355],
 'std_2012': [16.22, 13.44, 13.04, 16.1],
 'std_DSHP': [16.99, 12.58, 12.71, 15.85],
 'std_FSSE': [13.96, 11.53, 10.67, 12.64],
 'std_GFSO': [20.14, 16.38, 16.27, 20.45],
 'std_HWRF': [17.29, 13.74, 11.61, 14.86],
 'std_OFCL': [15.35, 11.89, 11.67, 13.48],
 'std_SHIP': [17.89, 12.84, 13.0, 15.83],
 'std_full': [16.07, 13.24, 12.96, 15.73],
 'year': [2015, 2016, 2017, 2018]}