# Estimating VaR in EURUSD from IV using ML and QR

## Modeling-Ensemble

### Data Preparation

In [1]:
import pandas as pd
import numpy as np
import pickle
import joblib
import statsmodels.api as sm

In [2]:
with open('data_scale.pickle', 'rb') as f:
    data_scale = pickle.load(f)

with open('model_qr.pickle', 'rb') as f:
    model_qr = pickle.load(f)

with open('model_qr2.pickle', 'rb') as f:
    model_qr2 = pickle.load(f)

### QR-IM generated dataset

In [3]:
quantiles = [0.01, 0.025, 0.05, 0.95, 0.975, 0.99]

In [4]:
df_spot_is_x = data_scale['df_spot_is'].iloc[:,:-1]
df_spot2_is_x = data_scale['df_spot2_is'].iloc[:,:-1]
df_spread_is_x = data_scale['df_spread_is'].iloc[:,:-1]

In [5]:
predict_qr = dict()
for quantile, model in model_qr.items():
    predict_qr[quantile] = model.predict(df_spot_is_x)

predict_qr2 = dict()
for quantile, model in model_qr2.items():
    predict_qr2[quantile] = model.predict(df_spot2_is_x)

### Random Forest: QR-IM dataset

In [6]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error

In [7]:
rf_models = dict()
for quantile in quantiles:
    print(f'###### Random Forest-Quantile {quantile} ######')
    X = df_spread_is_x
    y = predict_qr[quantile]

    # more sample weight toward recent data
    sample_weights = np.arange(1,len(y)+1)

    param_grid = {
       'n_estimators': [50, 100, 200],
       'max_depth': [None, 10, 20],
       'min_samples_split': [2, 5, 10],
       'min_samples_leaf': [1, 2, 4]
    }

    tscv = TimeSeriesSplit(n_splits=5)

    best_params = []
    best_scores = []

    # walk forward cv
    split = 0
    for train_index, test_index in tscv.split(X):
        # train test split
        split += 1
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        weights_train = sample_weights[train_index]
        weights_test = sample_weights[test_index]

        # random forest regression
        rf = RandomForestRegressor()
        grid_search = GridSearchCV(rf,
                                   param_grid,
                                   cv=TimeSeriesSplit(n_splits=3),
                                   n_jobs=-1)
        grid_search.fit(X_train, y_train,
                        sample_weight=weights_train)

        # hyper-parameter tuning
        best_params.append(grid_search.best_params_)
        best_rf = grid_search.best_estimator_
        y_pred = best_rf.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        best_scores.append(mse)
        print(f'Split {split} MSE: {mse}')

    # final rf model
    mean_score = np.mean(best_scores)
    final_rf = RandomForestRegressor(**best_params[-1])
    final_rf.fit(X, y, sample_weight=sample_weights)
    y_pred = final_rf.predict(X)
    mse = mean_squared_error(y, y_pred)
    print(f'Quantile {quantile} Final MSE: {mse}')
    
    # append final model
    rf_models[quantile] = final_rf

###### Random Forest-Quantile 0.01 ######
Split 1 MSE: 6.967384580720063e-06
Split 2 MSE: 1.0629873652852539e-05
Split 3 MSE: 6.30690548150144e-06
Split 4 MSE: 3.972141033007014e-06
Split 5 MSE: 2.9526196250320914e-06
Quantile 0.01 Final MSE: 2.3936719104996926e-07
###### Random Forest-Quantile 0.025 ######
Split 1 MSE: 7.545363440117452e-06
Split 2 MSE: 7.763754519647189e-06
Split 3 MSE: 4.706107619585811e-06
Split 4 MSE: 1.647826825773993e-06
Split 5 MSE: 1.5268991938127748e-06
Quantile 0.025 Final MSE: 1.4786550674171527e-07
###### Random Forest-Quantile 0.05 ######
Split 1 MSE: 2.160494278812935e-06
Split 2 MSE: 2.7714713738416393e-06
Split 3 MSE: 1.952839148014563e-06
Split 4 MSE: 1.1923769809691968e-06
Split 5 MSE: 8.830360438086358e-07
Quantile 0.05 Final MSE: 6.34971200952708e-08
###### Random Forest-Quantile 0.95 ######
Split 1 MSE: 2.3577080683673276e-06
Split 2 MSE: 1.8650947816326237e-06
Split 3 MSE: 2.2685331582898964e-06
Split 4 MSE: 1.1548634795030172e-06
Split 5 MSE: 9.

In [8]:
joblib.dump(rf_models, 'rf_models.pkl')

['rf_models.pkl']

### XGBoost(1): QR-IM Dataset

In [9]:
import xgboost as xgb

In [10]:
xgb1_models = dict()
for quantile in quantiles:
    print(f'###### XGBoost 1-Quantile {quantile} ######')
    X = df_spread_is_x
    y = predict_qr[quantile]

    # more sample weight toward recent data
    sample_weights = np.arange(1,len(y)+1)

    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0]
    }

    tscv = TimeSeriesSplit(n_splits=5)

    best_params = []
    best_scores = []

    # walk forward cv
    split = 0
    for train_index, test_index in tscv.split(X):
        # train test split
        split += 1
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        weights_train = sample_weights[train_index]
        weights_test = sample_weights[test_index]

        # xgboost regression
        xgb_reg = xgb.XGBRegressor()
        grid_search = GridSearchCV(xgb_reg,
                                   param_grid,
                                   cv=TimeSeriesSplit(n_splits=3),
                                   n_jobs=-1)
        grid_search.fit(X_train, y_train,
                        sample_weight=weights_train)

        # hyper-parameter tuning
        best_params.append(grid_search.best_params_)
        best_xgb_reg = grid_search.best_estimator_
        y_pred = best_xgb_reg.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        best_scores.append(mse)
        print(f'Split {split} MSE: {mse}')

    # final xgboost model
    mean_score = np.mean(best_scores)
    final_xgb_reg = xgb.XGBRegressor(**best_params[-1])
    final_xgb_reg.fit(X, y, sample_weight=sample_weights)
    y_pred = final_xgb_reg.predict(X)
    mse = mean_squared_error(y, y_pred)
    print(f'Quantile {quantile} Final MSE: {mse}')
    
    # append final model
    xgb1_models[quantile] = final_xgb_reg

###### XGBoost 1-Quantile 0.01 ######
Split 1 MSE: 4.364300606291534e-06
Split 2 MSE: 6.134583748193137e-06
Split 3 MSE: 3.897691646392137e-06
Split 4 MSE: 1.3767724225202197e-06
Split 5 MSE: 1.5200019564135488e-06
Quantile 0.01 Final MSE: 1.0239899822086427e-07
###### XGBoost 1-Quantile 0.025 ######
Split 1 MSE: 4.350092596191196e-06
Split 2 MSE: 4.792770046641507e-06
Split 3 MSE: 4.439026016672763e-06
Split 4 MSE: 1.1942631912543654e-06
Split 5 MSE: 1.207309349108191e-06
Quantile 0.025 Final MSE: 1.5844692590847445e-07
###### XGBoost 1-Quantile 0.05 ######
Split 1 MSE: 1.2614027851460886e-06
Split 2 MSE: 1.1432090467277545e-06
Split 3 MSE: 8.530419506532179e-07
Split 4 MSE: 3.2879262642173264e-07
Split 5 MSE: 2.7325893091697286e-07
Quantile 0.05 Final MSE: 5.5420398408488253e-08
###### XGBoost 1-Quantile 0.95 ######
Split 1 MSE: 1.6053545860510248e-06
Split 2 MSE: 7.907917281273657e-07
Split 3 MSE: 1.002558896903793e-06
Split 4 MSE: 3.091210907570536e-07
Split 5 MSE: 2.58201674233267

In [11]:
joblib.dump(xgb1_models, 'xgb1_models.pkl')

['xgb1_models.pkl']

### XGBoost(2): Gradient Boosting generated Dataset

In [12]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import make_scorer

#### Gradient Boosting Dataset Generation

In [13]:
import warnings
warnings.filterwarnings(action='ignore')

In [14]:
sample_weights = np.arange(1,len(y)+1)

param_grid = {
   'n_estimators': [50, 100, 200],
   'max_depth': [3, 5, 7],
   'learning_rate': [0.01, 0.1, 0.2],
   'subsample': [0.8, 0.9, 1.0]
}

def pinball_loss(y_true, y_pred, quantile):
    errors = y_true - y_pred
    return np.maximum((quantile - 1) * errors, quantile * errors).mean()

pinball_scorer = make_scorer(pinball_loss, greater_is_better=False)

tscv = TimeSeriesSplit(n_splits=5)

best_params = {q: [] for q in quantiles}
best_scores = {q: [] for q in quantiles}

X = df_spread_is_x
y = data_scale['df_spread_is'].iloc[:,-1]

# walk forward cv
split = 0
for train_index, test_index in tscv.split(X):
    split += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    weights_train = sample_weights[train_index]
    weights_test = sample_weights[test_index]

    for quantile in quantiles:
        # gradient boosting quantile regressor
        gb_quantile = GradientBoostingRegressor(loss='quantile',
                                                alpha=quantile)
        grid_search = GridSearchCV(gb_quantile, param_grid,
                                   cv=TimeSeriesSplit(n_splits=3),
                                   n_jobs=-1, scoring=pinball_scorer)
        grid_search.fit(X_train, y_train, sample_weight=weights_train)

        # hyper-parameter tuning
        best_params[quantile].append(grid_search.best_params_)
        best_gb_quantile = GradientBoostingRegressor(
            loss='quantile', alpha=quantile, **grid_search.best_params_)
        best_gb_quantile.fit(X_train, y_train)
        y_pred = best_gb_quantile.predict(X_test)
        pinball = pinball_loss(y_test, y_pred, quantile)
        best_scores[quantile].append(pinball)
        print(f'{quantile} Quantile Split {split} Pinball: {pinball}')

# append final gb model
mean_scores = {q: np.mean(scores) for q, scores in best_scores.items()}

final_models = dict()
for quantile in quantiles:
    final_gb_quantile = GradientBoostingRegressor(
        loss='quantile', alpha=quantile, **best_params[quantile][-1])
    final_gb_quantile.fit(X, y, sample_weight=sample_weights)
    print(f'{quantile} Quantile Final Pinball: {pinball}')
    final_models[quantile] = final_gb_quantile

# generate target dataset
predict_gb = dict()
for quantile, model in final_models.items():
    predict_gb[quantile] = model.predict(X)

0.01 Quantile Split 1 Pinball: 0.00017546812646003086
0.025 Quantile Split 1 Pinball: 0.00036586692358215515
0.05 Quantile Split 1 Pinball: 0.0006023285584226296
0.95 Quantile Split 1 Pinball: 0.0006093254469747829
0.975 Quantile Split 1 Pinball: 0.00034987287199872137
0.99 Quantile Split 1 Pinball: 0.00015828310517877568
0.01 Quantile Split 2 Pinball: 0.00014323809157300478
0.025 Quantile Split 2 Pinball: 0.00030395676314739524
0.05 Quantile Split 2 Pinball: 0.0004828984806873742
0.95 Quantile Split 2 Pinball: 0.0005411609818277345
0.975 Quantile Split 2 Pinball: 0.0003172619091624715
0.99 Quantile Split 2 Pinball: 0.00014534779084667712
0.01 Quantile Split 3 Pinball: 0.00024169082302465523
0.025 Quantile Split 3 Pinball: 0.0004896385552133623
0.05 Quantile Split 3 Pinball: 0.0008298115872578058
0.95 Quantile Split 3 Pinball: 0.0008598591885771985
0.975 Quantile Split 3 Pinball: 0.0005023659198649535
0.99 Quantile Split 3 Pinball: 0.00023484645186014347
0.01 Quantile Split 4 Pinball: 

#### XGBoost Regression

In [15]:
xgb2_models = dict()
for quantile in quantiles:
    print(f'###### XGBoost 2-Quantile {quantile} ######')
    X = df_spread_is_x
    y = pd.Series(predict_gb[quantile],
                  index=df_spread_is_x.index)

    # more sample weight toward recent data
    sample_weights = np.arange(1,len(y)+1)

    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0]
    }

    tscv = TimeSeriesSplit(n_splits=5)

    best_params = []
    best_scores = []

    # walk forward cv
    split = 0
    for train_index, test_index in tscv.split(X):
        # train test split
        split += 1
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        weights_train = sample_weights[train_index]
        weights_test = sample_weights[test_index]

        # xgboost regression
        xgb_reg = xgb.XGBRegressor()
        grid_search = GridSearchCV(xgb_reg,
                                   param_grid,
                                   cv=TimeSeriesSplit(n_splits=3),
                                   n_jobs=-1)
        grid_search.fit(X_train, y_train,
                        sample_weight=weights_train)

        # hyper-parameter tuning
        best_params.append(grid_search.best_params_)
        best_xgb_reg = grid_search.best_estimator_
        y_pred = best_xgb_reg.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        best_scores.append(mse)
        print(f'Split {split} MSE: {mse}')

    # final xgboost model
    mean_score = np.mean(best_scores)
    final_xgb_reg = xgb.XGBRegressor(**best_params[-1])
    final_xgb_reg.fit(X, y, sample_weight=sample_weights)
    y_pred = final_xgb_reg.predict(X)
    mse = mean_squared_error(y, y_pred)
    print(f'Quantile {quantile} Final MSE: {mse}')
    
    # append final model
    xgb2_models[quantile] = final_xgb_reg

###### XGBoost 2-Quantile 0.01 ######
Split 1 MSE: 7.307612806749463e-08
Split 2 MSE: 1.4021814470576e-08
Split 3 MSE: 6.351682449351885e-08
Split 4 MSE: 9.377528480490676e-08
Split 5 MSE: 1.1421858027709228e-08
Quantile 0.01 Final MSE: 1.2566056099887234e-09
###### XGBoost 2-Quantile 0.025 ######
Split 1 MSE: 4.848615196168912e-08
Split 2 MSE: 5.603477355268361e-09
Split 3 MSE: 1.388442724595969e-07
Split 4 MSE: 4.766691227011342e-07
Split 5 MSE: 1.19903444438093e-07
Quantile 0.025 Final MSE: 8.01009619361447e-09
###### XGBoost 2-Quantile 0.05 ######
Split 1 MSE: 2.9978428062607034e-08
Split 2 MSE: 1.6745153823134963e-08
Split 3 MSE: 3.752973953081494e-08
Split 4 MSE: 8.149396713161648e-08
Split 5 MSE: 4.353449429570527e-08
Quantile 0.05 Final MSE: 3.8288769678713434e-10
###### XGBoost 2-Quantile 0.95 ######
Split 1 MSE: 4.3662144994545155e-08
Split 2 MSE: 1.7037366292774007e-08
Split 3 MSE: 3.835885426510018e-08
Split 4 MSE: 6.894424577070314e-09
Split 5 MSE: 1.335705417224384e-08
Qu

In [16]:
joblib.dump(xgb2_models, 'xgb2_models.pkl')

['xgb2_models.pkl']

### LightGBM(1): QR-IM Dataset

In [17]:
import lightgbm as lgb

In [18]:
lgb1_models = dict()
for quantile in quantiles:
    print(f'###### LightGBM 1-Quantile {quantile} ######')
    X = df_spread_is_x
    y = predict_qr[quantile]

    # more sample weight toward recent data
    sample_weights = np.arange(1,len(y)+1)

    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0],
        'verbose': [-1]
    }

    tscv = TimeSeriesSplit(n_splits=5)

    best_params = []
    best_scores = []

    # walk forward cv
    split = 0
    for train_index, test_index in tscv.split(X):
        # train test split
        split += 1
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        weights_train = sample_weights[train_index]
        weights_test = sample_weights[test_index]

        # light gbm regression
        lgb_reg = lgb.LGBMRegressor()
        grid_search = GridSearchCV(lgb_reg,
                                   param_grid,
                                   cv=TimeSeriesSplit(n_splits=3),
                                   n_jobs=-1)
        grid_search.fit(X_train, y_train,
                        sample_weight=weights_train)

        # hyper-parameter tuning
        best_params.append(grid_search.best_params_)
        best_lgb_reg = grid_search.best_estimator_
        y_pred = best_lgb_reg.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        best_scores.append(mse)
        print(f'Split {split} MSE: {mse}')

    # final light gbm model
    mean_score = np.mean(best_scores)
    final_lgb_reg = lgb.LGBMRegressor(**best_params[-1])
    final_lgb_reg.fit(X, y, sample_weight=sample_weights)
    y_pred = final_lgb_reg.predict(X)
    mse = mean_squared_error(y, y_pred)
    print(f'Quantile {quantile} Final MSE: {mse}')
    
    # append final model
    lgb1_models[quantile] = final_lgb_reg

###### LightGBM 1-Quantile 0.01 ######
Split 1 MSE: 1.3111320254944707e-05
Split 2 MSE: 4.668214740576395e-06
Split 3 MSE: 4.525413736269189e-06
Split 4 MSE: 1.537225588222237e-06
Split 5 MSE: 1.551974870308574e-06
Quantile 0.01 Final MSE: 1.8704468673037263e-07
###### LightGBM 1-Quantile 0.025 ######
Split 1 MSE: 8.132928402712987e-06
Split 2 MSE: 2.445318390364542e-06
Split 3 MSE: 2.8358946708898604e-06
Split 4 MSE: 9.274955450984581e-07
Split 5 MSE: 8.494210879831056e-07
Quantile 0.025 Final MSE: 6.173404246711626e-08
###### LightGBM 1-Quantile 0.05 ######
Split 1 MSE: 1.406540137122763e-06
Split 2 MSE: 9.972778643679887e-07
Split 3 MSE: 1.7183306523769235e-06
Split 4 MSE: 3.71919204439502e-07
Split 5 MSE: 2.5254836670255917e-07
Quantile 0.05 Final MSE: 4.7403141491927486e-08
###### LightGBM 1-Quantile 0.95 ######
Split 1 MSE: 2.255398558215045e-06
Split 2 MSE: 7.651187758643004e-07
Split 3 MSE: 1.5225601884078053e-06
Split 4 MSE: 3.996409711741835e-07
Split 5 MSE: 3.124584960323944

In [19]:
joblib.dump(lgb1_models, 'lgb1_models.pkl')

['lgb1_models.pkl']

### LightGBM(2): QR-IM Dataset with Interaction

In [20]:
lgb2_models = dict()
for quantile in quantiles:
    print(f'###### LightGBM 2-Quantile {quantile} ######')
    X = df_spread_is_x
    y = predict_qr2[quantile]

    # more sample weight toward recent data
    sample_weights = np.arange(1,len(y)+1)

    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0],
        'verbose': [-1]
    }

    tscv = TimeSeriesSplit(n_splits=5)

    best_params = []
    best_scores = []

    # walk forward cv
    split = 0
    for train_index, test_index in tscv.split(X):
        # train test split
        split += 1
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        weights_train = sample_weights[train_index]
        weights_test = sample_weights[test_index]

        # light gbm regression
        lgb_reg = lgb.LGBMRegressor()
        grid_search = GridSearchCV(lgb_reg,
                                   param_grid,
                                   cv=TimeSeriesSplit(n_splits=3),
                                   n_jobs=-1)
        grid_search.fit(X_train, y_train,
                        sample_weight=weights_train)

        # hyper-parameter tuning
        best_params.append(grid_search.best_params_)
        best_lgb_reg = grid_search.best_estimator_
        y_pred = best_lgb_reg.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        best_scores.append(mse)
        print(f'Split {split} MSE: {mse}')

    # final light gbm model
    mean_score = np.mean(best_scores)
    final_lgb_reg = lgb.LGBMRegressor(**best_params[-1])
    final_lgb_reg.fit(X, y, sample_weight=sample_weights)
    y_pred = final_lgb_reg.predict(X)
    mse = mean_squared_error(y, y_pred)
    print(f'Quantile {quantile} Final MSE: {mse}')
    
    # append final model
    lgb2_models[quantile] = final_lgb_reg

###### LightGBM 2-Quantile 0.01 ######
Split 1 MSE: 6.197820868209659e-06
Split 2 MSE: 4.099706913161765e-06
Split 3 MSE: 3.111167447806778e-06
Split 4 MSE: 3.223985872979535e-06
Split 5 MSE: 1.3650058366431899e-06
Quantile 0.01 Final MSE: 3.1940041113940153e-07
###### LightGBM 2-Quantile 0.025 ######
Split 1 MSE: 5.481005773353765e-06
Split 2 MSE: 7.074666251647047e-06
Split 3 MSE: 6.651897162353503e-06
Split 4 MSE: 1.8804754999128199e-06
Split 5 MSE: 1.1166626945806187e-06
Quantile 0.025 Final MSE: 1.3996913824014615e-07
###### LightGBM 2-Quantile 0.05 ######
Split 1 MSE: 2.5748327614673263e-06
Split 2 MSE: 1.4283394367405512e-06
Split 3 MSE: 1.7929151317338928e-06
Split 4 MSE: 1.0693043965380001e-06
Split 5 MSE: 7.585797400817663e-07
Quantile 0.05 Final MSE: 2.181624419145098e-07
###### LightGBM 2-Quantile 0.95 ######
Split 1 MSE: 1.2825291959412522e-06
Split 2 MSE: 1.8303120629254254e-06
Split 3 MSE: 5.422890823261235e-06
Split 4 MSE: 2.015033496174597e-06
Split 5 MSE: 2.0681811279

In [21]:
joblib.dump(lgb2_models, 'lgb2_models.pkl')

['lgb2_models.pkl']

### LightGBM(3): LightGBM generated Dataset

In [22]:
from sklearn.metrics import mean_pinball_loss

In [23]:
lgb3_models = dict()
for quantile in quantiles:
    print(f'###### LightGBM 3-Quantile {quantile} ######')
    X = df_spread_is_x
    y = data_scale['df_spread_is'].iloc[:,-1]

    # more sample weight toward recent data
    sample_weights = np.arange(1,len(y)+1)

    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0],
        'verbose': [-1]
    }

    tscv = TimeSeriesSplit(n_splits=5)

    best_params = []
    best_scores = []

    # walk forward cv
    split = 0
    for train_index, test_index in tscv.split(X):
        # train test split
        split += 1
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        weights_train = sample_weights[train_index]
        weights_test = sample_weights[test_index]

        # light gbm regression
        lgb_reg = lgb.LGBMRegressor(objective='quantile',
                                    alpha=quantile)
        grid_search = GridSearchCV(lgb_reg,
                                   param_grid,
                                   cv=TimeSeriesSplit(n_splits=3),
                                   n_jobs=-1)
        grid_search.fit(X_train, y_train,
                        sample_weight=weights_train)

        # hyper-parameter tuning
        best_params.append(grid_search.best_params_)
        best_lgb_reg = grid_search.best_estimator_
        y_pred = best_lgb_reg.predict(X_test)
        pinball_loss = mean_pinball_loss(y_test, y_pred, alpha=quantile)
        best_scores.append(pinball_loss)
        print(f'Split {split} Pinball Loss: {pinball_loss}')

    # final light gbm model
    mean_score = np.mean(best_scores)
    final_lgb_reg = lgb.LGBMRegressor(**best_params[-1])
    final_lgb_reg.fit(X, y, sample_weight=sample_weights)
    y_pred = final_lgb_reg.predict(X)
    pinball_loss = mean_pinball_loss(y, y_pred, alpha=quantile)
    print(f'Quantile {quantile} Final Pinball Loss: {pinball_loss}')
    
    # append final model
    lgb3_models[quantile] = final_lgb_reg

###### LightGBM 3-Quantile 0.01 ######
Split 1 Pinball Loss: 0.00017402617161312335
Split 2 Pinball Loss: 0.000190481887137032
Split 3 Pinball Loss: 0.0002219686543982026
Split 4 Pinball Loss: 0.00026203378489943603
Split 5 Pinball Loss: 0.00017455753212675525
Quantile 0.01 Final Pinball Loss: 0.0004449432652286783
###### LightGBM 3-Quantile 0.025 ######
Split 1 Pinball Loss: 0.0003517059237107489
Split 2 Pinball Loss: 0.0003236065446776177
Split 3 Pinball Loss: 0.0005909626317129026
Split 4 Pinball Loss: 0.0005299909298053481
Split 5 Pinball Loss: 0.0003590977558146008
Quantile 0.025 Final Pinball Loss: 0.0011894969731532186
###### LightGBM 3-Quantile 0.05 ######
Split 1 Pinball Loss: 0.0006441580040349503
Split 2 Pinball Loss: 0.0005803761128867921
Split 3 Pinball Loss: 0.0008036625639679795
Split 4 Pinball Loss: 0.0006928668933962735
Split 5 Pinball Loss: 0.0005694809961671982
Quantile 0.05 Final Pinball Loss: 0.001689673011712048
###### LightGBM 3-Quantile 0.95 ######
Split 1 Pinba

In [24]:
joblib.dump(lgb3_models, 'lgb3_models.pkl')

['lgb3_models.pkl']