In [1]:
# standard imports
import time
import pickle

# external imports
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn import metrics

# own imports
from evaluation import R2OS


In [2]:
# loading Goyal data
GoyalData_updated = pd.read_csv("GoyalData_2021.csv", thousands=',')

# starting and ending periods
start = 192612
end = 201612

# keep only necessary time periods
start_index = GoyalData_updated[GoyalData_updated['yyyymm'] == start].index[0]
end_index = GoyalData_updated[GoyalData_updated['yyyymm'] == end].index[0] + 1

data = GoyalData_updated

TRAIN_START = 192612
TRAIN_END = 195612
TEST_START = 195701
TEST_END = 201612

IND_TRAIN_START = data[data['yyyymm'] == TRAIN_START].index[0]
IND_TRAIN_END = data[data['yyyymm'] == TRAIN_END].index[0] + 1
IND_TEST_START = data[data['yyyymm'] == TEST_START].index[0]
IND_TEST_END = data[data['yyyymm'] == TEST_END].index[0] + 1

data

Unnamed: 0,yyyymm,Index,D12,E12,b/m,tbl,AAA,BAA,lty,ntis,Rfree,infl,ltr,corpr,svar,csp,CRSP_SPvw,CRSP_SPvwx
0,187101,4.44,0.2600,0.4000,,,,,,,,,,,,,,
1,187102,4.50,0.2600,0.4000,,,,,,,0.004967,,,,,,,
2,187103,4.61,0.2600,0.4000,,,,,,,0.004525,,,,,,,
3,187104,4.74,0.2600,0.4000,,,,,,,0.004252,,,,,,,
4,187105,4.86,0.2600,0.4000,,,,,,,0.004643,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1807,202108,4522.68,58.7913,169.8333,0.184756,0.0005,0.0255,0.0324,0.0128,0.014846,0.000000,0.002066,-0.0035,-0.0045,0.000602,,0.030600,0.029205
1808,202109,4307.54,59.2545,175.3700,0.193036,0.0004,0.0253,0.0323,0.0137,0.015598,0.000000,0.002716,-0.0250,-0.0194,0.001393,,-0.046076,-0.047152
1809,202110,4605.38,59.6354,182.8600,0.182389,0.0005,0.0268,0.0335,0.0158,0.013368,0.000000,0.008308,0.0051,0.0159,0.001151,,0.070510,0.069627
1810,202111,4567.00,60.0162,190.3500,0.189455,0.0005,0.0262,0.0328,0.0156,0.015640,0.000100,0.004913,0.0210,0.0094,0.001327,,-0.007256,-0.008665


In [3]:
data_stocks = data.copy()[['yyyymm']]

data_stocks['EQPREM'] = data.loc[:,'CRSP_SPvw'].shift(-1)

data_stocks['DP'] = np.log(data['D12'] / data['Index'])

data_stocks['DY'] = np.log(data['D12'] / data['Index'].shift(1))

data_stocks['DE'] = np.log(data['D12'] / data['E12'])

data_stocks['EP'] = np.log(data['E12'] / data['Index'])

data_stocks['SVAR'] = data['svar']

data_stocks['BM'] = data['b/m']

data_stocks['NTIS'] = data['ntis']

data_stocks['TBL'] = data['tbl']

data_stocks['LTY'] = data['lty']

data_stocks['LTR'] = data['ltr']

data_stocks['TMS'] = data['lty'] - data['tbl']

data_stocks['DFY'] = data['BAA'] - data['AAA']

data_stocks['DFR'] = data['corpr'] - data['ltr']

data_stocks['INFL'] = data['infl']

data_stocks = data_stocks.drop(columns=['LTY', 'DE'])

data_stocks.iloc[IND_TRAIN_START:IND_TEST_END,:].to_csv('data_e.csv', index=False)

data_stocks

Unnamed: 0,yyyymm,EQPREM,DP,DY,EP,SVAR,BM,NTIS,TBL,LTR,TMS,DFY,DFR,INFL
0,187101,,-2.837728,,-2.406945,,,,,,,,,
1,187102,,-2.851151,-2.837728,-2.420368,,,,,,,,,
2,187103,,-2.875302,-2.851151,-2.444519,,,,,,,,,
3,187104,,-2.903111,-2.875302,-2.472328,,,,,,,,,
4,187105,,-2.928112,-2.903111,-2.497329,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1807,202108,-0.046076,-4.342866,-4.314288,-3.282043,0.000602,0.184756,0.014846,0.0005,-0.0035,0.0123,0.0069,-0.0010,0.002066
1808,202109,0.070510,-4.286281,-4.335018,-3.201224,0.001393,0.193036,0.015598,0.0004,-0.0250,0.0133,0.0070,0.0056,0.002716
1809,202110,-0.007256,-4.346731,-4.279873,-3.226260,0.001151,0.182389,0.013368,0.0005,0.0051,0.0153,0.0067,0.0108,0.008308
1810,202111,0.043485,-4.331997,-4.340366,-3.177747,0.001327,0.189455,0.015640,0.0005,0.0210,0.0151,0.0066,-0.0116,0.004913


In [7]:
from sklearn.preprocessing import StandardScaler

X = np.array(data_stocks.drop(columns=['yyyymm', 'EQPREM']).iloc[IND_TRAIN_START:IND_TEST_END-1,:])
y = np.array(data_stocks.loc[:,['EQPREM']].iloc[IND_TRAIN_START:IND_TEST_END-1,:]).flatten()

X_train = X[0:360,:]
X_test = X[360:1080,:]
y_train = y[0:360]
y_test = y[360:1080]


### CW Test

In [8]:
### DONE WITH FORMULA IN THE PAPER ###

def CW_test(y_test, pred_bmk, pred_model):
    p = y_test.shape[0]
    e_1_hat_2 = (y_test - pred_bmk) ** 2
    e_2_hat_2 = (y_test - pred_model) ** 2
    adj = (pred_bmk - pred_model) ** 2
    
    f_hat = e_1_hat_2 - (e_2_hat_2 - adj)
    f_bar = np.sum(f_hat) / p
    
    return (p ** 0.5) * f_bar / (np.var(f_hat - f_bar, ddof=1) ** 0.5)

### Parallel Expanding Window

In [32]:
from joblib import Parallel, delayed
from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn import metrics

def expanding_window_parallel(X_train, X_test, y_train, y_test, forecast_model, model_args=None, workers=-1, verbosity=10):
    """
    DESCRIPTION
    -----------
    Produce 1-step ahead forecasts, trained with recursive expanding windows.
    Uses parallelization on multiple CPU cores to speed up the process.
    
    PARAMETERS
    -----------
    X_train: 2-dimensional numpy array, independent variables used to train the forecast model
    
    X_test: 2-dimensional numpy array, independent variables used for forecasting
    
    y_train: 1-dimensional numpy array, dependent variable used to train the forecast model
    
    y_test: 1-dimensional numpy array, dependent variable to forecast
    
    forecast_model: python object, model used for forecasting. Make this object similar to the
    
    sklearn models, with a fit() and predict() function
    
    model_args: dict, arguments to pass when initializing the model
    
    workers: int, how many processes are used. -1 denotes using all available logical processors
    """

    start_time = time.time()
    
    # if no model arguments, parse an empty dictionary
    if model_args is None:
        model_args = {}
        
    n_periods = y_test.shape[0]
    pred_model = np.zeros((n_periods,))
    pred_bmk = np.zeros((n_periods,))
    
    # function to produce recursive expanding window forecasts
    def ExpW_forecast(X_train, y_train, i):
        print(f'{i+1}/{n_periods}', end='\r')
        
        # take the correct subsets of the dataset
        X_train = np.vstack([X_train, X_test[:i,:]])
        y_train = np.hstack([y_train, y_test[:i]])
        
        # scale the data
        X_scaler = StandardScaler().fit(X_train)
        X = X_scaler.transform(X_train)
        y = y_train.reshape(-1,1)
        y_scaler = StandardScaler().fit(y)
        y = y_scaler.transform(y).flatten()
        
        # forecasts of model
        model = forecast_model(**model_args).fit(X, y)
        pred = model.predict(X_scaler.transform(X_test[[i],:]))
        
        # historical average forecast as the baseline
        bmk = np.mean(y_train)
        
        # return a tuple of the baseline and the expanding window forecasts
        return bmk, y_scaler.inverse_transform(pred)[0]
    
    # run the forecast models in parallel
    pred_pairs = Parallel(n_jobs=workers, backend='loky', verbose=verbosity) \
        (delayed(ExpW_forecast)(X_train, y_train, i) for i in range(n_periods))

    # split the prediction tuples
    for i, pred_pair in enumerate(pred_pairs):
        pred_bmk[i] = pred_pair[0]
        pred_model[i] = pred_pair[1]
    
    # calculate metrics
    MSPE_model = metrics.mean_squared_error(y_test, pred_model)
    MSPE_bmk = metrics.mean_squared_error(y_test, pred_bmk)
    R2OS_val = R2OS(MSPE_model, MSPE_bmk)
    
    end_time = time.time()
    elapsed_time = end_time - start_time

    return {'out-of-sample R2': R2OS_val, 
            'MSPE_bmk': MSPE_bmk, 
            'MSPE_model': MSPE_model, 
            'pred_bmk': pred_bmk,
            'pred_model': pred_model,
            'elapsed time': elapsed_time}


# needed for LassoCV and ElasticNetCV
cv = TimeSeriesSplit(n_splits=5)


In [17]:
results_expw_LinearRegression = expanding_window_parallel(X_train, X_test[0:], y_train, y_test[0:], 
                                                          LinearRegression, model_args=None)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_LinearRegression[item]}')
    
with open('forecast_results/results_expw_LinearRegression.pickle', 'wb') as file:
    pickle.dump(results_expw_LinearRegression, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0120s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0360s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0200s.) Setting batch_size=8.
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0280s.) Setting batch_size=16.
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0679s.) Setting batch_size=32.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1278s.) Setting batch_size=64.
[Parallel(n_jobs=-1)]: Done 312 tasks      | elapsed:    0.2s


out-of-sample R2: -0.09479595117769124
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0019302993408247479
elapsed time: 0.6271622180938721


[Parallel(n_jobs=-1)]: Done 529 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 584 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 649 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:    0.5s finished


In [22]:
results_expw_LassoCV = expanding_window_parallel(X_train, X_test[0:], y_train, y_test[0:], 
                                                 LassoCV, model_args={'cv': cv})
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_LassoCV[item]}')
    
with open('forecast_results/results_expw_LassoCV.pickle', 'wb') as file:
    pickle.dump(results_expw_LassoCV, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:    6.0s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   

out-of-sample R2: -0.00023184786504271138
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0017635677904443044
elapsed time: 29.50590991973877


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:   29.4s finished


In [23]:
results_expw_ElasticNetCV = expanding_window_parallel(X_train, X_test[0:], y_train, y_test[0:], 
                                                      ElasticNetCV, model_args={'cv': cv})
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_ElasticNetCV[item]}')
    
with open('forecast_results/results_expw_ElasticNetCV.pickle', 'wb') as file:
    pickle.dump(results_expw_ElasticNetCV, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   

out-of-sample R2: 1.6082268055495064e-05
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0017631306501973247
elapsed time: 27.55993390083313


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:   27.5s finished


### Parallel AveW

In [31]:
import joblib
from joblib import Parallel, delayed
from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn import metrics

import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

def AveW_parallel(X_train, X_test, y_train, y_test, forecast_model, m, w_min, model_args=None, workers=-1, verbosity=10):
    """
    DESCRIPTION
    -----------
    Produce 1-step ahead forecasts, trained with recursive expanding windows.
    Uses parallelization on multiple CPU cores to speed up the process.
    
    PARAMETERS
    -----------
    X_train: 2-dimensional numpy array, independent variables used to train the forecast model
    
    X_test: 2-dimensional numpy array, independent variables used for forecasting
    
    y_train: 1-dimensional numpy array, dependent variable used to train the forecast model
    
    y_test: 1-dimensional numpy array, dependent variable to forecast
    
    forecast_model: python object, model used for forecasting. Make this object similar to the

    m: number of windows for AveW forecasting
    
    w_min: minimum AveW window size
    
    sklearn models, with a fit() and predict() function
    
    model_args: dict, arguments to pass when initializing the model
    
    workers: int, how many processes are used. -1 denotes using all available logical processors
    """
    
    start_time = time.time()
    
    # parse an empty dict as the additional arguments
    if model_args is None:
        model_args = {}
    
    n_periods = y_test.shape[0]
    
    # arrays to store forecasts
    pred_model = np.zeros((n_periods,))
    pred_bmk = np.zeros((n_periods,))
    
    # loop through all forecasting periods
    def AveW_forecast(X_train, y_train, j, m):
        print(f'{j+1}/{n_periods}', end='\r')
        # properly load the subsets for each point forecast
        X_train = np.vstack([X_train, X_test[:j,:]])
        y_train = np.hstack([y_train, y_test[:j]])
        
        T = X_train.shape[0]
        single_preds = np.zeros((m,))

        # AveW part
        for i in range(1, m+1):
            # calculate size of window i
            w_i = int(w_min + (i - 1)/(m - 1) * (T - w_min)) - 1

            # starting and ending index of window i
            start, end = T-1-w_i, T-1
            # print(f'start:{T-1-w_i}, end:{T-1}')

            # scale the data
            X = X_train[start:end,:]
            X_scaler = StandardScaler().fit(X)
            X = X_scaler.transform(X)
            y = y_train[start:end].reshape(-1,1)
            y_scaler = StandardScaler().fit(y)
            y = y_scaler.transform(y).flatten()
            
            # train model and predict
            model = forecast_model(**model_args).fit(X, y)
            pred = model.predict(X_scaler.transform(X_test[[j],:]))
            single_preds[i-1] = y_scaler.inverse_transform(pred)
        
        # historical average is the benchmark
        bmk = np.mean(y_train)

        # return tuple of hist. avg and AveW forecast
        return bmk, np.mean(single_preds)
    
    # run predictions with multiple processors
    pred_pairs = Parallel(n_jobs=workers, backend='loky', verbose=verbosity) \
        (delayed(AveW_forecast)(X_train, y_train, j, m) for j in range(n_periods))

    # split tuple of predictions
    for i, pred_pair in enumerate(pred_pairs):
        pred_bmk[i] = pred_pair[0]
        pred_model[i] = pred_pair[1]
    
    # calculate metrics
    MSPE_model = metrics.mean_squared_error(y_test, pred_model)
    MSPE_bmk = metrics.mean_squared_error(y_test, pred_bmk)
    R2OS_val = R2OS(MSPE_model, MSPE_bmk)
    
    end_time = time.time()
    elapsed_time = end_time - start_time

    return {'out-of-sample R2': R2OS_val, 
            'MSPE_bmk': MSPE_bmk, 
            'MSPE_model': MSPE_model, 
            'pred_bmk': pred_bmk,
            'pred_model': pred_model,
            'elapsed time': elapsed_time}


# needed for LassoCV and ElasticNetCV
cv = TimeSeriesSplit(n_splits=5)


In [13]:
results_AveW_LinearRegression = AveW_parallel(X_train, X_test[0:,:], y_train, y_test[0:], 
                                     LinearRegression, m=10, w_min=240, model_args={'fit_intercept': True})
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_LinearRegression[item]}')
    
with open('forecast_results/results_AveW_LinearRegression.pickle', 'wb') as file:
    pickle.dump(results_AveW_LinearRegression, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0394s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1620s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  92 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done 172 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 216 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done 268 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done 320 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 380 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-1)]: Done 440 tasks      | elapsed:    5.4s
[Parallel(n_jo

out-of-sample R2: -0.04698292355780809
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0018459973705825176
elapsed time: 8.846430778503418


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:    8.7s finished


In [26]:
results_AveW_LassoCV = AveW_parallel(X_train, X_test[0:,:], y_train, y_test[0:],
           LassoCV, m=10, w_min=240, model_args={'cv': cv, 'max_iter':5000})
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_LassoCV[item]}')
    
with open('forecast_results/results_AveW_LassoCV.pickle', 'wb') as file:
    pickle.dump(results_AveW_LassoCV, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   14.7s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   18.5s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   21.1s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:   26.0s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:   29.8s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:   34.7s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   39.5s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   46.1s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   53.0s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:  1

out-of-sample R2: 0.0012518531138231426
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.001760951789701513
elapsed time: 252.27812671661377


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:  4.2min finished


In [31]:
results_AveW_ElasticNetCV = AveW_parallel(X_train, X_test[0:,:], y_train, y_test[0:], 
           ElasticNetCV, m=10, w_min=240, model_args={'cv': cv, 'max_iter':5000})
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_ElasticNetCV[item]}')
    
with open('forecast_results/results_AveW_ElasticNetCV.pickle', 'wb') as file:
    pickle.dump(results_AveW_ElasticNetCV, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   11.0s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   13.4s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   16.9s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   19.4s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:   24.4s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:   28.6s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:   33.3s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   37.0s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   42.9s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   47.8s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   

out-of-sample R2: 0.0027540616533363593
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.001758303157186491
elapsed time: 235.4264612197876


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:  3.9min finished


### HA Combination

In [27]:
def HA_combination(y_train, y_test, y_pred, delta=0.5):
    """
    DESCRIPTION
    -----------
    Evaluate the forecast combination method of Zhang et al (2020) where
    forecasts of a historical model are combined with the historical average
    
    PARAMETERS
    -----------    
    y_train: 1-dimensional numpy array, dependent variable used to train the forecast model
    
    y_test: 1-dimensional numpy array, dependent variable to forecast
    
    y_pred: 1-dimensional numpy array, predictions of a sophisticated model

    delta: float, value in range [0,1], forecast combination weight
    """

    start_time = time.time()
    n_periods = y_test.shape[0]
    pred_model = np.zeros((n_periods,))
    pred_bmk = np.zeros((n_periods,))
    
    # run a forecast for every test period
    for i in range(n_periods):
        print(f'{i+1}/{n_periods}', end='\r')
        
        y = y_train.flatten()
        
        # forecasts of historical average
        bmk = np.mean(y_train)
        pred_bmk[i] = bmk
        
        # add new row to training data
        y_train = np.hstack([y_train, y_test[i]])
        
        # combination forecast
        pred_model[i] = (1 - delta) * bmk + delta * y_pred[i]
    
    print('')
    
    MSPE_model = metrics.mean_squared_error(y_test, pred_model)
    MSPE_bmk = metrics.mean_squared_error(y_test, pred_bmk)
    R2OS_val = R2OS(MSPE_model, MSPE_bmk)
    
    end_time = time.time()
    elapsed_time = end_time - start_time

    return {'out-of-sample R2': R2OS_val, 
            'MSPE_bmk': MSPE_bmk, 
            'MSPE_model': MSPE_model, 
            'pred_bmk': pred_bmk,
            'pred_model': pred_model,
            'elapsed time': elapsed_time}

In [36]:
# LinearRegression
filename = 'forecast_results/results_AveW_LinearRegression.pickle'
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']

with open(filename, 'rb') as file:
    y_pred = pickle.load(file)['pred_model']
    results = HA_combination(y_train, y_test, y_pred)
    for item in display:
        print(f'{item}: {results[item]}')
        
    cw = CW_test(y_test, results['pred_bmk'], results['pred_model'])
    print(f'CW_test: {cw}')

1/7202/7203/7204/7205/7206/7207/7208/7209/72010/72011/72012/72013/72014/72015/72016/72017/72018/72019/72020/72021/72022/72023/72024/72025/72026/72027/72028/72029/72030/72031/72032/72033/72034/72035/72036/72037/72038/72039/72040/72041/72042/72043/72044/72045/72046/72047/72048/72049/72050/72051/72052/72053/72054/72055/72056/72057/72058/72059/72060/72061/72062/72063/72064/72065/72066/72067/72068/72069/72070/72071/72072/72073/72074/72075/72076/72077/72078/72079/72080/72081/72082/72083/72084/72085/72086/72087/72088/72089/72090/72091/72092/72093/72094/72095/72096/72097/72098/72099/720100/720101/720102/720103/720104/720105/720106/720107/720108/720109/720110/720111/720112/720113/720114/720115/720116/720117/720118/720119/720120/720121/720122/720123/720124/720125/720126/720127/720128/720129/720130/720131/720132/720133/720134/720135/720136/720137/720138/720139/

In [37]:
# LassoCV
filename = 'forecast_results/results_AveW_LassoCV.pickle'
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']

with open(filename, 'rb') as file:
    y_pred = pickle.load(file)['pred_model']
    results = HA_combination(y_train, y_test, y_pred)
    for item in display:
        print(f'{item}: {results[item]}')

    cw = CW_test(y_test, results['pred_bmk'], results['pred_model'])
    print(f'CW_test: {cw}')

1/7202/7203/7204/7205/7206/7207/7208/7209/72010/72011/72012/72013/72014/72015/72016/72017/72018/72019/72020/72021/72022/72023/72024/72025/72026/72027/72028/72029/72030/72031/72032/72033/72034/72035/72036/72037/72038/72039/72040/72041/72042/72043/72044/72045/72046/72047/72048/72049/72050/72051/72052/72053/72054/72055/72056/72057/72058/72059/72060/72061/72062/72063/72064/72065/72066/72067/72068/72069/72070/72071/72072/72073/72074/72075/72076/72077/72078/72079/72080/72081/72082/72083/72084/72085/72086/72087/72088/72089/72090/72091/72092/72093/72094/72095/72096/72097/72098/72099/720100/720101/720102/720103/720104/720105/720106/720107/720108/720109/720110/720111/720112/720113/720114/720115/720116/720117/720118/720119/720120/720121/720122/720123/720124/720125/720126/720127/720128/720129/720130/720131/720132/720133/720134/720135/720136/720137/720138/720139/

In [38]:
# ElasticNetCV
filename = 'forecast_results/results_AveW_ElasticNetCV.pickle'
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']

with open(filename, 'rb') as file:
    y_pred = pickle.load(file)['pred_model']
    results = HA_combination(y_train, y_test, y_pred)
    for item in display:
        print(f'{item}: {results[item]}')

    cw = CW_test(y_test, results['pred_bmk'], results['pred_model'])
    print(f'CW_test: {cw}')

1/7202/7203/7204/7205/7206/7207/7208/7209/72010/72011/72012/72013/72014/72015/72016/72017/72018/72019/72020/72021/72022/72023/72024/72025/72026/72027/72028/72029/72030/72031/72032/72033/72034/72035/72036/72037/72038/72039/72040/72041/72042/72043/72044/72045/72046/72047/72048/72049/72050/72051/72052/72053/72054/72055/72056/72057/72058/72059/72060/72061/72062/72063/72064/72065/72066/72067/72068/72069/72070/72071/72072/72073/72074/72075/72076/72077/72078/72079/72080/72081/72082/72083/72084/72085/72086/72087/72088/72089/72090/72091/72092/72093/72094/72095/72096/72097/72098/72099/720100/720101/720102/720103/720104/720105/720106/720107/720108/720109/720110/720111/720112/720113/720114/720115/720116/720117/720118/720119/720120/720121/720122/720123/720124/720125/720126/720127/720128/720129/720130/720131/720132/720133/720134/720135/720136/720137/720138/720139/

In [39]:
# BMA
filename = 'forecast_results/results_AveW_BMA.pickle'
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']

with open(filename, 'rb') as file:
    y_pred = pickle.load(file)['pred_model']
    results = HA_combination(y_train, y_test, y_pred)
    for item in display:
        print(f'{item}: {results[item]}')

    cw = CW_test(y_test, results['pred_bmk'], results['pred_model'])
    print(f'CW_test: {cw}')

1/7202/7203/7204/7205/7206/7207/7208/7209/72010/72011/72012/72013/72014/72015/72016/72017/72018/72019/72020/72021/72022/72023/72024/72025/72026/72027/72028/72029/72030/72031/72032/72033/72034/72035/72036/72037/72038/72039/72040/72041/72042/72043/72044/72045/72046/72047/72048/72049/72050/72051/72052/72053/72054/72055/72056/72057/72058/72059/72060/72061/72062/72063/72064/72065/72066/72067/72068/72069/72070/72071/72072/72073/72074/72075/72076/72077/72078/72079/72080/72081/72082/72083/72084/72085/72086/72087/72088/72089/72090/72091/72092/72093/72094/72095/72096/72097/72098/72099/720100/720101/720102/720103/720104/720105/720106/720107/720108/720109/720110/720111/720112/720113/720114/720115/720116/720117/720118/720119/720120/720121/720122/720123/720124/720125/720126/720127/720128/720129/720130/720131/720132/720133/720134/720135/720136/720137/720138/720139/

In [471]:
# MMA
filename = 'forecast_results/results_AveW_MMA.pickle'
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']

with open(filename, 'rb') as file:
    y_pred = pickle.load(file)['pred_model']
    results = HA_combination(y_train, y_test, y_pred, delta=0.5)
    for item in display:
        print(f'{item}: {results[item]}')

    cw = CW_test(y_test, results['pred_bmk'], results['pred_model'])
    print(f'CW_test: {cw}')

1/7202/7203/7204/7205/7206/7207/7208/7209/72010/72011/72012/72013/72014/72015/72016/72017/72018/72019/72020/72021/72022/72023/72024/72025/72026/72027/72028/72029/72030/72031/72032/72033/72034/72035/72036/72037/72038/72039/72040/72041/72042/72043/72044/72045/72046/72047/72048/72049/72050/72051/72052/72053/72054/72055/72056/72057/72058/72059/72060/72061/72062/72063/72064/72065/72066/72067/72068/72069/72070/72071/72072/72073/72074/72075/72076/72077/72078/72079/72080/72081/72082/72083/72084/72085/72086/72087/72088/72089/72090/72091/72092/72093/72094/72095/72096/72097/72098/72099/720100/720101/720102/720103/720104/720105/720106/720107/720108/720109/720110/720111/720112/720113/720114/720115/720116/720117/720118/720119/720120/720121/720122/720123/720124/720125/720126/720127/720128/720129/720130/720131/720132/720133/720134/720135/720136/720137/720138/720139/

### BMA

In [10]:
class BlankModel:
    """
    A blank forecast model. Used to ensure every subclass has
    all the required functions.
    """
    
    def __init__(self):
        self.fitted = False
        self.n_predictors = None

    ### -------------------------- ###
    
    def _check_fit_args(self, X_train, y_train):
        if self.fitted:
            raise Exception('Model is already fitted')
        if len(X_train.shape) != 2:
            raise ValueError('X_train has to be 2D array')
        if len(y_train.shape) != 1:
            raise ValueError('y_train has to be 1D array')
        if X_train.shape[0] != y_train.shape[0]:
            raise ValueError('X_train and y_train have different numbers of rows')
        if X_train.shape[1] <= 0:
            raise ValueError('X_train contains no predictors')
            
    def _check_predict_args(self, X_pred):
        if not self.fitted:
            raise Exception('Model is not fitted')
        if len(X_pred.shape) != 2:
            raise ValueError('X_pred has to be 2D array')
        if X_pred.shape[1] != self.n_predictors:
            raise ValueError('''X_pred does not contain the same number of predictors as
                             training data''')
        
    ### -------------------------- ###
        
    def fit(self, X_train, y_train):
        raise NotImplementedError()

    def predict(self, X_pred):
        raise NotImplementedError()
        

In [232]:
class BMA(BlankModel):
    """
    Object to produce 1-step ahead forecasts using 
    Bayesian Model Averaging
    """
    
    def __init__(self):
        super().__init__()

    def fit(self, X_train, y_train):
        self._check_fit_args(X_train, y_train)
        self.n_predictors = X.shape[1]
        self.fitted = True
    
    def predict(self, X_pred):
        self._check_predict_args(X_pred)


In [233]:
import itertools
from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV

class BMA(BlankModel):
    """
    Object to produce 1-step ahead forecasts using 
    Bayesian Model Averaging
    """
    
    def __init__(self):
        super().__init__()

    ### -------------------------- ###

    def _BIC(self, ls_model, X_train, y_train):
        T = X_train.shape[0]
        N = X_train.shape[1]
        fitted_y = ls_model.predict(X_train)
        resid = y_train - fitted_y
        sigma = np.var(resid, ddof=0)
        
        return T * np.log(sigma) + np.log(T) * N
    
    ### -------------------------- ###
    
    def fit(self, X_train, y_train):
        self._check_fit_args(X_train, y_train)
        self.n_predictors = X.shape[1]
        self.fitted = True
        
        # compute all combinations
        indices = list(range(0, self.n_predictors))
        self.combinations = []
        for n in range(1, self.n_predictors+1):
            self.combinations.extend(list(itertools.combinations(indices, n)))
            
        n_models = len(self.combinations)
            
        # compute linear models and bic
        ls_models = np.ndarray((n_models,), LinearRegression)
        ls_bic = np.zeros((n_models,))
        for i, combination in enumerate(self.combinations):
            ls_models[i] = LinearRegression(fit_intercept=False).fit(X_train[:,combination], y_train)
    
        def calc_bic(combinations, X_train, y_train):
            ls_bic = np.zeros((n_models,))
            for i, combination in enumerate(combinations):
                ls_bic[i] = self._BIC(ls_models[i], X_train[:,combination], y_train)
            return ls_bic
        
        ls_bic = calc_bic(self.combinations, X_train, y_train)
            
        self.ls_models = ls_models
        self.ls_bic = ls_bic
        
        def calculate_BMA_weights(ls_bic):
            weights_BMA = np.zeros((n_models,))
            for i in range(n_models):
                bic_i = ls_bic[i]
                denom = np.sum(np.exp(0.5 * (bic_i - ls_bic)))
                weights_BMA[i] = 1 / denom
                
            return weights_BMA
        
        self.weights_BMA = calculate_BMA_weights(self.ls_bic)
        
        return self
    
    def predict(self, X_pred):
        self._check_predict_args(X_pred)
        
        prediction = np.zeros((1,))
        n_models = len(self.ls_models)
        
        for i, combination in enumerate(self.combinations):
            prediction += self.weights_BMA[i] * self.ls_models[i].predict(X_pred[[0],combination].reshape(1,-1))
        
        return prediction

start = time.time()
m = BMA()
print(m.fit(X_train, y_train))
print(m.predict(X_test[[0],:]))
end = time.time()

print(end-start)

<__main__.BMA object at 0x000002475E950370>
[0.00991009]
2.3279483318328857


In [20]:
filename = 'forecast_results/results_AveW_BMA.pickle'
with open(filename, 'rb') as file:
    results = pickle.load(file)
    print(CW_test(y_test, results['pred_bmk'], results['pred_model']))

0.36536150945164503


In [17]:
results_expw_BMA = expanding_window_parallel(X_train, X_test[0:], y_train, y_test[0:], 
                    BMA, model_args=None, verbosity=100)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_BMA[item]}')
    
with open('forecast_results/results_expw_BMA.pickle', 'wb') as file:
    pickle.dump(results_expw_BMA, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   11.0s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   11.0s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   11.1s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   11.1s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   14.6s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:   

[Parallel(n_jobs=-1)]: Done 132 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 133 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 134 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 135 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 139 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 140 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 143 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 145 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 147 tasks      | elapsed:  2.4min
[Paralle

[Parallel(n_jobs=-1)]: Done 265 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 266 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 267 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 268 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 269 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 270 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 271 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 273 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 274 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 275 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 277 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 278 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 279 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:  4.7min
[Paralle

[Parallel(n_jobs=-1)]: Done 398 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done 399 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done 400 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done 401 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done 402 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done 403 tasks      | elapsed:  7.1min
[Parallel(n_jobs=-1)]: Done 404 tasks      | elapsed:  7.1min
[Parallel(n_jobs=-1)]: Done 405 tasks      | elapsed:  7.1min
[Parallel(n_jobs=-1)]: Done 406 tasks      | elapsed:  7.1min
[Parallel(n_jobs=-1)]: Done 407 tasks      | elapsed:  7.1min
[Parallel(n_jobs=-1)]: Done 408 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done 409 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done 410 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done 411 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done 412 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done 413 tasks      | elapsed:  7.3min
[Paralle

[Parallel(n_jobs=-1)]: Done 531 tasks      | elapsed:  9.8min
[Parallel(n_jobs=-1)]: Done 532 tasks      | elapsed:  9.9min
[Parallel(n_jobs=-1)]: Done 533 tasks      | elapsed:  9.9min
[Parallel(n_jobs=-1)]: Done 534 tasks      | elapsed:  9.9min
[Parallel(n_jobs=-1)]: Done 535 tasks      | elapsed:  9.9min
[Parallel(n_jobs=-1)]: Done 536 tasks      | elapsed: 10.0min
[Parallel(n_jobs=-1)]: Done 537 tasks      | elapsed: 10.0min
[Parallel(n_jobs=-1)]: Done 538 tasks      | elapsed: 10.0min
[Parallel(n_jobs=-1)]: Done 539 tasks      | elapsed: 10.0min
[Parallel(n_jobs=-1)]: Done 540 tasks      | elapsed: 10.1min
[Parallel(n_jobs=-1)]: Done 541 tasks      | elapsed: 10.1min
[Parallel(n_jobs=-1)]: Done 542 tasks      | elapsed: 10.1min
[Parallel(n_jobs=-1)]: Done 543 tasks      | elapsed: 10.1min
[Parallel(n_jobs=-1)]: Done 544 tasks      | elapsed: 10.1min
[Parallel(n_jobs=-1)]: Done 545 tasks      | elapsed: 10.2min
[Parallel(n_jobs=-1)]: Done 546 tasks      | elapsed: 10.2min
[Paralle

[Parallel(n_jobs=-1)]: Done 664 tasks      | elapsed: 12.8min
[Parallel(n_jobs=-1)]: Done 665 tasks      | elapsed: 12.8min
[Parallel(n_jobs=-1)]: Done 666 tasks      | elapsed: 12.9min
[Parallel(n_jobs=-1)]: Done 667 tasks      | elapsed: 12.9min
[Parallel(n_jobs=-1)]: Done 668 tasks      | elapsed: 12.9min
[Parallel(n_jobs=-1)]: Done 669 tasks      | elapsed: 13.0min
[Parallel(n_jobs=-1)]: Done 670 tasks      | elapsed: 13.0min
[Parallel(n_jobs=-1)]: Done 671 tasks      | elapsed: 13.0min
[Parallel(n_jobs=-1)]: Done 672 tasks      | elapsed: 13.0min
[Parallel(n_jobs=-1)]: Done 673 tasks      | elapsed: 13.0min
[Parallel(n_jobs=-1)]: Done 674 tasks      | elapsed: 13.1min
[Parallel(n_jobs=-1)]: Done 675 tasks      | elapsed: 13.1min
[Parallel(n_jobs=-1)]: Done 676 tasks      | elapsed: 13.1min
[Parallel(n_jobs=-1)]: Done 677 tasks      | elapsed: 13.1min
[Parallel(n_jobs=-1)]: Done 678 tasks      | elapsed: 13.2min
[Parallel(n_jobs=-1)]: Done 679 tasks      | elapsed: 13.2min
[Paralle

In [10]:
results_AveW_BMA = AveW_parallel(X_train, X_test[0:,:], y_train, y_test[0:], 
           BMA, m=10, w_min=240, model_args=None, verbosity=100)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_BMA[item]}')
    
with open('forecast_results/results_AveW_BMA.pickle', 'wb') as file:
    pickle.dump(results_AveW_BMA, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   33.5s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   33.6s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   33.8s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   34.1s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  2

[Parallel(n_jobs=-1)]: Done 132 tasks      | elapsed: 19.3min
[Parallel(n_jobs=-1)]: Done 133 tasks      | elapsed: 19.8min
[Parallel(n_jobs=-1)]: Done 134 tasks      | elapsed: 19.8min
[Parallel(n_jobs=-1)]: Done 135 tasks      | elapsed: 19.8min
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed: 19.9min
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed: 20.4min
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed: 20.4min
[Parallel(n_jobs=-1)]: Done 139 tasks      | elapsed: 20.4min
[Parallel(n_jobs=-1)]: Done 140 tasks      | elapsed: 20.4min
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed: 21.0min
[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed: 21.0min
[Parallel(n_jobs=-1)]: Done 143 tasks      | elapsed: 21.0min
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed: 21.0min
[Parallel(n_jobs=-1)]: Done 145 tasks      | elapsed: 21.6min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed: 21.6min
[Parallel(n_jobs=-1)]: Done 147 tasks      | elapsed: 21.6min
[Paralle

[Parallel(n_jobs=-1)]: Done 265 tasks      | elapsed: 39.5min
[Parallel(n_jobs=-1)]: Done 266 tasks      | elapsed: 39.5min
[Parallel(n_jobs=-1)]: Done 267 tasks      | elapsed: 39.6min
[Parallel(n_jobs=-1)]: Done 268 tasks      | elapsed: 39.6min
[Parallel(n_jobs=-1)]: Done 269 tasks      | elapsed: 40.1min
[Parallel(n_jobs=-1)]: Done 270 tasks      | elapsed: 40.1min
[Parallel(n_jobs=-1)]: Done 271 tasks      | elapsed: 40.2min
[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed: 40.3min
[Parallel(n_jobs=-1)]: Done 273 tasks      | elapsed: 40.7min
[Parallel(n_jobs=-1)]: Done 274 tasks      | elapsed: 40.7min
[Parallel(n_jobs=-1)]: Done 275 tasks      | elapsed: 40.8min
[Parallel(n_jobs=-1)]: Done 276 tasks      | elapsed: 40.9min
[Parallel(n_jobs=-1)]: Done 277 tasks      | elapsed: 41.3min
[Parallel(n_jobs=-1)]: Done 278 tasks      | elapsed: 41.4min
[Parallel(n_jobs=-1)]: Done 279 tasks      | elapsed: 41.5min
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed: 41.5min
[Paralle

[Parallel(n_jobs=-1)]: Done 398 tasks      | elapsed: 59.7min
[Parallel(n_jobs=-1)]: Done 399 tasks      | elapsed: 59.8min
[Parallel(n_jobs=-1)]: Done 400 tasks      | elapsed: 59.9min
[Parallel(n_jobs=-1)]: Done 401 tasks      | elapsed: 60.2min
[Parallel(n_jobs=-1)]: Done 402 tasks      | elapsed: 60.3min
[Parallel(n_jobs=-1)]: Done 403 tasks      | elapsed: 60.5min
[Parallel(n_jobs=-1)]: Done 404 tasks      | elapsed: 60.5min
[Parallel(n_jobs=-1)]: Done 405 tasks      | elapsed: 60.8min
[Parallel(n_jobs=-1)]: Done 406 tasks      | elapsed: 60.9min
[Parallel(n_jobs=-1)]: Done 407 tasks      | elapsed: 61.1min
[Parallel(n_jobs=-1)]: Done 408 tasks      | elapsed: 61.1min
[Parallel(n_jobs=-1)]: Done 409 tasks      | elapsed: 61.5min
[Parallel(n_jobs=-1)]: Done 410 tasks      | elapsed: 61.5min
[Parallel(n_jobs=-1)]: Done 411 tasks      | elapsed: 61.7min
[Parallel(n_jobs=-1)]: Done 412 tasks      | elapsed: 61.7min
[Parallel(n_jobs=-1)]: Done 413 tasks      | elapsed: 62.1min
[Paralle

[Parallel(n_jobs=-1)]: Done 531 tasks      | elapsed: 80.3min
[Parallel(n_jobs=-1)]: Done 532 tasks      | elapsed: 80.3min
[Parallel(n_jobs=-1)]: Done 533 tasks      | elapsed: 80.7min
[Parallel(n_jobs=-1)]: Done 534 tasks      | elapsed: 80.7min
[Parallel(n_jobs=-1)]: Done 535 tasks      | elapsed: 80.9min
[Parallel(n_jobs=-1)]: Done 536 tasks      | elapsed: 81.0min
[Parallel(n_jobs=-1)]: Done 537 tasks      | elapsed: 81.3min
[Parallel(n_jobs=-1)]: Done 538 tasks      | elapsed: 81.3min
[Parallel(n_jobs=-1)]: Done 539 tasks      | elapsed: 81.6min
[Parallel(n_jobs=-1)]: Done 540 tasks      | elapsed: 81.6min
[Parallel(n_jobs=-1)]: Done 541 tasks      | elapsed: 81.9min
[Parallel(n_jobs=-1)]: Done 542 tasks      | elapsed: 82.0min
[Parallel(n_jobs=-1)]: Done 543 tasks      | elapsed: 82.2min
[Parallel(n_jobs=-1)]: Done 544 tasks      | elapsed: 82.2min
[Parallel(n_jobs=-1)]: Done 545 tasks      | elapsed: 82.6min
[Parallel(n_jobs=-1)]: Done 546 tasks      | elapsed: 82.6min
[Paralle

[Parallel(n_jobs=-1)]: Done 664 tasks      | elapsed: 101.0min
[Parallel(n_jobs=-1)]: Done 665 tasks      | elapsed: 101.3min
[Parallel(n_jobs=-1)]: Done 666 tasks      | elapsed: 101.3min
[Parallel(n_jobs=-1)]: Done 667 tasks      | elapsed: 101.6min
[Parallel(n_jobs=-1)]: Done 668 tasks      | elapsed: 101.7min
[Parallel(n_jobs=-1)]: Done 669 tasks      | elapsed: 101.9min
[Parallel(n_jobs=-1)]: Done 670 tasks      | elapsed: 102.0min
[Parallel(n_jobs=-1)]: Done 671 tasks      | elapsed: 102.3min
[Parallel(n_jobs=-1)]: Done 672 tasks      | elapsed: 102.3min
[Parallel(n_jobs=-1)]: Done 673 tasks      | elapsed: 102.6min
[Parallel(n_jobs=-1)]: Done 674 tasks      | elapsed: 102.6min
[Parallel(n_jobs=-1)]: Done 675 tasks      | elapsed: 102.9min
[Parallel(n_jobs=-1)]: Done 676 tasks      | elapsed: 102.9min
[Parallel(n_jobs=-1)]: Done 677 tasks      | elapsed: 103.2min
[Parallel(n_jobs=-1)]: Done 678 tasks      | elapsed: 103.2min
[Parallel(n_jobs=-1)]: Done 679 tasks      | elapsed: 1

### MMA

In [234]:
### DONE UP TO VARIABLE SELECTION PART ###

import random
from scipy import optimize
from sklearn.linear_model import LinearRegression, LassoCV
from sklearn.model_selection import TimeSeriesSplit

class MMA(BlankModel):
    """
    Object to produce 1-step ahead forecasts using 
    Mallows Model Averaging
    """
    
    def __init__(self):
        super().__init__()

    def fit(self, X_train, y_train):
        self._check_fit_args(X_train, y_train)
        self.n_predictors = X_train.shape[1]
        self.fitted = True
        
        n_train = X_train.shape[0]
        
        ### VARIABLE SELECTION ###
        
        # use lasso to see which regressors will be included
        cv = TimeSeriesSplit(n_splits=5)
        lasso = LassoCV(cv=cv).fit(X_train, y_train)
        included = lasso.coef_ > 0
    
        # sort variables based on lasso beta values
        # assumes input data have been standardized
        lasso_selected = []
        
        for i in range(len(included)):
            if included[i]:
                lasso_selected.append((i, lasso.coef_[i]))
        
        lasso_selected = sorted(lasso_selected, key=lambda lst: lst[1], reverse=True)
        lasso_selected = [item[0] for item in lasso_selected]
        
        # randomly sort zero-coeff regressors
        leftover = set(range(0,self.n_predictors)).difference(set(lasso_selected))
        leftover = list(leftover)
        random.shuffle(leftover)
        
        # final ordering of nested models
        lasso_selected.extend(leftover)
        self.order = np.array(lasso_selected)
        self.model_regresors = []
        for i in range(self.n_predictors+1):
            self.model_regresors.append(self.order[:i])
        
        ### MMA PART ###
        
        return self.model_regresors
    
    def predict(self, X_pred):
        self._check_predict_args(X_pred)

start = time.time()
m = MMA()
print(m.fit(X_train, y_train))
print(m.predict(X_test[[0],:]))
end = time.time()

print(end-start)

[array([], dtype=int32), array([2]), array([2, 9]), array([2, 9, 7]), array([2, 9, 7, 3]), array([2, 9, 7, 3, 1]), array([ 2,  9,  7,  3,  1, 10]), array([ 2,  9,  7,  3,  1, 10,  0]), array([ 2,  9,  7,  3,  1, 10,  0,  8]), array([ 2,  9,  7,  3,  1, 10,  0,  8,  6]), array([ 2,  9,  7,  3,  1, 10,  0,  8,  6,  4]), array([ 2,  9,  7,  3,  1, 10,  0,  8,  6,  4,  5]), array([ 2,  9,  7,  3,  1, 10,  0,  8,  6,  4,  5, 11])]
None
0.1339890956878662


In [None]:
### DONE UP TO VARIABLE SELECTION PART ###

import random
from scipy import optimize
from sklearn.linear_model import LinearRegression, LassoCV
from sklearn.model_selection import TimeSeriesSplit

class MMA(BlankModel):
    """
    Object to produce 1-step ahead forecasts using 
    Mallows Model Averaging
    """
    
    def __init__(self):
        super().__init__()

    def fit(self, X_train, y_train):
        self._check_fit_args(X_train, y_train)
        self.n_predictors = X_train.shape[1]
        self.fitted = True
        
        n_train = X_train.shape[0]
        
        ### VARIABLE SELECTION ###
        
        # use lasso to see which regressors will be included
        cv = TimeSeriesSplit(n_splits=5)
        lasso = LassoCV(cv=cv).fit(X_train, y_train)
        included = lasso.coef_ > 0
    
        # sort variables based on lasso beta values
        # assumes input data have been standardized
        lasso_selected = []
        
        for i in range(len(included)):
            if included[i]:
                lasso_selected.append((i, lasso.coef_[i]))
        
        lasso_selected = sorted(lasso_selected, key=lambda lst: lst[1], reverse=True)
        lasso_selected = [item[0] for item in lasso_selected]
        
        # randomly sort zero-coeff regressors
        leftover = set(range(0,self.n_predictors)).difference(set(lasso_selected))
        leftover = list(leftover)
        random.shuffle(leftover)
        
        # final ordering of nested models
        lasso_selected.extend(leftover)
        self.order = np.array(lasso_selected)
        self.model_regressors = []
        for i in range(self.n_predictors+1):
            self.model_regressors.append(self.order[:i])
        
        s = np.zeros((self.n_predictors+1, self.n_predictors))
        for i in range(self.n_predictors+1):
            regr = self.model_regressors[i]
            for j in regr:
                s[i,j] = 1
        
        ### MMA PART ###
        y_train = y_train.reshape(-1,1)
        n, p = X_train.shape
        m = s.shape[0]
        bbeta = np.zeros((p,m))

        for j in range(m):
            #print(j+1)
            #print('')
            ss = np.ones((n,1)) @ s[[j],:] > 0;

            xs = X_train[ss]
            xs = xs.reshape(n,int(len(xs)/n))
            betas = np.linalg.lstsq(xs.T @ xs, xs.T @ y_train)[0]

            sj = (s[[j],:] > 0) * 1

            #print(sj)
            #print(sj.shape)
            #print(betas.shape)
            #print(bbeta[sj,j].shape)
            #print('')
            
            #bbeta[sj,j][:,0:j] = betas.flatten()
            
            #bbeta[sj.reshape(-1,1)] = betas
        
        ee = y_train @ np.ones((1,m)) - X_train @ bbeta
        
        return
    
    def predict(self, X_pred):
        self._check_predict_args(X_pred)

start = time.time()
m = MMA()
print(m.fit(X_train, y_train))
print(m.predict(X_test[[0],:]))
end = time.time()

print(end-start)

In [450]:
### DONE UP TO VARIABLE SELECTION PART ###

import random
import cvxopt
from scipy import optimize
from sklearn.linear_model import LinearRegression, LassoCV
from sklearn.model_selection import TimeSeriesSplit
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

class MMA(BlankModel):
    """
    Object to produce 1-step ahead forecasts using 
    Mallows Model Averaging
    """
    
    def __init__(self):
        super().__init__()

    def fit(self, X_train, y_train):
        self._check_fit_args(X_train, y_train)
        self.n_predictors = X_train.shape[1]
        self.fitted = True
        
        n_train = X_train.shape[0]
        
        ### VARIABLE SELECTION ###
        
        # use lasso to see which regressors will be included
        cv = TimeSeriesSplit(n_splits=5)
        lasso = LassoCV(cv=cv).fit(X_train, y_train)
        included = lasso.coef_ > 0
    
        # sort variables based on lasso beta values
        # assumes input data have been standardized
        lasso_selected = []
        
        for i in range(len(included)):
            if included[i]:
                lasso_selected.append((i, lasso.coef_[i]))
        
        lasso_selected = sorted(lasso_selected, key=lambda lst: lst[1], reverse=True)
        lasso_selected = [item[0] for item in lasso_selected]
        
        # randomly sort zero-coeff regressors
        leftover = set(range(0,self.n_predictors)).difference(set(lasso_selected))
        leftover = list(leftover)
        random.shuffle(leftover)
        
        # final ordering of nested models
        lasso_selected.extend(leftover)
        self.order = np.array(lasso_selected)
        self.model_regressors = []
        for i in range(self.n_predictors+1):
            self.model_regressors.append(self.order[:i])
        
        s = np.zeros((self.n_predictors+1, self.n_predictors))
        for i in range(self.n_predictors+1):
            regr = self.model_regressors[i]
            for j in regr:
                s[i,j] = 1
        
        ### MMA PART ###
        y_train = y_train.reshape(-1,1)
        n, p = X_train.shape
        m = s.shape[0]
        bbeta = np.zeros((p,m))

        for j in range(m):
            #print(j+1)
            #print('')
            ss = np.ones((n,1)) @ s[[j],:] > 0;

            xs = X_train[ss]
            xs = xs.reshape(n,int(len(xs)/n))
            betas = np.linalg.lstsq(xs.T @ xs, xs.T @ y_train, rcond=None)[0]
            
            for i, num in enumerate(betas):
                bbeta[i,j] = num

            sj = (s[[j],:] > 0) * 1

            #print(sj)
            #print(sj.shape)
            #print(betas.shape)
            #print(bbeta[sj,j].shape)
            #print('')
            
            #bbeta[sj,j][:,0:j] = betas.flatten()
            
            #bbeta[sj.reshape(-1,1)] = betas
        
        ee = y_train @ np.ones((1,m)) - X_train @ bbeta
        ehat = y_train - X_train @ bbeta[:,[m-1]]
        sighat = (ehat.T @ ehat) / (n-p)

        a1 = ee.T  @ ee
        a2 = (np.sum(s, axis=1) * sighat).T
        
        w0 = np.ones((m,1)) / m
        
        #print(cvxopt.matrix(0, (1,1)).size)
        
        P = cvxopt.matrix(a1, a1.shape)
        q = cvxopt.matrix(a2, a2.shape)
        
        G = np.zeros((1,m))
        G = np.vstack([G, np.diag(np.ones((13,)))])
        G = np.vstack([G, -np.diag(np.ones((13,)))])
        G = cvxopt.matrix(G, G.shape, 'd')

        h = np.zeros((1,1))
        h = np.vstack([h, np.ones((m,1))])
        h = np.vstack([h, np.zeros((m,1))])
        h = cvxopt.matrix(h, h.shape, 'd')

        A = cvxopt.matrix(np.ones((1,m)), (1,m))
        b = cvxopt.matrix(1.0, (1,1))
        
        cvxopt.solvers.options['show_progress'] = False
        w = cvxopt.solvers.qp(P=P, q=q, G=G, h=h, A=A, b=b, initvals=w0)
        
        self.mallows_weight = np.array(w['x'])
        
        self.betahat = bbeta @ self.mallows_weight
        
        return self
    
    def predict(self, X_pred):
        self._check_predict_args(X_pred)
        
        return X_pred @ self.betahat

start = time.time()
m = MMA()
print(m.fit(X_train, y_train))
print(m.predict(X_test[[0],:]))
end = time.time()

print(end-start)

<__main__.MMA object at 0x0000024761171070>
[[0.01308424]]
0.10970759391784668


In [464]:
results_expw_MMA = expanding_window_parallel(X_train, X_test[0:], y_train, y_test[0:], MMA, model_args=None)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {res[item]}')
    
with open('forecast_results/results_expw_MMA.pickle', 'wb') as file:
    pickle.dump(results_expw_MMA, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:    7.9s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:    8.7s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   10.4s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   11.3s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   

out-of-sample R2: -0.01979177369484053
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.001798055049823757
elapsed time: 328.19082140922546


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:   38.3s finished


In [465]:
results_AveW_MMA = AveW_parallel(X_train, X_test[0:,:], y_train, y_test[0:], 
                                     MMA, m=10, w_min=240, model_args=None)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {res[item]}')
    
with open('forecast_results/results_AveW_MMA.pickle', 'wb') as file:
    pickle.dump(results_AveW_MMA, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   11.2s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   16.2s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   19.6s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   28.1s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:   34.4s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:   39.3s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:   45.6s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   50.9s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   58.9s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:  1

out-of-sample R2: -0.01979177369484053
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.001798055049823757
elapsed time: 328.19082140922546


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:  5.1min finished


### JMA

In [29]:
### DONE UP TO VARIABLE SELECTION PART ###

import random
import cvxopt
from scipy import optimize
from sklearn.linear_model import LinearRegression, LassoCV
from sklearn.model_selection import TimeSeriesSplit
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

class JMA(BlankModel):
    """
    Object to produce 1-step ahead forecasts using 
    Jackknife Model Averaging
    """
    
    def __init__(self):
        super().__init__()

    def fit(self, X_train, y_train):
        self._check_fit_args(X_train, y_train)
        self.n_predictors = X_train.shape[1]
        self.fitted = True
        
        n_train = X_train.shape[0]
        
        ### VARIABLE SELECTION ###
        
        # use lasso to see which regressors will be included
        cv = TimeSeriesSplit(n_splits=5)
        lasso = LassoCV(cv=cv).fit(X_train, y_train)
        included = lasso.coef_ > 0
    
        # sort variables based on lasso beta values
        # assumes input data have been standardized
        lasso_selected = []
        
        for i in range(len(included)):
            if included[i]:
                lasso_selected.append((i, lasso.coef_[i]))
        
        lasso_selected = sorted(lasso_selected, key=lambda lst: lst[1], reverse=True)
        lasso_selected = [item[0] for item in lasso_selected]
        
        # randomly sort zero-coeff regressors
        leftover = set(range(0,self.n_predictors)).difference(set(lasso_selected))
        leftover = list(leftover)
        random.shuffle(leftover)
        
        # final ordering of nested models
        lasso_selected.extend(leftover)
        self.order = np.array(lasso_selected)
        self.model_regressors = []
        for i in range(self.n_predictors+1):
            self.model_regressors.append(self.order[:i])
        
        s = np.zeros((self.n_predictors+1, self.n_predictors))
        for i in range(self.n_predictors+1):
            regr = self.model_regressors[i]
            for j in regr:
                s[i,j] = 1
        
        ### JMA PART ###
        y_train = y_train.reshape(-1,1)
        n, p = X_train.shape
        m = s.shape[0]
        bbeta = np.zeros((p,m))
        
        ee = np.zeros((n,m))
        
        for j in range(m):
            ss = np.ones((n,1)) @ s[[j],:] > 0;
            xs = X_train[ss]
            xs = xs.reshape(n,int(len(xs)/n))
            betas = np.linalg.lstsq(xs.T @ xs, xs.T @ y_train, rcond=None)[0]
            
            for i, num in enumerate(betas):
                bbeta[i,j] = num

            sj = (s[[j],:] > 0) * 1

            ei = y_train - xs @ betas
            temp = np.linalg.lstsq(xs.T, (xs.T @ xs).T, rcond=None)[0] @ xs.T
            hi = np.diag(temp).reshape(-1,1)
            ee[:,[j]] = ei * (1 / (1-hi))
            

        a1 = ee.T  @ ee
        a2 = np.zeros((m,1))
        
        w0 = np.ones((m,1)) / m
        
        P = cvxopt.matrix(a1, a1.shape)
        q = cvxopt.matrix(a2, a2.shape)
        
        G = np.zeros((1,m))
        G = np.vstack([G, np.diag(np.ones((13,)))])
        G = np.vstack([G, -np.diag(np.ones((13,)))])
        G = cvxopt.matrix(G, G.shape, 'd')

        h = np.zeros((1,1))
        h = np.vstack([h, np.ones((m,1))])
        h = np.vstack([h, np.zeros((m,1))])
        h = cvxopt.matrix(h, h.shape, 'd')

        A = cvxopt.matrix(np.ones((1,m)), (1,m))
        b = cvxopt.matrix(1.0, (1,1))
        
        cvxopt.solvers.options['show_progress'] = False
        w = cvxopt.solvers.qp(P=P, q=q, G=G, h=h, A=A, b=b, initvals=w0)
        
        self.mallows_weight = np.array(w['x'])
        
        self.betahat = bbeta @ self.mallows_weight
        
        return self
    
    def predict(self, X_pred):
        self._check_predict_args(X_pred)
        
        return X_pred @ self.betahat

start = time.time()
m = JMA()
print(m.fit(X_train, y_train))
print(m.predict(X_test[[0],:]))
end = time.time()

print(end-start)

<__main__.JMA object at 0x000001F0873FFE80>
[[0.03712898]]
0.17511892318725586


In [34]:
results_expw_JMA = expanding_window_parallel(X_train, X_test[0:], y_train, y_test[0:], JMA, model_args=None)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_JMA[item]}')
    
with open('forecast_results/results_expw_JMA.pickle', 'wb') as file:
    pickle.dump(results_expw_JMA, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:    4.3s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    5.8s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   

out-of-sample R2: -0.05943942249801881
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0018679601588696018
elapsed time: 38.96409869194031


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:   38.9s finished


In [35]:
results_AveW_JMA = AveW_parallel(X_train, X_test[0:,:], y_train, y_test[0:], 
                                     JMA, m=10, w_min=240, model_args=None)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_JMA[item]}')
    
with open('forecast_results/results_AveW_JMA.pickle', 'wb') as file:
    pickle.dump(results_AveW_JMA, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   12.7s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   15.5s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   19.8s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   22.9s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:   28.3s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:   32.7s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:   38.0s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   42.8s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   49.7s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   55.8s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:  1

out-of-sample R2: -0.015709737440605887
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0017908577708401294
elapsed time: 300.6286463737488


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:  5.0min finished
