In [16]:
# standard imports
import time
import pickle

# external imports
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn import metrics

# own imports
from evaluation import R2OS


In [2]:
# Read the Goyal predictor table; ',' is treated as a thousands separator when parsing numbers.
GoyalData_updated = pd.read_csv("GoyalData_2021.csv", thousands=',')

# First and last month (yyyymm) of the overall sample.
start, end = 192612, 201612

# Row labels bracketing the overall sample (end is exclusive).
# They are not used to slice anything: `data` below is the complete table.
start_index = GoyalData_updated.loc[GoyalData_updated['yyyymm'].eq(start)].index[0]
end_index = GoyalData_updated.loc[GoyalData_updated['yyyymm'].eq(end)].index[0] + 1

data = GoyalData_updated

# Training / test periods, expressed as yyyymm.
TRAIN_START, TRAIN_END = 192612, 195612
TEST_START, TEST_END = 195701, 201612

# Matching row labels in `data`; the *_END values are exclusive bounds (label + 1).
IND_TRAIN_START = data.loc[data['yyyymm'].eq(TRAIN_START)].index[0]
IND_TRAIN_END = data.loc[data['yyyymm'].eq(TRAIN_END)].index[0] + 1
IND_TEST_START = data.loc[data['yyyymm'].eq(TEST_START)].index[0]
IND_TEST_END = data.loc[data['yyyymm'].eq(TEST_END)].index[0] + 1

data

Unnamed: 0,yyyymm,Index,D12,E12,b/m,tbl,AAA,BAA,lty,ntis,Rfree,infl,ltr,corpr,svar,csp,CRSP_SPvw,CRSP_SPvwx
0,187101,4.44,0.2600,0.4000,,,,,,,,,,,,,,
1,187102,4.50,0.2600,0.4000,,,,,,,0.004967,,,,,,,
2,187103,4.61,0.2600,0.4000,,,,,,,0.004525,,,,,,,
3,187104,4.74,0.2600,0.4000,,,,,,,0.004252,,,,,,,
4,187105,4.86,0.2600,0.4000,,,,,,,0.004643,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1807,202108,4522.68,58.7913,169.8333,0.184756,0.0005,0.0255,0.0324,0.0128,0.014846,0.000000,0.002066,-0.0035,-0.0045,0.000602,,0.030600,0.029205
1808,202109,4307.54,59.2545,175.3700,0.193036,0.0004,0.0253,0.0323,0.0137,0.015598,0.000000,0.002716,-0.0250,-0.0194,0.001393,,-0.046076,-0.047152
1809,202110,4605.38,59.6354,182.8600,0.182389,0.0005,0.0268,0.0335,0.0158,0.013368,0.000000,0.008308,0.0051,0.0159,0.001151,,0.070510,0.069627
1810,202111,4567.00,60.0162,190.3500,0.189455,0.0005,0.0262,0.0328,0.0156,0.015640,0.000100,0.004913,0.0210,0.0094,0.001327,,-0.007256,-0.008665


In [3]:
# Build the modelling table: the month key, the one-month-ahead target and the predictors.
# NOTE(review): the target is next month's CRSP_SPvw as-is (Rfree is not subtracted) --
# confirm that this is the intended definition of 'EQPREM_+1'.
data_stocks = (
    data[['yyyymm']]
    .copy()
    .assign(**{
        # target: value of CRSP_SPvw in the following row (month)
        'EQPREM_+1': data['CRSP_SPvw'].shift(-1),
        # log ratios built from dividends, earnings and the index level
        'DP': np.log(data['D12'] / data['Index']),
        'DY': np.log(data['D12'] / data['Index'].shift(1)),
        'DE': np.log(data['D12'] / data['E12']),
        'EP': np.log(data['E12'] / data['Index']),
        # columns carried over unchanged
        'SVAR': data['svar'],
        'BM': data['b/m'],
        'NTIS': data['ntis'],
        'TBL': data['tbl'],
        'LTY': data['lty'],
        'LTR': data['ltr'],
        # spreads between pairs of source columns
        'TMS': data['lty'] - data['tbl'],
        'DFY': data['BAA'] - data['AAA'],
        'DFR': data['corpr'] - data['ltr'],
        'INFL': data['infl'],
    })
    # LTY and DE are computed above but not kept as predictors
    .drop(columns=['LTY', 'DE'])
)

# Persist the rows from TRAIN_START through TEST_END (inclusive) without the index column.
data_stocks.iloc[IND_TRAIN_START:IND_TEST_END, :].to_csv('data_e.csv', index=False)

data_stocks

Unnamed: 0,yyyymm,EQPREM_+1,DP,DY,EP,SVAR,BM,NTIS,TBL,LTR,TMS,DFY,DFR,INFL
0,187101,,-2.837728,,-2.406945,,,,,,,,,
1,187102,,-2.851151,-2.837728,-2.420368,,,,,,,,,
2,187103,,-2.875302,-2.851151,-2.444519,,,,,,,,,
3,187104,,-2.903111,-2.875302,-2.472328,,,,,,,,,
4,187105,,-2.928112,-2.903111,-2.497329,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1807,202108,-0.046076,-4.342866,-4.314288,-3.282043,0.000602,0.184756,0.014846,0.0005,-0.0035,0.0123,0.0069,-0.0010,0.002066
1808,202109,0.070510,-4.286281,-4.335018,-3.201224,0.001393,0.193036,0.015598,0.0004,-0.0250,0.0133,0.0070,0.0056,0.002716
1809,202110,-0.007256,-4.346731,-4.279873,-3.226260,0.001151,0.182389,0.013368,0.0005,0.0051,0.0153,0.0067,0.0108,0.008308
1810,202111,0.043485,-4.331997,-4.340366,-3.177747,0.001327,0.189455,0.015640,0.0005,0.0210,0.0151,0.0066,-0.0116,0.004913


In [4]:
from sklearn.preprocessing import StandardScaler

# Predictor rows run from TRAIN_START up to the month before TEST_END: the target column
# 'EQPREM_+1' holds the following row's value, so the TEST_END row itself is left out.
X = np.array(data_stocks.drop(columns=['yyyymm', 'EQPREM_+1']).iloc[IND_TRAIN_START:IND_TEST_END-1,:])
y = np.array(data_stocks.loc[:,['EQPREM_+1']].iloc[IND_TRAIN_START:IND_TEST_END-1,:]).flatten()

# Number of training rows, derived from the period constants instead of a hard-coded 360:
# predictor rows TRAIN_START .. (TRAIN_END - 1 month), whose targets end at TRAIN_END.
N_TRAIN = int(IND_TRAIN_END - 1 - IND_TRAIN_START)

X_train = X[:N_TRAIN,:]
X_test = X[N_TRAIN:,:]
y_train = y[:N_TRAIN]
y_test = y[N_TRAIN:]

# Fit the scaler on the training rows only, so that test-period means and variances
# do not leak into the standardised features, then apply it to the whole sample.
scaler = StandardScaler().fit(X_train)
X_scaled = scaler.transform(X)

X_scaled_train = X_scaled[:N_TRAIN,:]
X_scaled_test = X_scaled[N_TRAIN:,:]


In [5]:
# Base model: forecast every test observation with the mean of the training targets.
bmk = y_train.mean()
pred_bmk = np.full(y_test.shape[0], bmk)

# The benchmark is compared with itself here, hence the R2OS of 0.0 printed below.
MSPE_bmk = metrics.mean_squared_error(y_test, pred_bmk)
R2OS_bmk = R2OS(MSPE_bmk, MSPE_bmk)

print(MSPE_bmk, R2OS_bmk)

0.001761592109508611 0.0


### CW Test

In [6]:
### DONE WITH LEAST SQUARES T SCORE ###

import statsmodels.api as sm

def CW_test(y_test, pred_bmk, pred_model):
    """
    CW test statistic obtained as an OLS t-value.

    Builds the adjusted loss differential
        f = (y - pred_bmk)^2 - [(y - pred_model)^2 - (pred_bmk - pred_model)^2],
    regresses it on a column of ones with statsmodels OLS and returns the
    t-value of that constant.

    Note: a later cell of this notebook defines ``CW_test`` again, so when the
    cells are run in order that later definition replaces this one.

    parameters
    -----------
    y_test: 1-dimensional numpy array, realised values
    pred_bmk: 1-dimensional numpy array, benchmark forecasts
    pred_model: 1-dimensional numpy array, model forecasts
    """
    sq_err_bmk = (y_test - pred_bmk) ** 2
    adj_sq_err_model = (y_test - pred_model) ** 2 - (pred_bmk - pred_model) ** 2
    f = sq_err_bmk - adj_sq_err_model

    # intercept-only design matrix: the fitted constant is the mean of f
    intercept_only = np.ones((f.shape[0], 1))
    return sm.OLS(f, intercept_only, hasconst=True).fit().tvalues[0]


In [7]:
### DONE WITH FORMULA IN THE PAPER ###

def CW_test(y_test, pred_bmk, pred_model):
    """
    CW test statistic computed in closed form.

    With e1 = y - pred_bmk, e2 = y - pred_model and adj = (pred_bmk - pred_model)^2,
    the adjusted loss differential is f = e1^2 - (e2^2 - adj) and the statistic is
        sqrt(p) * mean(f) / std(f, ddof=1),
    where p is the number of forecasts.

    parameters
    -----------
    y_test: array-like, realised values
    pred_bmk: array-like (or scalar), benchmark forecasts
    pred_model: array-like (or scalar), model forecasts

    All inputs are converted to flat float arrays first, so plain lists are accepted
    and a column vector (n, 1) cannot silently broadcast against an (n,) array into
    an (n, n) matrix.

    returns
    -----------
    float, the test statistic (nan/inf if f has zero sample variance or p < 2)
    """
    y_test = np.asarray(y_test, dtype=float).ravel()
    pred_bmk = np.asarray(pred_bmk, dtype=float).ravel()
    pred_model = np.asarray(pred_model, dtype=float).ravel()

    e_1_hat_2 = (y_test - pred_bmk) ** 2
    e_2_hat_2 = (y_test - pred_model) ** 2
    adj = (pred_bmk - pred_model) ** 2

    f_hat = e_1_hat_2 - (e_2_hat_2 - adj)
    p = f_hat.shape[0]
    f_bar = np.sum(f_hat) / p

    return (p ** 0.5) * f_bar / (np.var(f_hat - f_bar, ddof=1) ** 0.5)

### Expanding Window - Single Thread

In [79]:
from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn import metrics

def expanding_window(X_train, X_test, y_train, y_test, forecast_model, model_args=None):
    """
    One-step-ahead forecasts with an expanding estimation window.

    For test observation i the model is re-fitted on the initial training sample plus
    test observations 0..i-1 (features and target standardised on that sample), and the
    forecast is mapped back to the original scale of y. The benchmark forecast is the
    mean of the same target history.

    parameters
    -----------
    X_train: 2-dimensional numpy array, independent variables used to train the forecast model
    X_test: 2-dimensional numpy array, independent variables used for forecasting
    y_train: 1-dimensional numpy array, dependent variable used to train the forecast model
    y_test: 1-dimensional numpy array, dependent variable to forecast
    forecast_model: python object, model used for forecasting. Make this object similar to the
    sklearn models, with a fit() and predict() function
    model_args: dict, arguments to pass when initializing the model

    returns
    -----------
    dict with keys 'out-of-sample R2' (value of R2OS(MSPE_model, MSPE_bmk)), 'MSPE_bmk',
    'MSPE_model', 'pred_bmk', 'pred_model' and 'elapsed time' (seconds)
    """
    start_time = time.time()
    if model_args is None:
        model_args = {}
    n_periods = y_test.shape[0]
    n_initial = X_train.shape[0]
    pred_model = np.zeros((n_periods,))
    pred_bmk = np.zeros((n_periods,))

    # Stack train and test once; each iteration then works on a prefix of these arrays
    # instead of re-copying the whole history with vstack/hstack on every step.
    X_all = np.vstack([X_train, X_test[:n_periods]])
    y_all = np.hstack([y_train, y_test])

    # run a forecast for every test period
    for i in range(n_periods):
        print(f'{i+1}/{n_periods}', end='\r')

        # history available for forecast i: initial sample + the first i test rows
        n_obs = n_initial + i
        X_hist = X_all[:n_obs]
        y_hist = y_all[:n_obs]

        X_scaler = StandardScaler().fit(X_hist)
        X = X_scaler.transform(X_hist)
        y = y_hist.reshape(-1,1)
        y_scaler = StandardScaler().fit(y)
        y = y_scaler.transform(y).flatten()

        # forecasts of model
        model = forecast_model(**model_args).fit(X, y)
        pred = model.predict(X_scaler.transform(X_test[[i],:]))
        # undo the target scaling and store a true scalar: assigning a size-1 array
        # to a single element is deprecated in recent NumPy releases
        pred_model[i] = y_scaler.inverse_transform(np.asarray(pred).reshape(-1, 1))[0, 0]

        # forecasts of historical average
        pred_bmk[i] = np.mean(y_hist)

    print('')

    MSPE_model = metrics.mean_squared_error(y_test, pred_model)
    MSPE_bmk = metrics.mean_squared_error(y_test, pred_bmk)
    R2OS_val = R2OS(MSPE_model, MSPE_bmk)

    end_time = time.time()
    elapsed_time = end_time - start_time

    return {'out-of-sample R2': R2OS_val, 
            'MSPE_bmk': MSPE_bmk, 
            'MSPE_model': MSPE_model, 
            'pred_bmk': pred_bmk,
            'pred_model': pred_model,
            'elapsed time': elapsed_time}


# splitter passed as `cv` to the LassoCV / ElasticNetCV runs in the cells below
cv = TimeSeriesSplit(n_splits=5)

In [16]:
# Expanding-window forecasts with OLS, followed by the headline metrics.
# NOTE: `display` is rebound to a list here, which hides IPython's display() helper.
results_expw_LinearRegression = expanding_window(
    X_train,
    X_test,
    y_train,
    y_test,
    LinearRegression,
    model_args={'fit_intercept': True},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_LinearRegression[item]}')

720/720
out-of-sample R2: -0.09479595117769124
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0019302993408247479
elapsed time: 1.5191881656646729


In [17]:
# CW statistic: expanding-window OLS forecasts against the historical-average benchmark
print(
    CW_test(
        y_test,
        results_expw_LinearRegression['pred_bmk'],
        results_expw_LinearRegression['pred_model'],
    )
)

-0.39895135944712484


In [24]:
# Expanding-window forecasts with LassoCV (penalty chosen via the `cv` splitter).
results_expw_LassoCV = expanding_window(
    X_train,
    X_test,
    y_train,
    y_test,
    LassoCV,
    model_args={'cv': cv},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_LassoCV[item]}')

720/720
out-of-sample R2: -0.00023184786504271138
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0017635677904443044
elapsed time: 62.68646764755249


In [25]:
# CW statistic: expanding-window LassoCV forecasts against the historical-average benchmark
print(
    CW_test(
        y_test,
        results_expw_LassoCV['pred_bmk'],
        results_expw_LassoCV['pred_model'],
    )
)

-1.1582439164883773


In [26]:
# Expanding-window forecasts with ElasticNetCV (penalty chosen via the `cv` splitter).
results_expw_ElasticNetCV = expanding_window(
    X_train,
    X_test,
    y_train,
    y_test,
    ElasticNetCV,
    model_args={'cv': cv},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_ElasticNetCV[item]}')

720/720
out-of-sample R2: 1.6082268055495064e-05
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0017631306501973247
elapsed time: 57.54194521903992


In [27]:
# CW statistic: expanding-window ElasticNetCV forecasts against the historical-average benchmark
print(
    CW_test(
        y_test,
        results_expw_ElasticNetCV['pred_bmk'],
        results_expw_ElasticNetCV['pred_model'],
    )
)

1.0780086104381943


### AveW - Single Thread

In [28]:
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

def AveW(X_train, X_test, y_train, y_test, forecast_model, m, w_min, model_args=None):
    """
    One-step-ahead forecasts averaged over `m` estimation windows of different length.

    For test observation j the history is the initial training sample plus test
    observations 0..j-1 (T rows). For i = 1..m a model is fitted on the rows
    [T-1-w_i, T-1) of that history, with
        w_i = int(w_min + (i - 1)/(m - 1) * (T - w_min)) - 1,
    features and target being standardised on the window. The m forecasts are mapped
    back to the original scale of y and averaged. The benchmark forecast is the mean
    of the full target history.

    NOTE(review): every window ends at row T-2, i.e. the most recent training row
    (index T-1) is never used for fitting -- confirm this is intended.

    parameters
    -----------
    X_train: 2-dimensional numpy array, independent variables of the initial training sample
    X_test: 2-dimensional numpy array, independent variables used for forecasting
    y_train: 1-dimensional numpy array, dependent variable of the initial training sample
    y_test: 1-dimensional numpy array, dependent variable to forecast
    forecast_model: class with sklearn-style fit() and predict(), instantiated per window
    m: int, number of windows (at least 2)
    w_min: int, smallest window parameter (between 2 and the number of training rows)
    model_args: dict, arguments to pass when initializing the model

    returns
    -----------
    dict with keys 'out-of-sample R2' (value of R2OS(MSPE_model, MSPE_bmk)), 'MSPE_bmk',
    'MSPE_model', 'pred_bmk', 'pred_model' and 'elapsed time' (seconds)

    raises
    -----------
    ValueError if m < 2 or w_min is outside [2, number of training rows]
    """
    start_time = time.time()

    if model_args is None:
        model_args = {}

    n_initial = X_train.shape[0]
    # m == 1 would divide by zero in the window-size formula below
    if m < 2:
        raise ValueError(f'm must be at least 2, got {m}')
    # w_min < 2 yields an empty first window; w_min > n_initial yields a negative
    # slice start, which numpy would silently interpret as counting from the end
    if not 2 <= w_min <= n_initial:
        raise ValueError(f'w_min must be between 2 and {n_initial} (training rows), got {w_min}')

    n_periods = y_test.shape[0]
    pred_model = np.zeros((n_periods,))
    pred_bmk = np.zeros((n_periods,))

    # Stack train and test once; each forecast then slices the part of the history it
    # may use instead of re-copying the arrays with vstack/hstack on every step.
    X_all = np.vstack([X_train, X_test[:n_periods]])
    y_all = np.hstack([y_train, y_test])

    # loop through all forecasting periods
    for j in range(n_periods):
        print(f'{j+1}/{n_periods}', end='\r')

        # number of rows of history available for forecast j
        T = n_initial + j
        single_preds = np.zeros((m,))
        x_new = X_test[[j],:]

        # AveW part
        for i in range(1, m+1):
            # calculate size of window i
            w_i = int(w_min + (i - 1)/(m - 1) * (T - w_min)) - 1
            # starting and ending index of window i
            start, end = T-1-w_i, T-1

            X = X_all[start:end,:]
            X_scaler = StandardScaler().fit(X)
            X = X_scaler.transform(X)
            y = y_all[start:end].reshape(-1,1)
            y_scaler = StandardScaler().fit(y)
            y = y_scaler.transform(y).flatten()

            model = forecast_model(**model_args).fit(X, y)
            pred = model.predict(X_scaler.transform(x_new))
            # undo the target scaling and store a true scalar: assigning a size-1 array
            # to a single element is deprecated in recent NumPy releases
            single_preds[i-1] = y_scaler.inverse_transform(np.asarray(pred).reshape(-1, 1))[0, 0]

        pred_model[j] = np.mean(single_preds)

        # forecasts of historical average
        pred_bmk[j] = np.mean(y_all[:T])

    print('')

    MSPE_model = metrics.mean_squared_error(y_test, pred_model)
    MSPE_bmk = metrics.mean_squared_error(y_test, pred_bmk)
    R2OS_val = R2OS(MSPE_model, MSPE_bmk)

    end_time = time.time()
    elapsed_time = end_time - start_time

    return {'out-of-sample R2': R2OS_val, 
            'MSPE_bmk': MSPE_bmk, 
            'MSPE_model': MSPE_model, 
            'pred_bmk': pred_bmk,
            'pred_model': pred_model,
            'elapsed time': elapsed_time}

# splitter passed as `cv` to the LassoCV / ElasticNetCV AveW runs in the cells below
cv = TimeSeriesSplit(n_splits=5)


In [31]:
# AveW forecasts with OLS: 10 windows, smallest window parameter 240.
results_AveW_LinearRegression = AveW(
    X_train, X_test[0:,:], y_train, y_test[0:],
    LinearRegression,
    m=10,
    w_min=240,
    model_args={'fit_intercept': True},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_LinearRegression[item]}')

720/720
out-of-sample R2: -0.04698292355780809
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0018459973705825176
elapsed time: 10.46718978881836


In [34]:
# CW statistic: AveW OLS forecasts against the historical-average benchmark
print(
    CW_test(
        y_test,
        results_AveW_LinearRegression['pred_bmk'],
        results_AveW_LinearRegression['pred_model'],
    )
)

1.0533314929254785


In [40]:
# AveW forecasts with LassoCV: 10 windows, smallest window parameter 240,
# iteration cap raised to 5000.
results_AveW_LassoCV = AveW(
    X_train, X_test[0:,:], y_train, y_test[0:],
    LassoCV,
    m=10,
    w_min=240,
    model_args={'cv': cv, 'max_iter':5000},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_LassoCV[item]}')

720/720
out-of-sample R2: 0.0012518531138231426
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.001760951789701513
elapsed time: 547.1992547512054


In [71]:
# AveW forecasts with ElasticNetCV: 10 windows, smallest window parameter 240,
# iteration cap raised to 5000.
results_AveW_ElasticNetCV = AveW(
    X_train, X_test[0:,:], y_train, y_test[0:],
    ElasticNetCV,
    m=10,
    w_min=240,
    model_args={'cv': cv, 'max_iter':5000},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_ElasticNetCV[item]}')

720/720


### Parallel Expanding Window

In [13]:
from joblib import Parallel, delayed
from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn import metrics

def expanding_window_parallel(X_train, X_test, y_train, y_test, forecast_model, model_args=None, workers=-1):
    """
    Expanding-window one-step-ahead forecasts, one joblib task per test observation.

    Task i re-builds its own history (initial training sample plus test observations
    0..i-1), standardises features and target on it, fits the model and returns the
    forecast on the original scale of y together with the mean of the target history.

    parameters
    -----------
    X_train: 2-dimensional numpy array, independent variables used to train the forecast model
    X_test: 2-dimensional numpy array, independent variables used for forecasting
    y_train: 1-dimensional numpy array, dependent variable used to train the forecast model
    y_test: 1-dimensional numpy array, dependent variable to forecast
    forecast_model: python object, model used for forecasting. Make this object similar to the
    sklearn models, with a fit() and predict() function
    model_args: dict, arguments to pass when initializing the model
    workers: int, passed to joblib as n_jobs

    returns
    -----------
    dict with keys 'out-of-sample R2' (value of R2OS(MSPE_model, MSPE_bmk)), 'MSPE_bmk',
    'MSPE_model', 'pred_bmk', 'pred_model' and 'elapsed time' (seconds)
    """
    start_time = time.time()
    model_args = {} if model_args is None else model_args
    n_periods = y_test.shape[0]

    def _forecast_step(X_initial, y_initial, i):
        """Return (historical mean, model forecast) for test observation i."""
        print(f'{i+1}/{n_periods}', end='\r')

        # history for this step: initial sample followed by the first i test rows
        X_hist = np.vstack([X_initial, X_test[:i,:]])
        y_hist = np.hstack([y_initial, y_test[:i]])

        X_scaler = StandardScaler().fit(X_hist)
        y_column = y_hist.reshape(-1,1)
        y_scaler = StandardScaler().fit(y_column)

        # forecasts of model, fitted on standardised data
        fitted = forecast_model(**model_args).fit(
            X_scaler.transform(X_hist),
            y_scaler.transform(y_column).flatten(),
        )
        scaled_pred = fitted.predict(X_scaler.transform(X_test[[i],:]))

        return np.mean(y_hist), y_scaler.inverse_transform(scaled_pred)[0]

    runner = Parallel(n_jobs=workers, backend='loky', verbose=10)
    pred_pairs = runner(delayed(_forecast_step)(X_train, y_train, i) for i in range(n_periods))

    # split the (benchmark, model) pairs into two forecast vectors
    pred_bmk = np.array([pair[0] for pair in pred_pairs], dtype=float)
    pred_model = np.array([pair[1] for pair in pred_pairs], dtype=float)

    MSPE_model = metrics.mean_squared_error(y_test, pred_model)
    MSPE_bmk = metrics.mean_squared_error(y_test, pred_bmk)
    R2OS_val = R2OS(MSPE_model, MSPE_bmk)

    elapsed_time = time.time() - start_time

    return {'out-of-sample R2': R2OS_val, 
            'MSPE_bmk': MSPE_bmk, 
            'MSPE_model': MSPE_model, 
            'pred_bmk': pred_bmk,
            'pred_model': pred_model,
            'elapsed time': elapsed_time}


# splitter passed as `cv` to the parallel LassoCV / ElasticNetCV runs in the cells below
cv = TimeSeriesSplit(n_splits=5)


In [17]:
# Parallel expanding-window run with OLS (default model arguments): print the
# summary metrics, then pickle the full result dict.
results_expw_LinearRegression = expanding_window_parallel(
    X_train, X_test[0:], y_train, y_test[0:],
    LinearRegression,
    model_args=None,
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_LinearRegression[item]}')

with open('forecast_results/results_expw_LinearRegression.pickle', 'wb') as file:
    pickle.dump(results_expw_LinearRegression, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0120s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0360s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0200s.) Setting batch_size=8.
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0280s.) Setting batch_size=16.
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0679s.) Setting batch_size=32.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1278s.) Setting batch_size=64.
[Parallel(n_jobs=-1)]: Done 312 tasks      | elapsed:    0.2s


out-of-sample R2: -0.09479595117769124
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0019302993408247479
elapsed time: 0.6271622180938721


[Parallel(n_jobs=-1)]: Done 529 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 584 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 649 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:    0.5s finished


In [22]:
# Parallel expanding-window run with LassoCV: print the summary metrics,
# then pickle the full result dict.
results_expw_LassoCV = expanding_window_parallel(
    X_train, X_test[0:], y_train, y_test[0:],
    LassoCV,
    model_args={'cv': cv},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_LassoCV[item]}')

with open('forecast_results/results_expw_LassoCV.pickle', 'wb') as file:
    pickle.dump(results_expw_LassoCV, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:    6.0s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   

out-of-sample R2: -0.00023184786504271138
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0017635677904443044
elapsed time: 29.50590991973877


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:   29.4s finished


In [23]:
# Parallel expanding-window run with ElasticNetCV: print the summary metrics,
# then pickle the full result dict.
results_expw_ElasticNetCV = expanding_window_parallel(X_train, X_test[0:], y_train, y_test[0:], 
                                                      ElasticNetCV, model_args={'cv': cv})
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_expw_ElasticNetCV[item]}')

# File name follows the 'results_<scheme>_<model>.pickle' pattern of the sibling cells
# (the previous name repeated the 'results_expw_' prefix).
with open('forecast_results/results_expw_ElasticNetCV.pickle', 'wb') as file:
    pickle.dump(results_expw_ElasticNetCV, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   

out-of-sample R2: 1.6082268055495064e-05
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0017631306501973247
elapsed time: 27.55993390083313


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:   27.5s finished


### Parallel AveW

In [24]:
import joblib
from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn import metrics

import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

def AveW_parallel(X_train, X_test, y_train, y_test, forecast_model, m, w_min, model_args=None, workers=-1):
    """
    AveW one-step-ahead forecasts, one joblib task per test observation.

    Task j re-builds its own history (initial training sample plus test observations
    0..j-1, T rows). For i = 1..m a model is fitted on the rows [T-1-w_i, T-1) of that
    history, with
        w_i = int(w_min + (i - 1)/(m - 1) * (T - w_min)) - 1,
    features and target being standardised on the window. The m forecasts are mapped
    back to the original scale of y and averaged. The benchmark forecast is the mean
    of the full target history.

    NOTE(review): every window ends at row T-2, i.e. the most recent training row
    (index T-1) is never used for fitting -- confirm this is intended.

    parameters
    -----------
    X_train: 2-dimensional numpy array, independent variables of the initial training sample
    X_test: 2-dimensional numpy array, independent variables used for forecasting
    y_train: 1-dimensional numpy array, dependent variable of the initial training sample
    y_test: 1-dimensional numpy array, dependent variable to forecast
    forecast_model: class with sklearn-style fit() and predict(), instantiated per window
    m: int, number of windows (at least 2)
    w_min: int, smallest window parameter (between 2 and the number of training rows)
    model_args: dict, arguments to pass when initializing the model
    workers: int, passed to joblib as n_jobs

    returns
    -----------
    dict with keys 'out-of-sample R2' (value of R2OS(MSPE_model, MSPE_bmk)), 'MSPE_bmk',
    'MSPE_model', 'pred_bmk', 'pred_model' and 'elapsed time' (seconds)

    raises
    -----------
    ValueError if m < 2 or w_min is outside [2, number of training rows]
    """
    start_time = time.time()

    if model_args is None:
        model_args = {}

    n_initial = X_train.shape[0]
    # m == 1 would divide by zero in the window-size formula below
    if m < 2:
        raise ValueError(f'm must be at least 2, got {m}')
    # w_min < 2 yields an empty first window; w_min > n_initial yields a negative
    # slice start, which numpy would silently interpret as counting from the end
    if not 2 <= w_min <= n_initial:
        raise ValueError(f'w_min must be between 2 and {n_initial} (training rows), got {w_min}')

    n_periods = y_test.shape[0]
    pred_model = np.zeros((n_periods,))
    pred_bmk = np.zeros((n_periods,))

    # forecast for a single test observation j (executed inside a worker)
    def AveW_forecast(X_train, y_train, j, m):

        # history available for forecast j: initial sample + the first j test rows
        X_train = np.vstack([X_train, X_test[:j,:]])
        y_train = np.hstack([y_train, y_test[:j]])

        T = X_train.shape[0]
        single_preds = np.zeros((m,))

        # AveW part
        for i in range(1, m+1):
            # calculate size of window i
            w_i = int(w_min + (i - 1)/(m - 1) * (T - w_min)) - 1
            # starting and ending index of window i
            start, end = T-1-w_i, T-1

            X = X_train[start:end,:]
            X_scaler = StandardScaler().fit(X)
            X = X_scaler.transform(X)
            y = y_train[start:end].reshape(-1,1)
            y_scaler = StandardScaler().fit(y)
            y = y_scaler.transform(y).flatten()

            model = forecast_model(**model_args).fit(X, y)
            pred = model.predict(X_scaler.transform(X_test[[j],:]))
            # undo the target scaling and store a true scalar: assigning a size-1 array
            # to a single element is deprecated in recent NumPy releases
            single_preds[i-1] = y_scaler.inverse_transform(np.asarray(pred).reshape(-1, 1))[0, 0]

        bmk = np.mean(y_train)

        return bmk, np.mean(single_preds)

    # use the joblib module imported in this cell, so the cell does not depend on
    # `Parallel` / `delayed` having been imported by an earlier cell
    pred_pairs = joblib.Parallel(n_jobs=workers, backend='loky', verbose=10) \
        (joblib.delayed(AveW_forecast)(X_train, y_train, j, m) for j in range(n_periods))

    for i, pred_pair in enumerate(pred_pairs):
        pred_bmk[i] = pred_pair[0]
        pred_model[i] = pred_pair[1]

    MSPE_model = metrics.mean_squared_error(y_test, pred_model)
    MSPE_bmk = metrics.mean_squared_error(y_test, pred_bmk)
    R2OS_val = R2OS(MSPE_model, MSPE_bmk)

    end_time = time.time()
    elapsed_time = end_time - start_time

    return {'out-of-sample R2': R2OS_val, 
            'MSPE_bmk': MSPE_bmk, 
            'MSPE_model': MSPE_model, 
            'pred_bmk': pred_bmk,
            'pred_model': pred_model,
            'elapsed time': elapsed_time}

# splitter passed as `cv` to the parallel LassoCV / ElasticNetCV AveW runs in the cells below
cv = TimeSeriesSplit(n_splits=5)


In [25]:
# Parallel AveW run with OLS (10 windows, smallest window parameter 240):
# print the summary metrics, then pickle the full result dict.
results_AveW_LinearRegression = AveW_parallel(
    X_train, X_test[0:,:], y_train, y_test[0:],
    LinearRegression,
    m=10,
    w_min=240,
    model_args={'fit_intercept': True},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_LinearRegression[item]}')

with open('forecast_results/results_AveW_LinearRegression.pickle', 'wb') as file:
    pickle.dump(results_AveW_LinearRegression, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0330s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1076s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1581s.) Setting batch_size=8.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 200 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 288 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 376 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done 480 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 584 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 690 tasks      | elapsed:  

out-of-sample R2: -0.04698292355780809
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.0018459973705825176
elapsed time: 5.1809210777282715


In [26]:
# Parallel AveW run with LassoCV (10 windows, smallest window parameter 240,
# iteration cap 5000): print the summary metrics, then pickle the full result dict.
results_AveW_LassoCV = AveW_parallel(
    X_train, X_test[0:,:], y_train, y_test[0:],
    LassoCV,
    m=10,
    w_min=240,
    model_args={'cv': cv, 'max_iter':5000},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_LassoCV[item]}')

with open('forecast_results/results_AveW_LassoCV.pickle', 'wb') as file:
    pickle.dump(results_AveW_LassoCV, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   14.7s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   18.5s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   21.1s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:   26.0s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:   29.8s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:   34.7s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   39.5s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   46.1s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   53.0s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:  1

out-of-sample R2: 0.0012518531138231426
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.001760951789701513
elapsed time: 252.27812671661377


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:  4.2min finished


In [31]:
# Parallel AveW run with ElasticNetCV (10 windows, smallest window parameter 240,
# iteration cap 5000): print the summary metrics, then pickle the full result dict.
results_AveW_ElasticNetCV = AveW_parallel(
    X_train, X_test[0:,:], y_train, y_test[0:],
    ElasticNetCV,
    m=10,
    w_min=240,
    model_args={'cv': cv, 'max_iter':5000},
)
display = ['out-of-sample R2', 'MSPE_bmk', 'MSPE_model', 'elapsed time']
for item in display:
    print(f'{item}: {results_AveW_ElasticNetCV[item]}')

with open('forecast_results/results_AveW_ElasticNetCV.pickle', 'wb') as file:
    pickle.dump(results_AveW_ElasticNetCV, file)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   11.0s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   13.4s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   16.9s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   19.4s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:   24.4s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:   28.6s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:   33.3s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   37.0s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   42.9s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   47.8s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   

out-of-sample R2: 0.0027540616533363593
MSPE_bmk: 0.0017631590057930802
MSPE_model: 0.001758303157186491
elapsed time: 235.4264612197876


[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed:  3.9min finished
