In [2]:
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from forecast_evaluation import R2OS
from forecast_methods import HA_combination

# Row ranges of the estimation (train) and evaluation (test) samples.
TRAIN_ROWS = slice(0, 360)
TEST_ROWS = slice(360, 1080)

data_stocks = pd.read_csv('data_stocks.csv')
data_returns = pd.read_csv('data_return.csv')

# Realised returns restricted to the evaluation rows.
returns = np.array(data_returns)[TEST_ROWS]

# Predictors: every column except 'yyyymm' (presumably the date key) and the
# target column 'EQPREM'.
X = np.array(data_stocks.drop(['yyyymm', 'EQPREM'], axis=1))
# Target as a flat 1-D vector.
y = np.array(data_stocks[['EQPREM']]).ravel()

X_train, X_test = X[TRAIN_ROWS], X[TEST_ROWS]
y_train, y_test = y[TRAIN_ROWS], y[TEST_ROWS]


In [3]:
def R2OS_stdreturn(pred_model, y_train, y_test, returns):
    """Print the out-of-sample R2 of a forecast, split by realised-return size.

    ``returns`` is standardised with ``StandardScaler`` (fitted on the values
    passed in) and every test period is assigned to bands according to its
    standardised return ``z``:

    * moderate:          ``|z| < 0.5``
    * extreme absolute:  ``|z|`` in [0.5, 1), [1, 1.5), [1.5, 2), [2, inf)
    * extreme positive:  ``z``   in the same four bands
    * extreme negative:  ``-z``  in the same four bands

    For each band the number of periods and
    ``R2OS(MSFE_model, MSFE_benchmark)`` are printed. The benchmark forecast
    for test period ``i`` is the mean of ``y_train`` followed by
    ``y_test[:i]`` (an expanding-window average).

    Parameters
    ----------
    pred_model : array-like
        Model forecasts, one per test period. Values beyond ``len(y_test)``
        are ignored.
    y_train : array-like
        Target values preceding the test sample; seeds the benchmark mean.
    y_test : array-like
        Realised target values of the test sample.
    returns : array-like
        Realised returns aligned with ``y_test``; a 1-D vector or a single
        column/row. Values beyond ``len(y_test)`` still enter the
        standardisation but are not reported.

    Returns
    -------
    None
        The report is written to stdout.

    Raises
    ------
    ValueError
        If ``returns`` has more than one non-singleton dimension, if
        ``returns`` or ``pred_model`` hold fewer values than ``y_test``, or if
        ``y_test`` is not one value per period.
    """
    n_periods = len(y_test)

    returns_arr = np.asarray(returns, dtype=float)
    # A multi-column table would be flattened row-major by reshape(-1, 1) and
    # z[i] would no longer belong to test period i, so refuse it explicitly.
    if sum(dim != 1 for dim in returns_arr.shape) > 1:
        raise ValueError(
            'returns must be a 1-D vector or a single column, got shape '
            f'{returns_arr.shape}'
        )

    returns_col = returns_arr.reshape(-1, 1)
    scaler = StandardScaler().fit(returns_col)
    # ravel() yields plain scalars per period instead of 1-element arrays.
    z_all = np.asarray(scaler.transform(returns_col)).ravel()

    forecasts = np.asarray(pred_model, dtype=float).ravel()
    realised = np.asarray(y_test, dtype=float).ravel()
    if realised.size != n_periods:
        raise ValueError('y_test must contain exactly one value per period')
    if z_all.size < n_periods or forecasts.size < n_periods:
        raise ValueError(
            f'need at least {n_periods} returns and forecasts, got '
            f'{z_all.size} returns and {forecasts.size} forecasts'
        )
    z = z_all[:n_periods]
    forecasts = forecasts[:n_periods]

    # Expanding-window benchmark: mean of everything observed before period i.
    pred_bmk = np.zeros((n_periods,))
    for i in range(n_periods):
        pred_bmk[i] = np.mean(np.hstack([y_train, y_test[:i]]))

    def band_masks(values):
        """Boolean masks for [0.5, 1), [1, 1.5), [1.5, 2) and [2, inf)."""
        return [
            (0.5 <= values) & (values < 1.0),
            (1.0 <= values) & (values < 1.5),
            (1.5 <= values) & (values < 2.0),
            2.0 <= values,
        ]

    def calc_r2(mask):
        """R2OS over the periods selected by ``mask``; NaN if none are."""
        # mean_squared_error rejects empty input, which used to abort the
        # whole report as soon as one band had no observations.
        if not mask.any():
            return float('nan')
        # Squared error is symmetric, so the argument order does not matter.
        MSFE_M = metrics.mean_squared_error(realised[mask], forecasts[mask])
        MSFE_bmk = metrics.mean_squared_error(realised[mask], pred_bmk[mask])
        return R2OS(MSFE_M, MSFE_bmk)

    # Applying the same bands to -z makes the negative side mirror the
    # positive one exactly, so z == -2.0 lands in the last band (it used to
    # fall between `-2.0 < ret` and `ret < -2.0` and was dropped).
    sections = [
        ('Moderate Normalized Returns', [np.abs(z) < 0.5]),
        ('Extreme Absolute Returns', band_masks(np.abs(z))),
        ('Extreme Positive Returns', band_masks(z)),
        ('Extreme Negative Returns', band_masks(-z)),
    ]

    rule = '------------------------------------------------'
    for position, (title, masks) in enumerate(sections):
        if position:
            # Blank line separating consecutive sections.
            print('')
        print(rule)
        print(title)
        print(rule)

        print('')
        print('Observations')
        for mask in masks:
            print(f'{np.count_nonzero(mask)}/{n_periods}')

        print('')
        print('R2OS')
        for mask in masks:
            print(calc_r2(mask))


In [4]:
# HA + LinearRegression
filename = 'forecast_results/results_AveW_LinearRegression.pickle'

# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# result files produced by this project.
with open(filename, 'rb') as file:
    results = pickle.load(file)

# The file is no longer needed once the results are in memory, so the
# combination step runs after it has been closed.
pred_AveW = results['pred_model']
pred_HA = HA_combination(y_train, y_test, pred_AveW)['pred_model']

R2OS_stdreturn(pred_HA, y_train, y_test, returns)

1/7202/7203/7204/7205/7206/7207/7208/7209/72010/72011/72012/72013/72014/72015/72016/72017/72018/72019/72020/72021/72022/72023/72024/72025/72026/72027/72028/72029/72030/72031/72032/72033/72034/72035/72036/72037/72038/72039/72040/72041/72042/72043/72044/72045/72046/72047/72048/72049/72050/72051/72052/72053/72054/72055/72056/72057/72058/72059/72060/72061/72062/72063/72064/72065/72066/72067/72068/72069/72070/72071/72072/72073/72074/72075/72076/72077/72078/72079/72080/72081/72082/72083/72084/72085/72086/72087/72088/72089/72090/72091/72092/72093/72094/72095/72096/72097/72098/72099/720100/720101/720102/720103/720104/720105/720106/720107/720108/720109/720110/720111/720112/720113/720114/720115/720116/720117/720118/720119/720120/720121/720122/720123/720124/720125/720126/720127/720128/720129/720130/720131/720132/720133/720134/720135/720136/720137/720138/720139/

In [5]:
# HA + WALS
filename = 'forecast_results/results_AveW_WALS.pickle'

# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# result files produced by this project.
with open(filename, 'rb') as file:
    results = pickle.load(file)

# The file is no longer needed once the results are in memory, so the
# combination step runs after it has been closed.
pred_AveW = results['pred_model']
pred_HA = HA_combination(y_train, y_test, pred_AveW)['pred_model']

R2OS_stdreturn(pred_HA, y_train, y_test, returns)

1/7202/7203/7204/7205/7206/7207/7208/7209/72010/72011/72012/72013/72014/72015/72016/72017/72018/72019/72020/72021/72022/72023/72024/72025/72026/72027/72028/72029/72030/72031/72032/72033/72034/72035/72036/72037/72038/72039/72040/72041/72042/72043/72044/72045/72046/72047/72048/72049/72050/72051/72052/72053/72054/72055/72056/72057/72058/72059/72060/72061/72062/72063/72064/72065/72066/72067/72068/72069/72070/72071/72072/72073/72074/72075/72076/72077/72078/72079/72080/72081/72082/72083/72084/72085/72086/72087/72088/72089/72090/72091/72092/72093/72094/72095/72096/72097/72098/72099/720100/720101/720102/720103/720104/720105/720106/720107/720108/720109/720110/720111/720112/720113/720114/720115/720116/720117/720118/720119/720120/720121/720122/720123/720124/720125/720126/720127/720128/720129/720130/720131/720132/720133/720134/720135/720136/720137/720138/720139/

In [9]:
# HA + SVRRBF
# NOTE(review): this cell was labelled "expw SVRRBF", but it loads the AveW
# result file and applies HA_combination like the neighbouring cells; confirm
# which label is intended.
filename = 'ml_results/results_AveW_SVRRBF.pickle'

# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# result files produced by this project.
with open(filename, 'rb') as file:
    results = pickle.load(file)

# The file is no longer needed once the results are in memory, so the
# combination step runs after it has been closed.
pred_AveW = results['pred_model']
pred_HA = HA_combination(y_train, y_test, pred_AveW)['pred_model']

R2OS_stdreturn(pred_HA, y_train, y_test, returns)

1/7202/7203/7204/7205/7206/7207/7208/7209/72010/72011/72012/72013/72014/72015/72016/72017/72018/72019/72020/72021/72022/72023/72024/72025/72026/72027/72028/72029/72030/72031/72032/72033/72034/72035/72036/72037/72038/72039/72040/72041/72042/72043/72044/72045/72046/72047/72048/72049/72050/72051/72052/72053/72054/72055/72056/72057/72058/72059/72060/72061/72062/72063/72064/72065/72066/72067/72068/72069/72070/72071/72072/72073/72074/72075/72076/72077/72078/72079/72080/72081/72082/72083/72084/72085/72086/72087/72088/72089/72090/72091/72092/72093/72094/72095/72096/72097/72098/72099/720100/720101/720102/720103/720104/720105/720106/720107/720108/720109/720110/720111/720112/720113/720114/720115/720116/720117/720118/720119/720120/720121/720122/720123/720124/720125/720126/720127/720128/720129/720130/720131/720132/720133/720134/720135/720136/720137/720138/720139/

In [14]:
# HA + KNRUnif
filename = 'ml_results/results_AveW_KNRUnif.pickle'

# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# result files produced by this project.
with open(filename, 'rb') as file:
    results = pickle.load(file)

# The file is no longer needed once the results are in memory, so the
# combination step runs after it has been closed.
pred_AveW = results['pred_model']
pred_HA = HA_combination(y_train, y_test, pred_AveW)['pred_model']

R2OS_stdreturn(pred_HA, y_train, y_test, returns)

1/7202/7203/7204/7205/7206/7207/7208/7209/72010/72011/72012/72013/72014/72015/72016/72017/72018/72019/72020/72021/72022/72023/72024/72025/72026/72027/72028/72029/72030/72031/72032/72033/72034/72035/72036/72037/72038/72039/72040/72041/72042/72043/72044/72045/72046/72047/72048/72049/72050/72051/72052/72053/72054/72055/72056/72057/72058/72059/72060/72061/72062/72063/72064/72065/72066/72067/72068/72069/72070/72071/72072/72073/72074/72075/72076/72077/72078/72079/72080/72081/72082/72083/72084/72085/72086/72087/72088/72089/72090/72091/72092/72093/72094/72095/72096/72097/72098/72099/720100/720101/720102/720103/720104/720105/720106/720107/720108/720109/720110/720111/720112/720113/720114/720115/720116/720117/720118/720119/720120/720121/720122/720123/720124/720125/720126/720127/720128/720129/720130/720131/720132/720133/720134/720135/720136/720137/720138/720139/