In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (16.0, 9.0)

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import GridSearchCV

from sklearn.linear_model import LinearRegression, HuberRegressor, SGDRegressor
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
# from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin
from sklearn.pipeline import Pipeline
from datetime import datetime
# import lightgbm as lgb


In [2]:
df = pd.read_csv("../data/initial_data.csv") 

In [3]:
# 在94个x之外的列元素
extra = ['permno','DATE','mve0','prc','RET','SHROUT','sic2']

In [4]:
df_1972=df[df['DATE']>=19720101].copy()

In [5]:
cols = df_1972.columns.tolist()

In [6]:
df_dropna = df_1972.groupby('DATE',as_index=False)[cols].transform(lambda x: x.fillna(x.median()))

  return np.nanmean(a, axis, out=out, keepdims=keepdims)


In [9]:
def standardize(df):
    # exclude the the information columns
    col_names = df.columns.values.tolist()
    list_to_remove = ['permno', 'DATE', 'date', 'datadate', 'gvkey', 'sic', 'count', 'exchcd', 'shrcd', 'ffi49', 'ret',
                      'retadj', 'retx', 'lag_me']
    col_names = list(set(col_names).difference(set(list_to_remove)))
    for col_name in col_names:
        print('processing %s' % col_name)
        # count the non-missing number of factors, we only count non-missing values
        unique_count = df.dropna(subset=['%s' % col_name]).groupby(['DATE'])['%s' % col_name].unique().apply(len)
        unique_count = pd.DataFrame(unique_count).reset_index()
        unique_count.columns = ['DATE', 'count']
        df = pd.merge(df, unique_count, how='left', on=['DATE'])
        # ranking, and then standardize the data
        df['%s_rank' % col_name] = df.groupby(['DATE'])['%s' % col_name].rank(method='dense')
        df['rank_%s' % col_name] = (df['%s_rank' % col_name] - 1) / (df['count'] - 1) * 2 - 1
        df = df.drop(['%s_rank' % col_name, '%s' % col_name, 'count'], axis=1)
    df = df.fillna(0)
    return df

In [10]:
df_rank = standardize(df_dropna)

processing roavol
processing retvol
processing grltnoa
processing sp
processing RET
processing lgr
processing depr
processing roic
processing stdcf
processing mve0
processing rd_sale
processing std_turn
processing pchgm_pchsale
processing ill
processing cinvest
processing bm
processing pchcurrat
processing mvel1
processing pctacc
processing absacc
processing cashdebt
processing roeq
processing chpmia
processing mom36m
processing agr
processing pchsale_pchrect
processing idiovol
processing mve_ia
processing ms
processing gma
processing rd
processing invest
processing age
processing pchsaleinv
processing baspread
processing securedind
processing rsup
processing prc
processing orgcap
processing salecash
processing mom1m
processing saleinv
processing pchsale_pchxsga
processing pchsale_pchinvt
processing currat
processing pchcapx_ia
processing pricedelay
processing roaq
processing tang
processing herf
processing cfp
processing std_dolvol
processing ear
processing mom6m
processing betasq
pro

In [11]:
df_rank = df_rank.drop(columns=['rank_mve0','rank_prc','rank_SHROUT','rank_sic2'])

In [12]:
df_rank

Unnamed: 0,permno,DATE,rank_roavol,rank_retvol,rank_grltnoa,rank_sp,rank_RET,rank_lgr,rank_depr,rank_roic,...,rank_rd_mve,rank_ep,rank_tb,rank_maxret,rank_sin,rank_egr,rank_bm_ia,rank_cash,rank_pchquick,rank_chatoia
0,10006,19720131,0.000000,-0.966346,0.529197,-0.195598,-0.476645,0.285866,-0.629517,0.086957,...,-0.610487,0.212274,-0.537825,-0.863910,-1.0,-0.418202,0.126789,0.000000,0.401124,0.662705
1,10014,19720131,0.000000,0.894231,-0.883212,0.378689,0.578556,-0.263549,0.209160,-0.923913,...,-0.074906,-0.892354,-0.482270,0.770887,-1.0,-0.867822,0.622699,0.000000,-0.787640,-0.761177
2,10057,19720131,0.000000,0.016827,0.018248,0.154577,0.402335,0.616366,-0.061578,-0.125000,...,-0.074906,0.469819,0.393617,0.016365,-1.0,-0.563380,0.710634,0.000000,0.714607,0.231466
3,10065,19720131,0.000000,-0.736378,0.000000,0.009505,-0.169851,0.000000,0.009669,0.000000,...,-0.074906,0.000000,-0.047281,-0.662360,-1.0,-0.001083,0.008180,0.000000,0.000000,-0.000566
4,10102,19720131,0.000000,-0.371795,-0.423358,0.022511,0.097665,-0.088204,-0.265140,-0.309783,...,0.067416,0.194165,-0.815603,-0.018088,-1.0,-0.595883,0.753579,0.000000,0.247191,-0.488398
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3717436,93423,20201231,0.521326,0.616928,0.417193,-0.150362,0.471229,0.256082,-0.539853,0.858152,...,-0.998782,0.266615,-0.185150,0.786741,-1.0,0.363803,0.000000,-0.908430,0.893571,-0.394495
3717437,93426,20201231,-0.532192,0.423032,0.549937,0.140713,0.261832,0.323045,0.154324,0.672337,...,-0.191230,0.291113,0.070919,0.725809,-1.0,0.327762,0.000000,0.457581,-0.801729,-0.731786
3717438,93427,20201231,-0.738658,-0.490137,-0.500632,0.334227,0.578141,0.150929,-0.021481,0.745616,...,-0.260658,0.517852,-0.831509,-0.411805,-1.0,0.556542,0.000000,0.641799,0.350621,0.554776
3717439,93434,20201231,0.325727,0.154274,-0.704172,0.519164,0.527749,-0.878629,0.305257,0.596964,...,0.323995,-0.594475,0.742980,0.481738,-1.0,0.652128,0.000000,-0.417183,0.719611,0.745818


In [13]:
print(df_rank[df_rank.isnull().T.any()].T)

Empty DataFrame
Columns: []
Index: [permno, DATE, rank_roavol, rank_retvol, rank_grltnoa, rank_sp, rank_RET, rank_lgr, rank_depr, rank_roic, rank_stdcf, rank_rd_sale, rank_std_turn, rank_pchgm_pchsale, rank_ill, rank_cinvest, rank_bm, rank_pchcurrat, rank_mvel1, rank_pctacc, rank_absacc, rank_cashdebt, rank_roeq, rank_chpmia, rank_mom36m, rank_agr, rank_pchsale_pchrect, rank_idiovol, rank_mve_ia, rank_ms, rank_gma, rank_rd, rank_invest, rank_age, rank_pchsaleinv, rank_baspread, rank_securedind, rank_rsup, rank_orgcap, rank_salecash, rank_mom1m, rank_saleinv, rank_pchsale_pchxsga, rank_pchsale_pchinvt, rank_currat, rank_pchcapx_ia, rank_pricedelay, rank_roaq, rank_tang, rank_herf, rank_cfp, rank_std_dolvol, rank_ear, rank_mom6m, rank_betasq, rank_dy, rank_salerec, rank_turn, rank_grcapx, rank_divi, rank_convind, rank_acc, rank_chempia, rank_aeavol, rank_zerotrade, rank_chinv, rank_cfp_ia, rank_beta, rank_dolvol, rank_operprof, rank_mom12m, rank_chmom, rank_quick, rank_stdacc, rank_nincr

In [14]:
cols = df_rank.columns.tolist()

In [15]:
cols_mon = ['rank_baspread','rank_beta','rank_betasq','rank_chmom','rank_dolvol','rank_idiovol','rank_ill','rank_indmom','rank_maxret','rank_mom12m','rank_mom1m','rank_mom36m','rank_mom6m','rank_mvel1','rank_pricedelay','rank_retvol','rank_std_dolvol','rank_std_turn','rank_turn','rank_zerotrade']

In [16]:
cols_qua = ['rank_aeavol','rank_cash','rank_chtx','rank_cinvest','rank_ear','rank_ms','rank_nincr','rank_roaq','rank_roavol','rank_roeq','rank_rsup','rank_stdacc','rank_stdcf']

In [17]:
cols_beyond94 = ['RET','permno','DATE']

In [18]:
cols_ann = list(set(cols)-set(cols_mon)-set(cols_qua)-set(cols_beyond94))

In [19]:
df_rank[cols_mon] = df_rank[cols_mon].shift(1)
df_rank[cols_qua] = df_rank[cols_qua].shift(5)
df_rank[cols_ann] = df_rank[cols_ann].shift(7)

In [20]:
df_rank = df_rank.fillna(0)

In [23]:
df_rank['DATE'] = df_rank['DATE'].astype(str).str.slice(start = 0, stop = 6)

In [26]:
df_rank['DATE'] = df_rank['DATE'].astype(int)

In [27]:
df_rank=df_rank[df_rank['DATE']>=197501].copy()

In [28]:
df_rank

Unnamed: 0,permno,DATE,rank_roavol,rank_retvol,rank_grltnoa,rank_sp,rank_RET,rank_lgr,rank_depr,rank_roic,...,rank_rd_mve,rank_ep,rank_tb,rank_maxret,rank_sin,rank_egr,rank_bm_ia,rank_cash,rank_pchquick,rank_chatoia
157936,10006,197501,0.009809,0.602465,0.000000,0.010026,0.462989,0.000000,0.011728,0.000000,...,-0.251641,0.000288,-0.057424,0.715111,-1.0,0.000000,0.005814,0.008000,0.000000,0.000898
157937,10014,197501,0.009809,-0.453519,0.000000,0.010026,-0.938391,0.000000,0.011728,0.000000,...,-0.251641,0.000288,-0.057424,-0.288703,-1.0,0.000000,0.005814,0.008000,0.000000,0.000898
157938,10050,197501,0.009809,0.913553,1.000000,-0.994271,-0.828046,0.957022,-0.863580,0.307832,...,-0.251641,-0.910009,0.308189,0.702929,-1.0,1.000000,-0.859884,0.008000,-1.000000,0.000898
157939,10057,197501,0.009809,0.284535,0.000000,0.010026,-0.149425,0.000000,0.011728,0.000000,...,-0.251641,0.000288,-0.057424,0.644351,-1.0,0.000000,0.005814,0.008000,0.000000,0.000898
157940,10065,197501,0.009809,-0.553432,0.000000,0.010026,-0.776552,0.000000,0.011728,0.000000,...,-0.251641,0.000288,-0.057424,-0.108787,-1.0,0.000000,0.005814,0.008000,0.000000,0.000898
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3717436,93423,202012,0.373540,0.897488,0.409608,-0.327794,0.370106,0.787078,0.496326,-0.817325,...,0.438490,-0.675267,0.740124,0.799204,-1.0,-0.926352,0.000000,0.687584,0.272285,0.815974
3717437,93426,202012,0.010052,0.616928,-0.525917,0.034575,0.596868,-0.836777,-0.932165,-0.533107,...,-0.260658,-0.389627,0.742980,0.786741,-1.0,-0.452599,0.000000,0.006194,-0.181524,-0.191581
3717438,93427,202012,0.010052,0.423032,0.178255,0.116591,-0.559755,-0.943500,0.884115,-0.700079,...,0.755177,-0.570498,0.784864,0.725809,-1.0,0.981718,0.000000,0.006194,0.656942,0.977334
3717439,93434,202012,-0.740288,-0.490137,-0.004425,-0.022246,-0.189990,-0.001308,0.007914,0.004449,...,-0.260658,0.002346,0.004284,-0.411805,-1.0,-0.000783,0.000000,-0.405333,0.001080,-0.025904


## 加入宏观因子作用

In [56]:
factors = pd.read_csv("../figure/factors_197501-201912_24.csv")

In [29]:
macro = pd.read_csv("../data/8fac_1985-2020.csv") 

In [57]:
factors = factors[['month','logDP','logEP','svar','b/m','ntis','tbl','tms','dfy']]

In [58]:
factors['month'] = factors['month'].str.replace('-','').astype(int)

In [59]:
factors = factors.rename(columns = {'month':'yyyymm'})

In [60]:
factors=factors[factors['yyyymm']<198501].copy()

In [63]:
macro = pd.concat([factors,macro])

In [64]:
macro

Unnamed: 0,yyyymm,logDP,logEP,svar,b/m,ntis,tbl,tms,dfy
0,197501,-3.056152,-2.175255,0.004017,0.980830,0.008510,0.0626,0.0170,0.0198
1,197502,-3.107892,-2.250332,0.002179,0.933902,0.011652,0.0550,0.0238,0.0203
2,197503,-3.122977,-2.289002,0.002403,0.972466,0.020467,0.0549,0.0275,0.0181
3,197504,-3.165533,-2.354702,0.002314,0.909489,0.022496,0.0561,0.0291,0.0163
4,197505,-3.205074,-2.417766,0.001807,0.897524,0.022954,0.0523,0.0313,0.0179
...,...,...,...,...,...,...,...,...,...
427,202008,-4.080892,-3.569975,0.000743,0.235975,-0.008504,0.0010,0.0055,0.0102
428,202009,-4.045576,-3.533379,0.004907,0.241482,-0.005698,0.0011,0.0057,0.0105
429,202010,-4.020767,-3.519301,0.003661,0.253146,-0.001895,0.0010,0.0069,0.0109
430,202011,-4.126172,-3.635623,0.002492,0.226352,-0.005262,0.0009,0.0078,0.0100


In [65]:
mvs = macro.columns.tolist()[1:]

In [66]:
cols = list(set(cols)-set(['DATE','permno','rank_RET']))

In [67]:
len(cols)

94

In [68]:
# 将公司层面变量和宏观因子相乘
z_all = pd.DataFrame()

for m in df_rank.groupby('DATE'):
    z = pd.DataFrame(m[1])
    month = z['DATE'].unique()[0]
    for i in range(8):
        xt = macro.loc[macro['yyyymm'] == int(month),mvs[i]].values[0]
        for s in cols :
            z[mvs[i] + '_' + s] = xt * z[s]
    z_all = pd.concat([z_all,z])

In [71]:
cols_3 = ['rank_bm','rank_mvel1','rank_mom12m','rank_mom1m','rank_mom36m','rank_mom6m']

In [72]:
z_3 = pd.DataFrame()
for m in df_rank.groupby('DATE'):
    z = pd.DataFrame(m[1])
    month = z['DATE'].unique()[0]
    for i in range(8):
        xt = macro.loc[macro['yyyymm'] == int(month),mvs[i]].values[0]
        for s in cols_3 :
            z[mvs[i] + '_' + s] = xt * z[s]
    z_3 = pd.concat([z_3,z])

## 构造行业虚拟变量

In [73]:
df_sic = df[['permno','DATE','sic2']]

In [74]:
df_sic = df_sic[df_sic['DATE']>=19750101].copy()
df_sic['DATE'] = df_sic['DATE'].astype(str).str.slice(start = 0, stop = 6)

In [75]:
df_sic['DATE'] = df_sic['DATE'].astype(int)

In [76]:
# 加入描述行业的变量sic2
z_all = pd.merge(z_all,df_sic,on = ['permno','DATE'])

In [77]:
z_3 = pd.merge(z_3,df_sic,on = ['permno','DATE'])

In [78]:
# 构造74个虚拟变量
indus_d = pd.get_dummies(z_all['sic2'], prefix='industry' )

In [79]:
indus_d3 = pd.get_dummies(z_3['sic2'], prefix='industry' )

In [80]:
z_all = pd.concat([z_all,indus_d],axis=1)

In [81]:
z_3 = pd.concat([z_3,indus_d3],axis=1)

In [82]:
z_col = z_all.columns.tolist()

In [83]:
cols = list(set(z_col)-set(['DATE','permno','rank_RET','sic2']))

In [84]:
len(cols)

920

In [85]:
z3_col = z_3.columns.tolist()

In [89]:
cols_3 = list(set(z3_col)-set(['DATE','permno','rank_RET','sic2']))

In [90]:
z_all

Unnamed: 0,permno,DATE,rank_roavol,rank_retvol,rank_grltnoa,rank_sp,rank_RET,rank_lgr,rank_depr,rank_roic,...,industry_79.0,industry_80.0,industry_81.0,industry_82.0,industry_83.0,industry_84.0,industry_86.0,industry_87.0,industry_89.0,industry_99.0
0,10006,197501,0.009809,0.602465,0.000000,0.010026,0.462989,0.000000,0.011728,0.000000,...,0,0,0,0,0,0,0,0,0,0
1,10014,197501,0.009809,-0.453519,0.000000,0.010026,-0.938391,0.000000,0.011728,0.000000,...,0,0,0,0,0,0,0,0,0,0
2,10050,197501,0.009809,0.913553,1.000000,-0.994271,-0.828046,0.957022,-0.863580,0.307832,...,0,0,0,0,0,0,0,0,0,0
3,10057,197501,0.009809,0.284535,0.000000,0.010026,-0.149425,0.000000,0.011728,0.000000,...,0,0,0,0,0,0,0,0,0,0
4,10065,197501,0.009809,-0.553432,0.000000,0.010026,-0.776552,0.000000,0.011728,0.000000,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3559500,93423,202012,0.373540,0.897488,0.409608,-0.327794,0.370106,0.787078,0.496326,-0.817325,...,1,0,0,0,0,0,0,0,0,0
3559501,93426,202012,0.010052,0.616928,-0.525917,0.034575,0.596868,-0.836777,-0.932165,-0.533107,...,0,0,0,0,0,0,0,0,0,0
3559502,93427,202012,0.010052,0.423032,0.178255,0.116591,-0.559755,-0.943500,0.884115,-0.700079,...,0,0,0,0,0,0,0,0,0,0
3559503,93434,202012,-0.740288,-0.490137,-0.004425,-0.022246,-0.189990,-0.001308,0.007914,0.004449,...,0,0,0,0,0,0,0,0,0,0


## Train, Validation, Test split

In [91]:
macro['yyyymm'] = pd.to_datetime(macro['yyyymm'],format = '%Y%m').dt.to_period('M')

In [92]:
z_all['DATE'] = pd.to_datetime(z_all['DATE'].astype(int),format='%Y%m').dt.to_period('Y')

In [93]:
z_all['year'] = z_all['DATE'].dt.year

In [96]:
time_idx = [value for (key, value) in sorted(z_all.groupby('DATE').groups.items())]

In [97]:
len(time_idx)

46

In [98]:
# 定义函数list_flat，效果为展开list
def list_flat(list_):
    return [item for sublist in list_ for item in sublist]

In [105]:
# training, validation, testing scheme:
# 1. [1975-1987], [1988-1996], [1997]
# 2. [1975-1988], [1989-1997], [1998]
# ...
# last. [1975-2010], [2011-2019], [2020]
fulltrain_idx = []
cv_idx = []
test_idx = []
for i in range(13,len(time_idx)-9):
    train_idx = list_flat(time_idx[0:i])
    val_idx = list_flat(time_idx[i:i+9])
    fulltrain_idx.append(train_idx + val_idx)
    cv_idx.append((np.where(np.isin(fulltrain_idx[-1], train_idx))[0],
                   np.where(np.isin(fulltrain_idx[-1], val_idx))[0]))
    test_idx.append(time_idx[i+9])

In [112]:
test_years = list(range(1997,2021))

## 模型评价指标

In [100]:
def r2_oos(y_true, y_pred):
    return 1 - np.sum((y_true - y_pred)**2) / np.sum(y_true**2)

In [101]:
r2_oos_scorer = make_scorer(r2_oos)

# Models

## Linear regression

In [102]:
model = HuberRegressor()

In [103]:
y_t_a = []
y_p_a = []

In [107]:
sum = 0.0
for i in range(len(fulltrain_idx)):
    X_fulltrain = z_all.loc[fulltrain_idx[i], cols]
    y_fulltrain = z_all.loc[fulltrain_idx[i], 'rank_RET']
    X_test = z_all.loc[test_idx[i], cols]
    y_test = z_all.loc[test_idx[i], 'rank_RET']
    
    model.fit(X=X_fulltrain, y=y_fulltrain)
    y_pred = model.predict(X=X_test)
    
    print("Test year", test_years[i],":",r2_oos(y_true=y_test, y_pred=y_pred))
    y_t_a.extend(y_test.tolist())
    y_p_a.extend(y_pred.tolist())
    print("aggregate",":",r2_oos(y_true=np.array(y_t_a), y_pred=np.array(y_p_a)))
    sum = sum + r2_oos(y_true=y_test, y_pred=y_pred)
mean = sum/24
print("OLS",":",mean)
print("OLS aggregate",":",r2_oos(y_true=np.array(y_t_a), y_pred=np.array(y_p_a)))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 1997 : 0.013998117059558557
aggregate : 0.013998117059558557


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 1998 : -0.0022164448242143475
aggregate : 0.005890920747928963


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 1999 : -0.022957361011483712
aggregate : -0.0032538243252375754


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2000 : 0.0190390785152047
aggregate : 0.0019502857372922788


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2001 : -0.0036669091241854357
aggregate : 0.0009050785796766991


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2002 : 0.013874547355409161
aggregate : 0.0028281226376374313


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2003 : -0.027821415516855907
aggregate : -0.0009260962687402952


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2004 : -0.005242613362441473
aggregate : -0.001388524328693963


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2005 : 0.0010125772530301669
aggregate : -0.0011544902413371805


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2006 : -0.0077635577460817284
aggregate : -0.0017409625116890926


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2007 : -0.007272990643842903
aggregate : -0.0021986484756668645


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2008 : -0.0036512155305474803
aggregate : -0.002305722069309013


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2009 : -0.036765669553306735
aggregate : -0.004444276755864562


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2010 : -0.013913295563166006
aggregate : -0.004981235650694638


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2011 : 0.003432070781076879
aggregate : -0.004535070076277847


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2012 : -0.005240267648414232
aggregate : -0.0045700939907682425


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2013 : -0.005663354691478828
aggregate : -0.00462119863077759


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2014 : 0.001679139926204054
aggregate : -0.004331928531139839


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2015 : -0.0010037472785080492
aggregate : -0.004182769750933568


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2016 : 0.005415929037166922
aggregate : -0.0037747488609696234


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2017 : -0.00365241643829739
aggregate : -0.003769796486237542


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2018 : 0.0004910806152111924
aggregate : -0.0036023099373108103


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2019 : -0.0005762233824573304
aggregate : -0.003487237268915644


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


IndexError: list index out of range

In [108]:
print("OLS aggregate",":",r2_oos(y_true=np.array(y_t_a), y_pred=np.array(y_p_a)))

OLS aggregate : -0.003487237268915644


In [109]:
y_t_a = []
y_p_a = []

In [111]:
sum = 0.0
for i in range(len(fulltrain_idx)):
    X_fulltrain = z_3.loc[fulltrain_idx[i], cols_3]
    y_fulltrain = z_3.loc[fulltrain_idx[i], 'rank_RET']
    X_test = z_3.loc[test_idx[i], cols_3]
    y_test = z_3.loc[test_idx[i], 'rank_RET']
    
    model.fit(X=X_fulltrain, y=y_fulltrain)
    y_pred = model.predict(X=X_test)
    y_t_a.extend(y_test.tolist())
    y_p_a.extend(y_pred.tolist())
 
    print("Test year", test_years[i],":",r2_oos(y_true=y_test, y_pred=y_pred))
    print("aggregate",":",r2_oos(y_true=np.array(y_t_a), y_pred=np.array(y_p_a)))  
    sum = sum + r2_oos(y_true=y_test, y_pred=y_pred)
mean = sum/24
print("OLS_3",":",mean)
print("OLS_3 aggregate",":",r2_oos(y_true=np.array(y_t_a), y_pred=np.array(y_p_a)))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 1997 : 0.013671811560874714
aggregate : 0.013671811560874714


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 1998 : 0.00182557743509848
aggregate : 0.007748756328200601


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 1999 : -0.021918470147207936
aggregate : -0.0016555896709151696


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2000 : 0.016833549878333742
aggregate : 0.002660560534575751


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2001 : -0.005283038362110748
aggregate : 0.0011824728718192734


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2002 : 0.011414196953387967
aggregate : 0.0026995786637364905


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2003 : -0.03012257330250878
aggregate : -0.0013207606587866128


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2004 : -0.006311380819007395
aggregate : -0.0018554053152444183


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2005 : -0.0006752151770577086
aggregate : -0.0017403728134957053


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2006 : -0.004885820279309616
aggregate : -0.002019492025884251


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2007 : -0.008012394724489003
aggregate : -0.002515307915303522


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2008 : 0.0029973665391621607
aggregate : -0.0021089501761277774


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2009 : -0.02256818058217558
aggregate : -0.0033786324090592235


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2010 : -0.013111889197903182
aggregate : -0.003930575426048355


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2011 : 0.003121721842251013
aggregate : -0.00355658545109816


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2012 : -0.004233761559752969
aggregate : -0.0035902176685953524


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2013 : -0.006967199684956515
aggregate : -0.0037480752135996553


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2014 : 0.0003059757602162927
aggregate : -0.0035619398625348353


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2015 : -0.0026157348557989124
aggregate : -0.003519533883437953


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2016 : 0.002386611293473506
aggregate : -0.0032684758322856222


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2017 : -0.006574736538955683
aggregate : -0.0034023229483950423


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Test year 2018 : -0.001658951637463879
aggregate : -0.0033337945122504653


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


IndexError: list index out of range

## Partial Least Squares

In [113]:
model = PLSRegression(n_components=3)

In [114]:
sum = 0.0

for i in range(len(fulltrain_idx)):
    X_fulltrain = z_all.loc[fulltrain_idx[i], cols]
    y_fulltrain = z_all.loc[fulltrain_idx[i], 'rank_RET']
    X_test = z_all.loc[test_idx[i], cols]
    y_test = z_all.loc[test_idx[i], 'rank_RET']
    
    model.fit(X_fulltrain, y_fulltrain)
    y_pred = model.predict(X=X_test)
    y_pred = y_pred.reshape(-1)
    print("Test year", test_years[i],":",r2_oos(y_true=y_test, y_pred=y_pred))
    sum = sum + r2_oos(y_true=y_test, y_pred=y_pred)
mean = sum/24
print("PLS",":",mean)

Test year 1997 : 0.016889419269000894
Test year 1998 : 0.009042339374591424
Test year 1999 : -0.004078112241480358
Test year 2000 : 0.01787054107503261
Test year 2001 : 0.0040469195081989096
Test year 2002 : 0.01604553045534629
Test year 2003 : -0.009634442305096247
Test year 2004 : 0.0008571580266319989
Test year 2005 : 0.003547362518705466
Test year 2006 : 0.0007128579755623088
Test year 2007 : 0.0005471454797906228
Test year 2008 : 0.010734378491270347
Test year 2009 : -0.012708852886117672
Test year 2010 : -0.001906448117200199
Test year 2011 : 0.007017960917327315
Test year 2012 : 0.0016571071936668513
Test year 2013 : 0.002170745426630205
Test year 2014 : 0.004790673315781979
Test year 2015 : 0.004809878849562632
Test year 2016 : 0.006407269055004328
Test year 2017 : 0.0028198366678839903
Test year 2018 : 0.005414018593980607
Test year 2019 : 0.004120704592366353
Test year 2020 : -0.006860516786773241
PLS : 0.003513061435402809


## Principal Component Regression

In [115]:
class PCARegressor(BaseEstimator, RegressorMixin):
    def __init__(self, n_components = 20):
        self.n_components = n_components
    
    def fit(self, X, y):
        self.pca_ = PCA(n_components=self.n_components).fit(X)
        self.X_ = self.pca_.transform(X)
        self.reg_ = LinearRegression().fit(self.X_,y)
        return self
    
    def predict(self, X):
        self.pred_ = self.reg_.predict(self.pca_.transform(X))
        return self.pred_

In [116]:
model = PCARegressor(n_components = 30)

In [117]:
sum = 0.0

for i in range(len(fulltrain_idx)):
    X_fulltrain = z_all.loc[fulltrain_idx[i], cols]
    y_fulltrain = z_all.loc[fulltrain_idx[i], 'rank_RET']
    X_test = z_all.loc[test_idx[i], cols]
    y_test = z_all.loc[test_idx[i], 'rank_RET']
    
    model.fit(X_fulltrain, y_fulltrain)
    y_pred = model.predict(X=X_test)
    y_pred = y_pred.reshape(-1)
    print("Test year", test_years[i],":",r2_oos(y_true=y_test, y_pred=y_pred))
    sum = sum + r2_oos(y_true=y_test, y_pred=y_pred)
mean = sum/24
print("PCR",":",mean)

Test year 1997 : 0.01741402959351801
Test year 1998 : 0.007289512438936052
Test year 1999 : -0.009760523573540514
Test year 2000 : 0.018003523080425854
Test year 2001 : 0.003070150638721292
Test year 2002 : 0.01565086073891464
Test year 2003 : -0.01462289039388498
Test year 2004 : -0.0006406516815353758
Test year 2005 : 0.0029859270355776513
Test year 2006 : 0.00040427851123647773
Test year 2007 : -0.001831910897968747
Test year 2008 : 0.006429940935999134
Test year 2009 : -0.012853286825344146
Test year 2010 : -0.0046635474173621905
Test year 2011 : 0.004799745125141808
Test year 2012 : 0.00026832472652293937
Test year 2013 : -0.0005659363387475747
Test year 2014 : 0.0033510332655366515
Test year 2015 : 0.0022221545289753397
Test year 2016 : 0.005678839622700438
Test year 2017 : -0.001796161229351112
Test year 2018 : 0.0011177045398840235
Test year 2019 : 0.0011847574286772966
Test year 2020 : -0.009146245375149631
PCR : 0.0014162345199118058


## Elastic Net

In [127]:
model = SGDRegressor(penalty='elasticnet',alpha=0.1,l1_ratio=0.3,loss='huber')

In [128]:
sum = 0.0

for i in range(len(fulltrain_idx)):
    X_fulltrain = z_all.loc[fulltrain_idx[i], cols]
    y_fulltrain = z_all.loc[fulltrain_idx[i], 'rank_RET']
    X_test = z_all.loc[test_idx[i], cols]
    y_test = z_all.loc[test_idx[i], 'rank_RET']
    
    model.fit(X_fulltrain, y_fulltrain)
    y_pred = model.predict(X=X_test)
    y_pred = y_pred.reshape(-1)
    print("Test year", test_years[i],":",r2_oos(y_true=y_test, y_pred=y_pred))
    sum = sum + r2_oos(y_true=y_test, y_pred=y_pred)
mean = sum/24
print("ENet",":",mean)

Test year 1997 : 0.0005433382097667083
Test year 1998 : -0.001208176866767996
Test year 1999 : -0.0068464251253925745
Test year 2000 : -0.002850178365361611
Test year 2001 : -0.012711930489251788
Test year 2002 : -0.01229313683040556
Test year 2003 : -0.012718262152196624
Test year 2004 : -0.012858220605869386
Test year 2005 : -0.011683813778632812
Test year 2006 : -0.01175256657135293
Test year 2007 : -0.010514654513841926
Test year 2008 : -0.009407859999874368
Test year 2009 : -0.009861615940990998
Test year 2010 : -0.010036297912340064
Test year 2011 : -0.008220158178825043
Test year 2012 : -0.007805604568120295
Test year 2013 : -0.00796391480048042
Test year 2014 : -0.00760009782589921
Test year 2015 : -0.007433192382495957
Test year 2016 : -0.00677029472187618
Test year 2017 : -0.006139005084699223
Test year 2018 : -0.008316497123836974
Test year 2019 : -0.006383944512353246
Test year 2020 : -0.0069141310340492534
ENet : -0.008239443382297823


## Generalized linear model with group lasso

In [120]:
model = SGDRegressor(penalty='l1',alpha = 0.01)

In [121]:
sum = 0.0

for i in range(len(fulltrain_idx)):
    X_fulltrain = z_all.loc[fulltrain_idx[i], cols]
    y_fulltrain = z_all.loc[fulltrain_idx[i], 'rank_RET']
    X_test = z_all.loc[test_idx[i], cols]
    y_test = z_all.loc[test_idx[i], 'rank_RET']
    
    model.fit(X_fulltrain, y_fulltrain)
    y_pred = model.predict(X=X_test)
    y_pred = y_pred.reshape(-1)
    print("Test year", test_years[i],":",r2_oos(y_true=y_test, y_pred=y_pred))
    sum = sum + r2_oos(y_true=y_test, y_pred=y_pred)
mean = sum/24
print("GLM",":",mean)

Test year 1997 : 0.01058828909123033
Test year 1998 : 0.003777811810396825
Test year 1999 : -0.01675517048520203
Test year 2000 : 0.014549229323612356
Test year 2001 : 0.0029155549885113707
Test year 2002 : 0.004177818192303562
Test year 2003 : -0.03212577329868482
Test year 2004 : -0.002596122116116595
Test year 2005 : 0.004413458981639029
Test year 2006 : -0.0015118443077726251
Test year 2007 : -0.0032462406395017407
Test year 2008 : 0.0031742715756993833
Test year 2009 : -0.006103736356814382
Test year 2010 : -0.0007527596743808207
Test year 2011 : 0.0007193978703921022
Test year 2012 : -0.0020625467772359407
Test year 2013 : -0.0007483122311391366
Test year 2014 : -0.0038909566367282267
Test year 2015 : -0.017716285044635516
Test year 2016 : 0.0016917965854962302
Test year 2017 : -0.0007366588861272838
Test year 2018 : 0.0033601169949833976
Test year 2019 : 0.0016185872146823321
Test year 2020 : -0.009721508615487817
GLM : -0.0019575659350366675


## Random Forest

In [122]:
model = RandomForestRegressor(random_state = 42, n_estimators = 100, max_depth = 3, max_features = 7)

In [124]:
sum = 0.0

for i in range(len(fulltrain_idx)):
    X_fulltrain = z_all.loc[fulltrain_idx[i], cols]
    y_fulltrain = z_all.loc[fulltrain_idx[i], 'rank_RET']
    X_test = z_all.loc[test_idx[i], cols]
    y_test = z_all.loc[test_idx[i], 'rank_RET']
    
    model.fit(X_fulltrain, y_fulltrain)
    y_pred = model.predict(X=X_test)
    y_pred = y_pred.reshape(-1)
    print("Test year", test_years[i],":",r2_oos(y_true=y_test, y_pred=y_pred))
    sum = sum + r2_oos(y_true=y_test, y_pred=y_pred)
mean = sum/24
print("RF",":",mean)

Test year 1997 : 0.01117863716005385
Test year 1998 : 0.007737098641032891
Test year 1999 : -0.0014309072827465297
Test year 2000 : 0.007817772036542436
Test year 2001 : 0.0007104580111424452
Test year 2002 : 0.0060693993830475135
Test year 2003 : -0.006268558911345368
Test year 2004 : -0.00024917254324496874
Test year 2005 : 0.0011160545200560312
Test year 2006 : -8.413794630146576e-05
Test year 2007 : -0.0005979540324174693
Test year 2008 : 0.0022981686073837437
Test year 2009 : -0.0035655310460074308
Test year 2010 : -0.0021069174857548667
Test year 2011 : 0.002408296885740735
Test year 2012 : 0.00016909646533380673
Test year 2013 : -0.00031303297672558017
Test year 2014 : 0.0016540913752470798
Test year 2015 : 0.001503009132044375
Test year 2016 : 0.002550762773775306
Test year 2017 : 0.001062838476646344
Test year 2018 : 0.002223709878073299
Test year 2019 : 0.0011725605257082439
Test year 2020 : -0.0020493152532425896
RF : 0.0013752677664184098


## Gradient Boosted Regression Trees