In [2]:
import os
import itertools
import xlsxwriter

import numpy as np
import pandas as pd
from scipy.optimize import minimize
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt

import DataPreparation as dpr
import ModelRun as mr
import benchmarks as bench
from MyEstimators import CLS_Estimator

<Figure size 720x360 with 0 Axes>

### Load data

In [3]:
df = dpr.read_data('EQP_Quarterly')
df = dpr.data_clean(df, '1956-01-01')

In [4]:
df.head()

Unnamed: 0_level_0,EQP,DP,DY,EP,DE,svar,b/m,ntis,tbl,lty,...,TMS,DFR,DFY,infl,c,w,y,cay,AAA,BAA
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1956-03-01,0.066512,-3.33303,-3.269151,-2.575525,-0.757505,0.003289,0.531077,0.026695,0.0225,0.0303,...,0.0078,0.005159,0.005,0.0,9.272498,11.092725,9.100386,0.007275,0.031,0.036
1956-06-01,-0.028264,-3.261722,-3.293365,-2.568575,-0.693147,0.003688,0.551565,0.025672,0.0249,0.0299,...,0.005,-0.021824,0.005,0.014925,9.271728,11.091665,9.107828,0.000775,0.0326,0.0376
1956-09-01,-0.034415,-3.204645,-3.239744,-2.573142,-0.631503,0.002519,0.57191,0.029362,0.0284,0.0324,...,0.004,0.005663,0.0051,0.007353,9.269304,11.086198,9.106428,0.000663,0.0356,0.0407
1956-12-01,0.033241,-3.289216,-3.260525,-2.616389,-0.672827,0.004394,0.544177,0.026149,0.0321,0.0345,...,0.0024,-0.002208,0.0062,0.007299,9.277993,11.096678,9.118405,-0.002524,0.0375,0.0437
1957-03-01,-0.05075,-3.238565,-3.29498,-2.562911,-0.675654,0.002288,0.599819,0.0266,0.0308,0.0331,...,0.0023,-0.000368,0.0077,0.007246,9.280482,11.090721,9.117433,0.002041,0.0366,0.0443


### Add $y_{t-1}$ and construct X and y

In [5]:
df['y_lag'] = df['EQP'].shift(1)
df = df.dropna()
df.head()

Unnamed: 0_level_0,EQP,DP,DY,EP,DE,svar,b/m,ntis,tbl,lty,...,DFR,DFY,infl,c,w,y,cay,AAA,BAA,y_lag
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1956-06-01,-0.028264,-3.261722,-3.293365,-2.568575,-0.693147,0.003688,0.551565,0.025672,0.0249,0.0299,...,-0.021824,0.005,0.014925,9.271728,11.091665,9.107828,0.000775,0.0326,0.0376,0.066512
1956-09-01,-0.034415,-3.204645,-3.239744,-2.573142,-0.631503,0.002519,0.57191,0.029362,0.0284,0.0324,...,0.005663,0.0051,0.007353,9.269304,11.086198,9.106428,0.000663,0.0356,0.0407,-0.028264
1956-12-01,0.033241,-3.289216,-3.260525,-2.616389,-0.672827,0.004394,0.544177,0.026149,0.0321,0.0345,...,-0.002208,0.0062,0.007299,9.277993,11.096678,9.118405,-0.002524,0.0375,0.0437,-0.034415
1957-03-01,-0.05075,-3.238565,-3.29498,-2.562911,-0.675654,0.002288,0.599819,0.0266,0.0308,0.0331,...,-0.000368,0.0077,0.007246,9.280482,11.090721,9.117433,0.002041,0.0366,0.0443,0.033241
1957-06-01,0.075114,-3.309868,-3.238565,-2.628349,-0.681519,0.001363,0.565877,0.030528,0.0329,0.0361,...,-0.003789,0.0072,0.010791,9.278119,11.104916,9.118823,-0.004528,0.0391,0.0463,-0.05075


In [6]:
df_AR2 = df.copy()
df_AR2['y_2lag'] = df['y_lag'].shift(1)
df_AR2 = df_AR2.dropna()
df_AR2.head()

Unnamed: 0_level_0,EQP,DP,DY,EP,DE,svar,b/m,ntis,tbl,lty,...,DFY,infl,c,w,y,cay,AAA,BAA,y_lag,y_2lag
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1956-09-01,-0.034415,-3.204645,-3.239744,-2.573142,-0.631503,0.002519,0.57191,0.029362,0.0284,0.0324,...,0.0051,0.007353,9.269304,11.086198,9.106428,0.000663,0.0356,0.0407,-0.028264,0.066512
1956-12-01,0.033241,-3.289216,-3.260525,-2.616389,-0.672827,0.004394,0.544177,0.026149,0.0321,0.0345,...,0.0062,0.007299,9.277993,11.096678,9.118405,-0.002524,0.0375,0.0437,-0.034415,-0.028264
1957-03-01,-0.05075,-3.238565,-3.29498,-2.562911,-0.675654,0.002288,0.599819,0.0266,0.0308,0.0331,...,0.0077,0.007246,9.280482,11.090721,9.117433,0.002041,0.0366,0.0443,0.033241,-0.034415
1957-06-01,0.075114,-3.309868,-3.238565,-2.628349,-0.681519,0.001363,0.565877,0.030528,0.0329,0.0361,...,0.0072,0.010791,9.278119,11.104916,9.118823,-0.004528,0.0391,0.0463,-0.05075,0.033241
1957-09-01,-0.108352,-3.182306,-3.292675,-2.503465,-0.678841,0.004218,0.624151,0.034363,0.0353,0.0364,...,0.0081,0.007117,9.281519,11.089344,9.117364,0.003433,0.0412,0.0493,0.075114,-0.05075


In [7]:
X = df[['DP','DY']]
station = pd.DataFrame()
y = df[['EQP']].squeeze()
X.head(2)

Unnamed: 0_level_0,DP,DY
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1956-06-01,-3.261722,-3.293365
1956-09-01,-3.204645,-3.239744


### Construct single-index and nonlinear models

In [7]:
def single_index(x):
    if isinstance(x, (pd.DataFrame, np.ndarray)):
        if isinstance(x, pd.DataFrame):
            x_values = x.values
        else:
            pass
    else:
        raise Exception('wrong type')

    def u(theta):
        if len(theta) == x_values.shape[1]:
            sum_up = [x_values[:, i] * theta[i] for i in range(x_values.shape[1])]
            index = np.sum(sum_up, axis=0)
        else:
            raise Exception('wrong parameter dimension')
        return index

    return u

In [8]:
extra_params = {'sin_func':1,
               'cos_func':1,
               'scaled_sin_func':2,
               'scaled_cos_func':2,
               'exp_func':2,
               'exp_shift_func':2,
                'poly_func':3,
                'linear_func':2
               }

In [9]:
def dimensions(non_sta, sta, func):
    stas = sta.shape[1]
    nonstas = non_sta.shape[1]
    extra = range(0, extra_params[func])
    return nonstas, stas, extra

In [10]:
def sin_func(x):
    def objective_func(params):
        func = np.sin(single_index(x.iloc[:,:d1])(params[0:d1])+params[d1+d2+extra[0]])+np.dot(
            x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [11]:
def cos_func(x):
    def objective_func(params):
        func = np.cos(single_index(x.iloc[:,:d1])(params[0:d1])+params[d1+d2+extra[0]])+np.dot(
            x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [12]:
def scaled_sin_func(x):
    def objective_func(params):
        func = np.sin(params[d1+d2+extra[1]]*single_index(x.iloc[:,:d1])(
            params[0:d1])+params[d1+d2+extra[0]])+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [13]:
def scaled_cos_func(x):
    def objective_func(params):
        func = np.cos(params[d1+d2+extra[1]]*single_index(x.iloc[:,:d1])(
            params[0:d1])+params[d1+d2+extra[0]])+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [14]:
def exp_shift_func(x):
    def objective_func(params):
        func = 1 - np.exp(params[d1+d2+extra[1]]*((single_index(x.iloc[:,:d1])(
            params[0:d1]))-params[d1+d2+extra[0]])**2)+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [15]:
def exp_func(x):
    def objective_func(params):
        func = params[d1+d2+extra[0]]*np.exp(-params[d1+d2+extra[1]]*(single_index(x.iloc[:,:d1])(params[0:d1]))**2
                                )+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [16]:
def poly_func(x):
    def objective_func(params):
        func = params[d1+d2+extra[0]]+params[d1+d2+extra[1]]*(single_index(x.iloc[:,:d1])(
            params[0:d1]))+params[d1+d2+extra[2]]*((single_index(x.iloc[:,:d1])(
            params[0:d1]))**2)+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
#                (single_index(x.iloc[:,:d1])(params[0:d1])
        return func
    return objective_func

In [17]:
def linear_func(x):
    def objective_func(params):
        func = params[d1+d2+extra[0]]+params[d1+d2+extra[1]]*(single_index(x.iloc[:,:d1])(
            params[0:d1]))+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
#                (single_index(x.iloc[:,:d1])(params[0:d1])
        return func
    return objective_func

### Model Estimation

In [18]:
def constraint_func(x):
    def constraint(params):
        con = 0
        for j in np.arange(0, x.iloc[:,:d1].shape[1]):
            con += params[j]**2
            cons = con - 1
        return cons
    return {'type':'eq', 'fun': constraint}

In [19]:
station

In [20]:
d1, d2, extra = dimensions(X, station,'poly_func')
print(d1, d2, extra)

2 0 range(0, 3)


In [21]:
d1, d2, extra = dimensions(X, station,'poly_func')
cls_nls = CLS_Estimator(obj_func = poly_func, x0 = [0.001]*(d1+d2+extra[-1]+1))
cls = CLS_Estimator(obj_func = poly_func, x0 = [0.001]*(d1+d2+extra[-1]+1), constraints = constraint_func(X))

In [22]:
cls_nls.fit(X,y)
cls.fit(X,y)

CLS_Estimator(constraints={'fun': <function constraint_func.<locals>.constraint at 0x0000025FDA3232F0>,
                           'type': 'eq'},
              obj_func=<function poly_func at 0x0000025FDA309C80>,
              x0=[0.001, 0.001, 0.001, 0.001, 0.001])

In [23]:
print(cls_nls.params_)
print(cls.params_)

[-8.94841330e-04  1.92702711e-03  1.24399499e-02 -2.56973660e-05
  1.01218548e-03]
[-0.16930924  0.98556298  0.00341686 -0.03364998 -0.01037493]


In [24]:
cls.params_[0]**2+cls.params_[1]**2

1.0000000010637105

## Empirical Study

### Cointegrated predictors
- dividend-price ratio and dividend yield
- T-bill rate and long-term yield
- dividend-price ratio and earningprice ratio
- baa- and aaa-rated corporate bond yields

In [25]:
co1 = df[['DP', 'DY']]
co2 = df[['tbl', 'lty']]
co3 = df[['DP', 'EP']]
co4 = df[['BAA', 'AAA']]

In [26]:
cointe_variables = [co1, co2, co3, co4]

### Stationary variables

In [27]:
station = df[['y_lag', 'cay']]

### Fit model and Save Results

In [28]:
def get_df_name(df):
    name =[x for x in globals() if globals()[x] is df][0]
    return name

In [29]:
fun_list = [sin_func,
            cos_func,
            scaled_sin_func,
            scaled_cos_func,
            exp_func,
            exp_shift_func,
            poly_func,
            linear_func
           ]

In [30]:
# Set up hierachical index
fun_names = [i.__name__ for i in fun_list]
cointe_names = [get_df_name(i) for i in cointe_variables]
iterables_a = [fun_names, cointe_names]

In [31]:
#Set up directory
parent = os.getcwd()
folder = 'results'
path = os.path.join(parent, folder)
if not os.path.exists(path):
    os.makedirs(path)

In [32]:
results = pd.DataFrame()
for i, j in itertools.product(fun_list, cointe_variables):
    # Set up dimensions
    d1, d2, extra= dimensions(j,station, i.__name__)
    initial_len = d1+d2+extra[-1]+1
    
    # Set up dataframes
    iterables = [[i.__name__], [get_df_name(j)]]
    sec_columns = ['param_'+str(i) for i in range(1,initial_len+1)]
    multi_index = pd.MultiIndex.from_product(iterables, names=["function", "variables"])
    multi_columns = pd.MultiIndex.from_product([['NLS', 'CLS'], sec_columns],
                                               names=['Estimator', 'Parameters'])
    result = pd.DataFrame(index = multi_index, columns = multi_columns)
    # Prepare X
    X_ = j.join(station)
    # Fit models
    nls = CLS_Estimator(obj_func = i, x0 = [0.001]*initial_len)
    cls = CLS_Estimator(obj_func = i, x0 = [0.001]*initial_len, constraints = constraint_func(X_))
    nls.params_ = nls.fit(X_,y).params_
    cls.params_ = cls.fit(X_,y).params_
    # Save results to dataframe
    result.loc[i.__name__,get_df_name(j)].loc['NLS'] = nls.params_ 
    result.loc[i.__name__,get_df_name(j)].loc['CLS'] = cls.params_ 
    # Put into one table
    results = results.append(result, ignore_index = False, sort = False)
    
# Export to Excel
results.to_excel('Results/full_sample.xlsx')

In [33]:
results.head(3)

Unnamed: 0_level_0,Estimator,CLS,CLS,CLS,CLS,CLS,CLS,CLS,NLS,NLS,NLS,NLS,NLS,NLS,NLS
Unnamed: 0_level_1,Parameters,param_1,param_2,param_3,param_4,param_5,param_6,param_7,param_1,param_2,param_3,param_4,param_5,param_6,param_7
function,variables,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
sin_func,co1,0.709838,-0.704365,0.022517,-0.104848,-15.689339,,,-1.008436,1.002847,0.000839,-0.073964,-0.023462,,
sin_func,co2,0.495116,-0.868827,0.079209,0.054524,-50.222284,,,-0.786401,0.690315,0.066833,-0.247875,0.003686,,
sin_func,co3,0.715437,-0.698678,-0.221696,-0.790349,-2.565328,,,-0.026356,-9e-05,0.059006,-0.224006,-0.083221,,


# WTF is hapenning here?!!!

In [34]:
# def fit_model(functions, variables, file_name):   
#     results = pd.DataFrame()
#     for i, j in itertools.product(functions, variables):
#         # Set up dimensions
#         d1, d2, extra= dimensions(j,station, i.__name__)
# #         print(d1,d2,extra[-1])
#         initial_len = d1+d2+extra[-1]+1

#         # Set up dataframes
#         iterables = [[i.__name__], [get_df_name(j)]]
#         sec_columns = ['param_'+str(i) for i in range(1,initial_len+1)]
#         multi_index = pd.MultiIndex.from_product(iterables, names=["function", "variables"])
#         multi_columns = pd.MultiIndex.from_product([['NLS', 'CLS'], sec_columns],
#                                                    names=['Estimator', 'Parameters'])
#         result = pd.DataFrame(index = multi_index, columns = multi_columns)
#         # Prepare X
#         X_ = j.join(station)
#         # Fit models
#         nls = CLS_Estimator(obj_func = i, x0 = [0.001]*initial_len)
#         cls = CLS_Estimator(obj_func = i, x0 = [0.001]*initial_len, constraints = constraint_func(X))
#         nls.params_ = nls.fit(X_,y).params_
#         cls.params_ = cls.fit(X_,y).params_
#         print(initial_len)
#         # Save results to dataframe
#         result.loc[i.__name__,get_df_name(j)].loc['NLS'] = nls.params_ 
#         result.loc[i.__name__,get_df_name(j)].loc['CLS'] = cls.params_ 
#         # Put into one table
#         results = results.append(result, ignore_index = False, sort = False)
#     # Export to Excel
#     path = 'Results/' + file_name +'.xlsx'
#     results.to_excel(path)
#     return results

In [35]:
# fit_model(fun_list, cointe_variables,'results_1835')

## Use initial values from Linear regression (using Taylor expansion)

In [36]:
orders = {'sin_func':1,
          'cos_func':2,
          'scaled_sin_func':1,
          'scaled_cos_func':2,
          'exp_func':5,
          'exp_shift_func':4,
          'poly_func':2,
          'linear_func':1
               }

In [89]:
def Taylor_init(variables, station, y, function):
    
    d1, d2, extra = dimensions(variables, station, function.__name__)
    
    # find the initials for theta
    LR = LinearRegression()
    LR_theta = LR.fit(variables.iloc[:,1:], variables.iloc[:,:1])
    alpha = np.append(1, -LR_theta.coef_)
    theta = np.array(-alpha/np.linalg.norm(alpha))
#     print(len(theta))
    
    # calculate single-index
    u = single_index(variables)(theta)
    
    # find the initials for beta
    Xs = station.copy()
#     print(Xs.shape[1])
    Xs['u'], Xs['u2'], Xs['u3'], Xs['u4'], Xs['u6'] = u, u**2, u**3, u**4, u**6
    t_order = orders.get(function.__name__)
    
    if function == exp_func:
        X_reg = Xs.iloc[:, 0:d2+t_order].drop(['u', 'u3'], axis = 1)
    else:
        X_reg = Xs.iloc[:, 0:d2+t_order]
#     print(d2, t_order)
    LR_taylor = LR.fit(X_reg, y)
    theta_gamma = np.append(theta,LR_taylor.coef_[:d2])
#     print(len(theta_gamma))
    
    
    # initials for gammas
    initials = []
    if function == sin_func:
        initials = np.append(theta_gamma, LR_taylor.intercept_)
    elif function == scaled_sin_func:
        initials = np.append(theta_gamma, ([LR_taylor.intercept_], [LR_taylor.coef_[0]]))
    elif function == linear_func:
        initials = np.append(theta_gamma, ([LR_taylor.intercept_], [LR_taylor.coef_[0]]))
    elif function == poly_func:
        ini_poly_ = np.append(theta_gamma,LR_taylor.coef_[d2:])
        initials = np.insert(ini_poly_, 4, LR_taylor.intercept_)
    elif function == cos_func:
        initials = np.append(theta_gamma, [-LR_taylor.coef_[d2]])
    elif function == scaled_cos_func:
        initials = np.append(theta_gamma,(
            [-LR_taylor.coef_[d2]/np.sqrt(np.abs((1-LR_taylor.intercept_)*2))], [np.sqrt(np.abs((1-LR_taylor.intercept_)*2))]))
    elif function == exp_shift_func:
        initials = np.append(theta_gamma, [LR_taylor.coef_[-2]/(2*LR_taylor.coef_[-1]), np.sqrt(np.abs(LR_taylor.coef_[-1]))])
#         print(LR_taylor.coef_, LR_taylor.coef_[-2])
    elif function == exp_func:
        initials =np.append(theta_gamma, [LR_taylor.intercept_, -LR_taylor.coef_[d2]/LR_taylor.intercept_])

    return initials

In [64]:
station_n=pd.DataFrame()

In [62]:
Taylor_init(co1, station_n, y_lag2, exp_func)

array([-7.14024336e-01,  7.00120881e-01, -7.24210787e-02,  3.18885375e+02])

# Fit model and Save Results

In [38]:
# Taylor_init(co1, station, y, sin_func)

In [39]:
results_Taylor = pd.DataFrame()
for i, j in itertools.product(fun_list, cointe_variables):
    # Set up dimensions
    d1, d2, extra= dimensions(j,station, i.__name__)
    initial_len = d1+d2+extra[-1]+1
    # Set up dataframes
    iterables = [[i.__name__], [get_df_name(j)]]
    sec_columns = ['param_'+str(i) for i in range(1,initial_len+1)]
    multi_index = pd.MultiIndex.from_product(iterables, names=["function", "variables"])
    multi_columns = pd.MultiIndex.from_product([['NLS', 'CLS'], sec_columns],
                                               names=['Estimator', 'Parameters'])
    result = pd.DataFrame(index = multi_index, columns = multi_columns)
    # Prepare X
    X_ = j.join(station)
    # Fit models
    
    nls = CLS_Estimator(obj_func = i, x0 = Taylor_init(j, station, y, i))
    cls = CLS_Estimator(obj_func = i, x0 = Taylor_init(j, station, y, i), constraints = constraint_func(X_))
    nls.params_ = nls.fit(X_,y).params_
    cls.params_ = cls.fit(X_,y).params_
    # Save results to dataframe
    result.loc[i.__name__,get_df_name(j)].loc['NLS'] = nls.params_ 
    result.loc[i.__name__,get_df_name(j)].loc['CLS'] = cls.params_ 
    # Put into one table
    results_Taylor = results_Taylor.append(result, ignore_index = False, sort = False)
    
# Export to Excel
results_Taylor.to_excel('Results/Taylor_fullsample_0709.xlsx')
results_Taylor.tail()

Unnamed: 0_level_0,Estimator,CLS,CLS,CLS,CLS,CLS,CLS,CLS,NLS,NLS,NLS,NLS,NLS,NLS,NLS
Unnamed: 0_level_1,Parameters,param_1,param_2,param_3,param_4,param_5,param_6,param_7,param_1,param_2,param_3,param_4,param_5,param_6,param_7
function,variables,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
poly_func,co4,-0.639086,0.769135,0.029136,0.222809,0.029647,0.171961,-1066.824933,-0.647757,0.777972,0.028445,0.229783,0.030041,-0.098865,-1066.754865
linear_func,co1,-0.709067,0.705141,0.000527,-0.075851,-0.023234,1.416036,,-1.080529,1.074592,0.000564,-0.075504,-0.023073,0.928931,
linear_func,co2,-0.751669,0.659541,0.066719,-0.248713,0.003689,1.04774,,-0.857493,0.752473,0.066743,-0.249108,0.003693,0.919095,
linear_func,co3,-0.999994,-0.003524,0.059013,-0.223993,-0.083197,0.026346,,-0.78005,-0.003462,0.059018,-0.223942,-0.083234,0.033766,
linear_func,co4,0.476652,-0.879092,0.07649,-0.090205,0.023524,0.527701,,-1.472724,-0.020691,0.072875,-0.111111,0.024863,0.113793,


### GridSearch and CrossValidation

### Train_test split

In [40]:
# val_length = 1
test_length = 31
step = 1
### quarterly data:4
freq = 4
# cv_outer = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=int((12/step) * test_length), test_size=step)
# cv_inner = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=int((12/step) * val_length), test_size=step)
cv_outer = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=test_length*freq, test_size=step)
# cv_inner = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=4, test_size=step)

In [8]:
co1 = df_AR2[['DP', 'DY']]
co2 = df_AR2[['tbl', 'lty']]
co3 = df_AR2[['DP', 'EP']]
co4 = df_AR2[['BAA', 'AAA']]
station_AR2 = df_AR2[['y_lag', 'cay']]

In [9]:
y_lag2 = df_AR2[['EQP']].squeeze()
y_lag2

time
1956-09-01   -0.034415
1956-12-01    0.033241
1957-03-01   -0.050750
1957-06-01    0.075114
1957-09-01   -0.108352
                ...   
2017-12-01    0.063293
2018-03-01   -0.011035
2018-06-01    0.029860
2018-09-01    0.069264
2018-12-01   -0.151634
Name: EQP, Length: 250, dtype: float64

In [43]:
X_train_AR2 = df_AR2.loc[:"1988-01-01"]
y_train_AR2 = y_lag2.loc[:"1988-01-01"]

X_test_AR2 = df_AR2.loc["1988-01-01":"2018-12-01"]
y_test_AR2 = y_lag2.loc["1988-01-01":"2018-12-01"]
station_train = df_AR2.loc[:"1988-01-01"][['y_lag', 'cay']]

In [44]:
variables_lag2 = [co1, co2, co3, co4]

In [103]:
fun_list = [sin_func, cos_func, scaled_sin_func, scaled_cos_func, exp_shift_func, exp_func, poly_func, linear_func]
# fun_list = [exp_shift_func]

In [104]:
oos_R2 = pd.DataFrame()
oos_MSE = pd.DataFrame()

rows = df.loc["1988-01-01":"2018-12-01"].index
sec_columns = ['CLS_MSE', 'SM_MSE', 'NLS_MSE', 'AR1_MSE', 'AR2_MSE', 'AR_cay_MSE']
multi_columns = pd.MultiIndex.from_product([['co1', 'co2', 'co3', 'co4'], sec_columns],names=['Variable', 'Model'])
        
sec_columns_R2 = ['SM_R2', 'NLS_R2', 'AR1_R2', 'AR2_R2', 'AR_cay_R2']
multi_columns_R2 = pd.MultiIndex.from_product([['co1', 'co2', 'co3', 'co4'], sec_columns_R2],
                                                  names=['Variable', 'Model'])      
oos_MSE = pd.DataFrame(index = multi_columns, columns = rows)
oos_R2 = pd.DataFrame(index = multi_columns_R2, columns = rows)

In [105]:
writer_MSE = pd.ExcelWriter('results/OOS_MSE_0709.xlsx', engine='xlsxwriter')
writer_R2 = pd.ExcelWriter('results/OOS_R2_0709.xlsx', engine='xlsxwriter')
        
for i in fun_list:
#     worksheet_MSE = wb_MSE.add_worksheet(i.__name__)
#     worksheet_R2 = wb_R2.add_worksheet(i.__name__)
    print(i.__name__)
    for j in variables_lag2:
    
        # Prepare X
        X_ = j.join(station_AR2) 

        # Fit models
        x0 = Taylor_init(j.loc[:"1988-01-01"], station_train.loc[:"1988-01-01"], y_train_AR2.loc[:"1988-01-01"], i)
        print(x0)

        # Target model
        d1, d2, extra= dimensions(j,station, i.__name__)
        cls = CLS_Estimator(obj_func = i, x0 = x0, constraints = constraint_func(X_))
        cv_result = cross_validate(cls, X_, y_lag2, cv=cv_outer, scoring = 'neg_mean_squared_error')

        oos_MSE.loc[get_df_name(j)].loc['CLS_MSE'] = -cv_result['test_score']
#         print(i.__name__,get_df_name(j))

        ##################################### no need to loop! ##############################################################
        # benchmark model: sm
        sm_pred, sm_mse = bench.sample_mean(y_lag2, "1988-01-01", cv_outer = cv_outer)
        oos_MSE.loc[get_df_name(j)].loc['SM_MSE'] = sm_mse

        # benchmark model: Nonlinear
        station_n = pd.DataFrame()
        d1, d2, extra= dimensions(j,station_n, i.__name__)
        x0_n = Taylor_init(j.loc[:"1988-01-01"], station_n, y_train_AR2.loc[:"1988-01-01"], i)
        nlr = CLS_Estimator(obj_func = i, x0 = x0_n, constraints = constraint_func(j))
        
        cv_nonlinear = cross_validate(nlr, j, y_lag2, cv=cv_outer, scoring = 'neg_mean_squared_error')
        oos_MSE.loc[get_df_name(j)].loc['NLS_MSE'] = -cv_nonlinear['test_score']    

        # benchmark model: AR1
        lr = LinearRegression()
        
        ar1 = df_AR2['y_lag']
        cv_ar1 = cross_validate(lr, ar1.values.reshape(-1, 1), y_lag2, cv=cv_outer, scoring = 'neg_mean_squared_error')
        oos_MSE.loc[get_df_name(j)].loc['AR1_MSE'] = -cv_ar1['test_score']
        
        # AR2
        ar2 = df_AR2[['y_lag','y_2lag']]
        cv_ar2 = cross_validate(lr, ar2, y_lag2, cv=cv_outer, scoring = 'neg_mean_squared_error')
        oos_MSE.loc[get_df_name(j)].loc['AR2_MSE'] = -cv_ar2['test_score']

        # benchmark model: AR+cay
        ar_cay = df_AR2[['y_lag','cay']]
        cv_cay = cross_validate(lr, ar_cay, y_lag2, cv=cv_outer, scoring = 'neg_mean_squared_error')
        oos_MSE.loc[get_df_name(j)].loc['AR_cay_MSE'] = -cv_cay['test_score']
#         print("%.9f" % -cv_cay['test_score'][0])

        ####################################################################################################
        # R2
        R2_sm = 1-(-cv_result['test_score'])/sm_mse
        R2_nls = 1- (-cv_result['test_score'])/-cv_nonlinear['test_score']
        R2_ar1 = 1- (-cv_result['test_score'])/-cv_ar1['test_score']
        R2_ar2 = 1- (-cv_result['test_score'])/-cv_ar2['test_score']
        R2_cay = 1- (-cv_result['test_score'])/-cv_cay['test_score']
        
        oos_R2.loc[get_df_name(j)].loc['SM_R2'] = R2_sm
        oos_R2.loc[get_df_name(j)].loc['NLS_R2'] = R2_nls
        oos_R2.loc[get_df_name(j)].loc['AR1_R2'] = R2_ar1
        oos_R2.loc[get_df_name(j)].loc['AR2_R2'] = R2_ar2
        oos_R2.loc[get_df_name(j)].loc['AR_cay_R2'] = R2_cay
        print(get_df_name(j))
        
        oos_MSE.T.to_excel(writer_MSE, sheet_name=i.__name__)
        oos_R2.T.to_excel(writer_R2, sheet_name=i.__name__)
        
writer_MSE.save()
writer_R2.save()

writer_MSE.close()
writer_R2.close()

sin_func
[-0.73532659  0.67771292 -0.04396704  0.21988944 -0.26671277]
co1
[-0.74477197  0.66731905  0.08125472 -0.4287323  -0.0016201 ]
co2
[-0.84691647  0.53172596  0.1220298  -0.22374389  0.03924813]
co3
[-0.66428057  0.74748333  0.10366838  0.03415889 -0.00218679]
co4
cos_func
[-0.73532659  0.67771292 -0.04423969  0.2211051  -1.35034103]
co1
[-0.74477197  0.66731905  0.07222203 -0.49624142 -2.00153521]
co2
[-0.84691647  0.53172596  0.11727755 -0.29242258 -2.6100726 ]
co3
[-0.66428057  0.74748333  0.09854461  0.03600706 17.90908539]
co4
scaled_sin_func
[-0.73532659  0.67771292 -0.04396704  0.21988944 -0.26671277 -0.04396704]
co1
[-0.74477197  0.66731905  0.08125472 -0.4287323  -0.0016201   0.08125472]
co2
[-0.84691647  0.53172596  0.1220298  -0.22374389  0.03924813  0.1220298 ]
co3
[-0.66428057  0.74748333  0.10366838  0.03415889 -0.00218679  0.10366838]
co4
scaled_cos_func
[-0.73532659  0.67771292 -0.04423969  0.2211051  -0.84943354  1.58969591]
co1
[-0.74477197  0.66731905  0.0722

In [106]:
oos_MSE.head(12)

Unnamed: 0_level_0,time,1988-03-01,1988-06-01,1988-09-01,1988-12-01,1989-03-01,1989-06-01,1989-09-01,1989-12-01,1990-03-01,1990-06-01,...,2016-09-01,2016-12-01,2017-03-01,2017-06-01,2017-09-01,2017-12-01,2018-03-01,2018-06-01,2018-09-01,2018-12-01
Variable,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
co1,CLS_MSE,2e-06,3e-06,1e-05,3.2e-05,0.000102,0.000104,0.000103,0.000106,5.3e-05,4.3e-05,...,2.4e-05,1.3e-05,1.9e-05,1e-05,0.0,9e-06,0.0,0.0,2e-06,1e-06
co1,SM_MSE,0.00117,0.001787,0.000424,1.4e-05,0.00148,0.002926,0.005213,0.00011,0.003526,0.001043,...,0.000607,0.000526,0.002097,0.000263,0.000749,0.002543,0.000581,0.000285,0.003161,0.027192
co1,NLS_MSE,0.0,0.0,4e-06,2.1e-05,8.3e-05,8.4e-05,8.8e-05,9.7e-05,4.9e-05,4.2e-05,...,4.2e-05,2.8e-05,3.8e-05,2.7e-05,8e-06,2.6e-05,7e-06,5e-06,1e-06,0.0
co1,AR1_MSE,0.00455,0.001519,0.000611,3.3e-05,0.00145,0.002537,0.004471,0.00032,0.003405,0.001466,...,0.000558,0.000434,0.00192,0.000151,0.000674,0.002308,0.000814,0.000359,0.002998,0.028828
co1,AR2_MSE,0.006396,3.3e-05,0.000404,0.000156,0.001213,0.002537,0.005185,0.000116,0.002229,0.00141,...,0.000555,0.000464,0.002071,0.000187,0.000854,0.002406,0.000721,0.000518,0.0028,0.028531
co1,AR_cay_MSE,0.005513,0.001798,0.000457,9.2e-05,0.001743,0.002903,0.004775,0.000294,0.003279,0.001591,...,0.000429,0.00032,0.001633,6.7e-05,0.00045,0.001908,0.001192,0.000193,0.002453,0.030146
co2,CLS_MSE,0.001954,0.001145,0.000313,0.000537,0.004254,0.006446,0.007369,0.0,0.002132,0.002403,...,0.000227,8.5e-05,0.001094,1.3e-05,0.000268,0.001732,0.0013,0.000167,0.00231,0.02866
co2,SM_MSE,0.00117,0.001787,0.000424,1.4e-05,0.00148,0.002926,0.005213,0.00011,0.003526,0.001043,...,0.000607,0.000526,0.002097,0.000263,0.000749,0.002543,0.000581,0.000285,0.003161,0.027192
co2,NLS_MSE,0.000172,0.000891,0.000507,0.000174,0.003244,0.005391,0.006918,2e-06,0.002583,0.001687,...,0.000351,0.000201,0.001481,0.000126,0.000511,0.002288,0.000679,0.000263,0.003004,0.026372
co2,AR1_MSE,0.00455,0.001519,0.000611,3.3e-05,0.00145,0.002537,0.004471,0.00032,0.003405,0.001466,...,0.000558,0.000434,0.00192,0.000151,0.000674,0.002308,0.000814,0.000359,0.002998,0.028828


In [107]:
oos_R2.head(10)

Unnamed: 0_level_0,time,1988-03-01,1988-06-01,1988-09-01,1988-12-01,1989-03-01,1989-06-01,1989-09-01,1989-12-01,1990-03-01,1990-06-01,...,2016-09-01,2016-12-01,2017-03-01,2017-06-01,2017-09-01,2017-12-01,2018-03-01,2018-06-01,2018-09-01,2018-12-01
Variable,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
co1,SM_R2,0.998356,0.99839,0.977103,-1.303824,0.93116,0.96455,0.980212,0.03546,0.98507,0.958522,...,0.960352,0.974803,0.99089,0.962408,0.999751,0.996377,0.999979,0.999964,0.999273,0.999959
co1,NLS_R2,-11.578788,-6.961491,-1.5732,-0.541946,-0.234662,-0.234576,-0.17081,-0.093955,-0.083031,-0.018474,...,0.423712,0.518346,0.494691,0.628654,0.975871,0.648899,0.9982,0.997947,-2.167076,-1.328551
co1,AR1_R2,0.999577,0.998106,0.98412,0.001073,0.929753,0.959121,0.976925,0.669309,0.984539,0.970501,...,0.956892,0.969454,0.990053,0.934387,0.999723,0.996009,0.999985,0.999972,0.999233,0.999961
co1,AR2_R2,0.999699,0.913287,0.975974,0.792149,0.916033,0.959116,0.980102,0.08338,0.976377,0.96931,...,0.956675,0.971456,0.990776,0.947151,0.999781,0.996171,0.999983,0.99998,0.999179,0.999961
co1,AR_cay_R2,0.999651,0.9984,0.978735,0.647652,0.941555,0.964266,0.978394,0.640013,0.983945,0.972808,...,0.94397,0.958641,0.988301,0.853062,0.999585,0.995172,0.99999,0.999947,0.999063,0.999963
co2,SM_R2,-0.670484,0.359416,0.261546,-37.038695,-1.87492,-1.203045,-0.413506,0.999633,0.395484,-1.303705,...,0.626504,0.838603,0.478021,0.950255,0.642842,0.318723,-1.236656,0.413906,0.26915,-0.054007
co2,NLS_R2,-10.327354,-0.284068,0.382977,-2.078779,-0.311349,-0.195765,-0.065205,0.982174,0.174639,-0.423974,...,0.353846,0.578492,0.261116,0.896335,0.476853,0.242778,-0.914299,0.364962,0.230801,-0.086776
co2,AR1_R2,0.570664,0.246437,0.487855,-15.49339,-1.933678,-1.540418,-0.648286,0.999874,0.374002,-0.638371,...,0.593909,0.804343,0.430091,0.913176,0.602889,0.249511,-0.597342,0.534823,0.229423,0.005827
co2,AR2_R2,0.694566,-33.494203,0.225126,-2.431859,-2.506684,-1.540738,-0.421345,0.999652,0.043529,-0.704502,...,0.591863,0.817166,0.471527,0.930067,0.686624,0.279998,-0.801382,0.67752,0.174869,-0.004537
co2,AR_cay_R2,0.645628,0.363444,0.31418,-4.817661,-1.440811,-1.220655,-0.543376,0.999863,0.34996,-0.510224,...,0.472179,0.735081,0.329687,0.805561,0.405506,0.092011,-0.090604,0.133785,0.058189,0.049268


In [None]:
# for train_index, test_index in cv_outer.split(X):
# #     print(train_index[-3:-1], test_index)
#     print(X.iloc[test_index])

In [None]:
space = dict()
space['constraints'] = [(), constraint_func(x)]
space['x0'] = [[0.01]*8,[1]*8]

In [None]:
X = co1.join(station)

In [None]:
constraints = constraint_func(X)

In [None]:
# nls = CLS_Estimator(obj_func = sin_func, x0 = [0.01]*5)
# 
cls = CLS_Estimator(obj_func = sin_func, x0 = [0.01]*5, constraints = constraints)

In [None]:
cv_result = cross_validate(cls, co1.join(station_AR2), y_lag2, cv=cv_outer, scoring = 'neg_mean_squared_error')

In [None]:
# cls.predict(X["1988-01-01":])

In [None]:
# models, c, model_mse = mr.Nested_CV(X = X, y = y, model = cls, 
#                                              cv_inner = cv_inner, cv_outer = cv_outer, 
#                                              search_method = 'Grid', space = space)

### Benchmark model: sample mean

In [None]:
sm_pred, sm_mse = bench.sample_mean(y, "1988-01-01", cv_outer = cv_outer)

In [None]:
sm_mse

### $R^2$ plot

In [None]:
R2 = 1-(-nls_mse)/sm_mse

In [None]:
OOS_sin = pd.DataFrame(
    {'nls':-nls_mse, 'sm':sm_mse}
)
OOS_sin.to_excel('OOS_sin.xlsx')

In [None]:
plt.plot(-nls_mse)
plt.plot(sm_mse)[]

In [None]:
np.sum(-nls_mse<sm_mse)

In [None]:
plt.plot(R2)
plt.savefig('sin_oos.jpg')

In [None]:
R2_new = 1 / (1 + np.exp(-np.array(R2)))
plt.plot(R2_new[0:47])

In [None]:
# bench.plot_R2(y_test[::3], c, sm_pred, adjust = False, alpha = 0.8)