In [1]:
import DataPreparation as dpr
import ModelRun as mr
import benchmarks as bench

import os
import itertools
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

from MyEstimators import CLS_Estimator

<Figure size 720x360 with 0 Axes>

### Load data

In [2]:
df = dpr.read_data('EQP_Quarterly')
df = dpr.data_clean(df, '1956-01-01')

In [3]:
df.head()

Unnamed: 0_level_0,EQP,DP,DY,EP,DE,svar,b/m,ntis,tbl,lty,...,TMS,DFR,DFY,infl,c,w,y,cay,AAA,BAA
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1956-03-01,0.066512,-3.33303,-3.269151,-2.575525,-0.757505,0.003289,0.531077,0.026695,0.0225,0.0303,...,0.0078,0.005159,0.005,0.0,9.272498,11.092725,9.100386,0.007275,0.031,0.036
1956-06-01,-0.028264,-3.261722,-3.293365,-2.568575,-0.693147,0.003688,0.551565,0.025672,0.0249,0.0299,...,0.005,-0.021824,0.005,0.014925,9.271728,11.091665,9.107828,0.000775,0.0326,0.0376
1956-09-01,-0.034415,-3.204645,-3.239744,-2.573142,-0.631503,0.002519,0.57191,0.029362,0.0284,0.0324,...,0.004,0.005663,0.0051,0.007353,9.269304,11.086198,9.106428,0.000663,0.0356,0.0407
1956-12-01,0.033241,-3.289216,-3.260525,-2.616389,-0.672827,0.004394,0.544177,0.026149,0.0321,0.0345,...,0.0024,-0.002208,0.0062,0.007299,9.277993,11.096678,9.118405,-0.002524,0.0375,0.0437
1957-03-01,-0.05075,-3.238565,-3.29498,-2.562911,-0.675654,0.002288,0.599819,0.0266,0.0308,0.0331,...,0.0023,-0.000368,0.0077,0.007246,9.280482,11.090721,9.117433,0.002041,0.0366,0.0443


### Add $y_{t-1}$ and construct X and y

In [4]:
df['y_lag'] = df['EQP'].shift()
df = df.dropna()
df.head()

Unnamed: 0_level_0,EQP,DP,DY,EP,DE,svar,b/m,ntis,tbl,lty,...,DFR,DFY,infl,c,w,y,cay,AAA,BAA,y_lag
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1956-06-01,-0.028264,-3.261722,-3.293365,-2.568575,-0.693147,0.003688,0.551565,0.025672,0.0249,0.0299,...,-0.021824,0.005,0.014925,9.271728,11.091665,9.107828,0.000775,0.0326,0.0376,0.066512
1956-09-01,-0.034415,-3.204645,-3.239744,-2.573142,-0.631503,0.002519,0.57191,0.029362,0.0284,0.0324,...,0.005663,0.0051,0.007353,9.269304,11.086198,9.106428,0.000663,0.0356,0.0407,-0.028264
1956-12-01,0.033241,-3.289216,-3.260525,-2.616389,-0.672827,0.004394,0.544177,0.026149,0.0321,0.0345,...,-0.002208,0.0062,0.007299,9.277993,11.096678,9.118405,-0.002524,0.0375,0.0437,-0.034415
1957-03-01,-0.05075,-3.238565,-3.29498,-2.562911,-0.675654,0.002288,0.599819,0.0266,0.0308,0.0331,...,-0.000368,0.0077,0.007246,9.280482,11.090721,9.117433,0.002041,0.0366,0.0443,0.033241
1957-06-01,0.075114,-3.309868,-3.238565,-2.628349,-0.681519,0.001363,0.565877,0.030528,0.0329,0.0361,...,-0.003789,0.0072,0.010791,9.278119,11.104916,9.118823,-0.004528,0.0391,0.0463,-0.05075


In [5]:
X = df[['DP','DY']]
station = pd.DataFrame()
y = df[['EQP']].squeeze()
X.head(2)

Unnamed: 0_level_0,DP,DY
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1956-06-01,-3.261722,-3.293365
1956-09-01,-3.204645,-3.239744


### Construct single-index and nonlinear models

In [6]:
def single_index(x):
    if isinstance(x, (pd.DataFrame, np.ndarray)):
        if isinstance(x, pd.DataFrame):
            x_values = x.values
        else:
            pass
    else:
        raise Exception('wrong type')

    def u(theta):
        if len(theta) == x_values.shape[1]:
            sum_up = [x_values[:, i] * theta[i] for i in range(x_values.shape[1])]
            index = np.sum(sum_up, axis=0)
        else:
            raise Exception('wrong parameter dimension')
        return index

    return u

In [7]:
extra_params = {'sin_func':1,
               'cos_func':1,
               'scaled_sin_func':2,
               'scaled_cos_func':2,
               'exp_func':2,
               'exp_shift_func':2,
                'poly_func':3
               }

In [8]:
def dimensions(non_sta, sta, func):
    stas = sta.shape[1]
    nonstas = non_sta.shape[1]
    extra = range(0, extra_params[func])
    return nonstas, stas, extra

In [9]:
def sin_func(x):
    def objective_func(params):
        func = np.sin(single_index(x.iloc[:,:d1])(params[0:d1])+params[d1+d2+extra[0]])+np.dot(
            x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [10]:
def cos_func(x):
    def objective_func(params):
        func = np.cos(single_index(x.iloc[:,:d1])(params[0:d1])+params[d1+d2+extra[0]])+np.dot(
            x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [11]:
def scaled_sin_func(x):
    def objective_func(params):
        func = np.sin(params[d1+d2+extra[1]]*single_index(x.iloc[:,:d1])(
            params[0:d1])+params[d1+d2+extra[0]])+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [12]:
def scaled_cos_func(x):
    def objective_func(params):
        func = np.cos(params[d1+d2+extra[1]]*single_index(x.iloc[:,:d1])(
            params[0:d1])+params[d1+d2+extra[0]])+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [13]:
def exp_shift_func(x):
    def objective_func(params):
        func = 1 - np.exp(params[d1+d2+extra[1]]*((single_index(x.iloc[:,:d1])(
            params[0:d1]))-params[d1+d2+extra[0]])**2)+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [14]:
def exp_func(x):
    def objective_func(params):
        func = params[d1+d2+extra[0]]*np.exp(-params[d1+d2+extra[1]]*(single_index(x.iloc[:,:d1])(params[0:d1]))**2
                                )+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
        return func
    return objective_func

In [15]:
def poly_func(x):
    def objective_func(params):
        func = params[d1+d2+extra[0]]+params[d1+d2+extra[1]]*(single_index(x.iloc[:,:d1])(
            params[0:d1]))+params[d1+d2+extra[2]]*((single_index(x.iloc[:,:d1])(
            params[0:d1]))**2)+np.dot(x.iloc[:,d1:d1+d2], params[d1:d1+d2])
#                (single_index(x.iloc[:,:d1])(params[0:d1])
        return func
    return objective_func

### Model Estimation

In [16]:
def constraint_func(x):
    def constraint(params):
        con = 0
        for j in np.arange(0, x.iloc[:,:d1].shape[1]):
            con += params[j]**2
            cons = con - 1
        return cons
    return {'type':'eq', 'fun': constraint}

In [17]:
d1, d2, extra = dimensions(X, station,'poly_func')
cls_nls = CLS_Estimator(obj_func = poly_func, x0 = [0.001]*(d1+d2+extra[-1]+1))
cls = CLS_Estimator(obj_func = poly_func, x0 = [0.001]*(d1+d2+extra[-1]+1), constraints = constraint_func(X))

In [18]:
cls_nls.fit(X,y)
cls.fit(X,y)

CLS_Estimator(constraints={'fun': <function constraint_func.<locals>.constraint at 0x000002A33FE6F8C8>,
                           'type': 'eq'},
              obj_func=<function poly_func at 0x000002A33FE6F378>,
              x0=[0.001, 0.001, 0.001, 0.001, 0.001])

In [19]:
print(cls_nls.params_)
print(cls.params_)

[-8.94841330e-04  1.92702711e-03  1.24399499e-02 -2.56973660e-05
  1.01218548e-03]
[-0.16930924  0.98556298  0.00341686 -0.03364998 -0.01037493]


In [20]:
cls.params_[0]

-0.16930923753093413

In [21]:
cls.params_[0]**2+cls.params_[1]**2

1.0000000010637105

## Empirical Study

### Cointegrated predictors
- dividend-price ratio and dividend yield
- T-bill rate and long-term yield
- dividend-price ratio and earningprice ratio
- baa- and aaa-rated corporate bond yields

In [64]:
co1 = df[['DP', 'DY']]
co2 = df[['tbl', 'lty']]
co3 = df[['DP', 'EP']]
co4 = df[['BAA', 'AAA']]

In [65]:
get_df_name(co3)

'co3'

In [66]:
cointe_variables = [co1, co2, co3, co4]

### Stationary variables

In [24]:
station = df[['y_lag', 'cay']]

### Fit model and Save Results

In [25]:
def get_df_name(df):
    name =[x for x in globals() if globals()[x] is df][0]
    return name

In [26]:
fun_list = [sin_func,
            cos_func,
            scaled_sin_func,
            scaled_cos_func,
            exp_func,
            exp_shift_func,
            poly_func
           ]

In [27]:
# Set up hierachical index
fun_names = [i.__name__ for i in fun_list]
cointe_names = [get_df_name(i) for i in cointe_variables]
iterables_a = [fun_names, cointe_names]

In [28]:
#Set up directory
parent = os.getcwd()
folder = 'results'
path = os.path.join(parent, folder)
if not os.path.exists(path):
    os.makedirs(path)

In [29]:
results = pd.DataFrame()
for i, j in itertools.product(fun_list, cointe_variables):
    # Set up dimensions
    d1, d2, extra= dimensions(j,station, i.__name__)
    initial_len = d1+d2+extra[-1]+1
    
    # Set up dataframes
    iterables = [[i.__name__], [get_df_name(j)]]
    sec_columns = ['param_'+str(i) for i in range(1,initial_len+1)]
    multi_index = pd.MultiIndex.from_product(iterables, names=["function", "variables"])
    multi_columns = pd.MultiIndex.from_product([['NLS', 'CLS'], sec_columns],
                                               names=['Estimator', 'Parameters'])
    result = pd.DataFrame(index = multi_index, columns = multi_columns)
    # Prepare X
    X_ = j.join(station)
    # Fit models
    nls = CLS_Estimator(obj_func = i, x0 = [0.001]*initial_len)
    cls = CLS_Estimator(obj_func = i, x0 = [0.001]*initial_len, constraints = constraint_func(X_))
    nls.params_ = nls.fit(X_,y).params_
    cls.params_ = cls.fit(X_,y).params_
    # Save results to dataframe
    result.loc[i.__name__,get_df_name(j)].loc['NLS'] = nls.params_ 
    result.loc[i.__name__,get_df_name(j)].loc['CLS'] = cls.params_ 
    # Put into one table
    results = results.append(result, ignore_index = False, sort = False)
    
# Export to Excel
results.to_excel('Results/full_sample.xlsx')

In [30]:
results.head(3)

Unnamed: 0_level_0,Estimator,CLS,CLS,CLS,CLS,CLS,CLS,CLS,NLS,NLS,NLS,NLS,NLS,NLS,NLS
Unnamed: 0_level_1,Parameters,param_1,param_2,param_3,param_4,param_5,param_6,param_7,param_1,param_2,param_3,param_4,param_5,param_6,param_7
function,variables,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
sin_func,co1,0.709838,-0.704365,0.022517,-0.104848,-15.689339,,,-1.008436,1.002847,0.000839,-0.073964,-0.023462,,
sin_func,co2,0.495116,-0.868827,0.079209,0.054524,-50.222284,,,-0.786401,0.690315,0.066833,-0.247875,0.003686,,
sin_func,co3,0.715437,-0.698678,-0.221696,-0.790349,-2.565328,,,-0.026356,-9e-05,0.059006,-0.224006,-0.083221,,


# WTF is hapenning here?!!!

In [31]:
# def fit_model(functions, variables, file_name):   
#     results = pd.DataFrame()
#     for i, j in itertools.product(functions, variables):
#         # Set up dimensions
#         d1, d2, extra= dimensions(j,station, i.__name__)
# #         print(d1,d2,extra[-1])
#         initial_len = d1+d2+extra[-1]+1

#         # Set up dataframes
#         iterables = [[i.__name__], [get_df_name(j)]]
#         sec_columns = ['param_'+str(i) for i in range(1,initial_len+1)]
#         multi_index = pd.MultiIndex.from_product(iterables, names=["function", "variables"])
#         multi_columns = pd.MultiIndex.from_product([['NLS', 'CLS'], sec_columns],
#                                                    names=['Estimator', 'Parameters'])
#         result = pd.DataFrame(index = multi_index, columns = multi_columns)
#         # Prepare X
#         X_ = j.join(station)
#         # Fit models
#         nls = CLS_Estimator(obj_func = i, x0 = [0.001]*initial_len)
#         cls = CLS_Estimator(obj_func = i, x0 = [0.001]*initial_len, constraints = constraint_func(X))
#         nls.params_ = nls.fit(X_,y).params_
#         cls.params_ = cls.fit(X_,y).params_
#         print(initial_len)
#         # Save results to dataframe
#         result.loc[i.__name__,get_df_name(j)].loc['NLS'] = nls.params_ 
#         result.loc[i.__name__,get_df_name(j)].loc['CLS'] = cls.params_ 
#         # Put into one table
#         results = results.append(result, ignore_index = False, sort = False)
#     # Export to Excel
#     path = 'Results/' + file_name +'.xlsx'
#     results.to_excel(path)
#     return results

In [32]:
# fit_model(fun_list, cointe_variables,'results_1835')

## Use initial values from Linear regression (using Taylor expansion)

In [33]:
orders = {'sin_func':1,
          'cos_func':2,
          'scaled_sin_func':1,
          'scaled_cos_func':2,
          'exp_func':5,
          'exp_shift_func':4,
          'poly_func':2
               }

In [95]:
def Taylor_init(variables, function):
    
    # find the initials for theta
    LR = LinearRegression()
    LR_theta = LR.fit(variables.iloc[:,1:], variables.iloc[:,:1])
    alpha = np.append(1, -LR_theta.coef_)
    theta = np.array(-alpha/np.linalg.norm(alpha))
    
    # calculate single-index
    u = single_index(variables)(theta)
    
    # find the initials for beta
    Xs = station.copy()
    Xs['u'], Xs['u2'], Xs['u3'], Xs['u4'], Xs['u6'] = u, u**2, u**3, u**4, u**6
    t_order = orders.get(function.__name__)
    X_reg = Xs.iloc[:, 0:d2+t_order]
    LR_taylor = LR.fit(X_reg, y)
    theta_gamma = np.append(theta,LR_taylor.coef_[:d2])
    
    # initials for gammas
    initials = []
    if function == sin_func:
        initials = np.append(theta_gamma, LR_taylor.intercept_)
    elif function == scaled_sin_func:
        initials = np.append(theta_gamma, ([LR_taylor.intercept_], [LR_taylor.coef_[0]]))
    elif function == poly_func:
        ini_poly_ = np.append(theta_gamma,LR_taylor.coef_[d2:])
        initials = np.insert(ini_poly_, 4, LR_taylor.intercept_)
    elif function == cos_func:
        initials = np.append(theta_gamma, [-LR_taylor.coef_[d2]])
    elif function == scaled_cos_func:
        initials = np.append(theta_gamma,(
            [-LR_taylor.coef_[2]/np.sqrt((1+LR_taylor.intercept_)*2)], [np.sqrt((1+LR_taylor.intercept_)*2)]))
    elif function == exp_shift_func:
        initials = np.append(theta_gamma, [LR_taylor.coef_[-2]/(2*LR_taylor.coef_[-1]), np.sqrt(np.abs(LR_taylor.coef_[-1]))])
        print(LR_taylor.coef_, LR_taylor.coef_[-2])
    elif function == exp_func:
        initials =np.append(theta_gamma, [LR_taylor.intercept_, -LR_taylor.coef_[2]/LR_taylor.intercept_])

    return initials

In [96]:
Taylor_init(co1, exp_shift_func)

[-0.00882082 -0.10876379  1.41342405  0.15096668 -0.68631559 -3.85751967] -0.6863155928406348


array([-0.7138883 ,  0.70025959, -0.00882082, -0.10876379,  0.08895815,
        1.96405694])

In [92]:
for i, j in itertools.product(fun_list, cointe_variables):
    print(i.__name__, get_df_name(j), Taylor_init(j, i))

sin_func co1 [-0.7138883   0.70025959 -0.00859438 -0.1093989  -0.07173334]
sin_func co2 [-0.70179883  0.71237518  0.06827272 -0.29066918 -0.00328981]
sin_func co3 [-0.83447494  0.55104589  0.0616905  -0.19762149 -0.03300318]
sin_func co4 [-0.67816344  0.73491112  0.08034757 -0.1530162   0.01037965]
cos_func co1 [-0.7138883   0.70025959 -0.00852383 -0.10946317 -1.40655916]
cos_func co2 [-0.70179883  0.71237518  0.06817921 -0.28618449 -1.79022612]
cos_func co3 [-0.83447494  0.55104589  0.06322531 -0.20057079 -0.01207906]
cos_func co4 [-0.67816344  0.73491112  0.05295398  0.04489563 12.73364976]
scaled_sin_func co1 [-0.7138883   0.70025959 -0.00859438 -0.1093989  -0.07173334 -0.00859438]
scaled_sin_func co2 [-0.70179883  0.71237518  0.06827272 -0.29066918 -0.00328981  0.06827272]
scaled_sin_func co3 [-0.83447494  0.55104589  0.0616905  -0.19762149 -0.03300318  0.0616905 ]
scaled_sin_func co4 [-0.67816344  0.73491112  0.08034757 -0.1530162   0.01037965  0.08034757]
scaled_cos_func co1 [-0.

# Fit model and Save Results

In [82]:
Taylor_init(co4, exp_shift_func)

[ 6.40724517e-02  4.67416789e-03 -7.00170125e+00  3.46389414e+03
 -2.70428001e+05  4.17127699e+06]


array([ 6.78163436e-01, -7.34911120e-01,  6.40724517e-02,  4.67416789e-03,
        1.21265760e+04,  3.46389414e+03])

In [81]:
LR = LinearRegression()
LR_theta = LR.fit(co4.iloc[:,1:], co4.iloc[:,:1])
alpha = np.append(1, -LR_theta.coef_)
theta = np.array(alpha/np.linalg.norm(alpha))
u = single_index(co4)(theta)
print(u[5:10])

[0.00315512 0.00611151 0.00506078 0.00462011 0.00296869]


In [97]:
results_Taylor = pd.DataFrame()
for i, j in itertools.product(fun_list, cointe_variables):
    # Set up dimensions
    d1, d2, extra= dimensions(j,station, i.__name__)
    initial_len = d1+d2+extra[-1]+1
    # Set up dataframes
    iterables = [[i.__name__], [get_df_name(j)]]
    sec_columns = ['param_'+str(i) for i in range(1,initial_len+1)]
    multi_index = pd.MultiIndex.from_product(iterables, names=["function", "variables"])
    multi_columns = pd.MultiIndex.from_product([['NLS', 'CLS'], sec_columns],
                                               names=['Estimator', 'Parameters'])
    result = pd.DataFrame(index = multi_index, columns = multi_columns)
    # Prepare X
    X_ = j.join(station)
    # Fit models
    
    nls = CLS_Estimator(obj_func = i, x0 = Taylor_init(j, i))
    cls = CLS_Estimator(obj_func = i, x0 = Taylor_init(j, i), constraints = constraint_func(X_))
    nls.params_ = nls.fit(X_,y).params_
    cls.params_ = cls.fit(X_,y).params_
    print(i.__name__, get_df_name(j))
    # Save results to dataframe
    result.loc[i.__name__,get_df_name(j)].loc['NLS'] = nls.params_ 
    result.loc[i.__name__,get_df_name(j)].loc['CLS'] = cls.params_ 
    # Put into one table
    results_Taylor = results_Taylor.append(result, ignore_index = False, sort = False)
    
# Export to Excel
results_Taylor.to_excel('Results/Taylor_fullsample_0709.xlsx')
results_Taylor.tail()

sin_func co1
sin_func co2
sin_func co3
sin_func co4
cos_func co1
cos_func co2
cos_func co3
cos_func co4
scaled_sin_func co1
scaled_sin_func co2
scaled_sin_func co3
scaled_sin_func co4
scaled_cos_func co1
scaled_cos_func co2
scaled_cos_func co3
scaled_cos_func co4
exp_func co1
exp_func co2
exp_func co3
exp_func co4
[-0.00882082 -0.10876379  1.41342405  0.15096668 -0.68631559 -3.85751967] -0.6863155928406348
[-0.00882082 -0.10876379  1.41342405  0.15096668 -0.68631559 -3.85751967] -0.6863155928406348
exp_shift_func co1
[ 4.13094821e-02 -2.96216233e-01  5.45512336e+00 -7.67025744e+01
 -1.20914000e+04  3.41145483e+05] -12091.399976190176
[ 4.13094821e-02 -2.96216233e-01  5.45512336e+00 -7.67025744e+01
 -1.20914000e+04  3.41145483e+05] -12091.399976190176
exp_shift_func co2
[ 0.05710621 -0.19015056  0.81353738 -1.32071529  0.85457059 -0.18976655] 0.8545705934256219
[ 0.05710621 -0.19015056  0.81353738 -1.32071529  0.85457059 -0.18976655] 0.8545705934256219
exp_shift_func co3
[6.40724517e-02

Unnamed: 0_level_0,Estimator,CLS,CLS,CLS,CLS,CLS,CLS,CLS,NLS,NLS,NLS,NLS,NLS,NLS,NLS
Unnamed: 0_level_1,Parameters,param_1,param_2,param_3,param_4,param_5,param_6,param_7,param_1,param_2,param_3,param_4,param_5,param_6,param_7
function,variables,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
exp_shift_func,co4,-0.66623,0.745746,0.041818,0.145707,-0.002983,2147.617124,,-0.320057,0.383753,0.083023,-0.087313,0.000642,2194.819044,
poly_func,co1,-0.709069,0.705139,0.000653,-0.075503,-0.023284,1.415525,0.008927,-0.712608,0.708657,0.000585,-0.075911,-0.023296,1.408806,0.009038
poly_func,co2,-0.724389,0.689391,0.067567,-0.272215,0.000857,1.590412,-28.011453,-0.586084,0.522989,0.067498,-0.256079,0.004575,1.592035,-28.012295
poly_func,co3,-0.97982,0.199885,0.034193,-0.16009,-0.830234,0.53591,-0.083837,-4.571478,0.930574,0.034471,-0.160394,-0.822782,0.113751,-0.00381
poly_func,co4,-0.639086,0.769135,0.029136,0.222809,0.029647,0.171961,-1066.824933,-0.647757,0.777972,0.028445,0.229783,0.030041,-0.098865,-1066.754865


In [99]:
results_Taylor

Unnamed: 0_level_0,Estimator,CLS,CLS,CLS,CLS,CLS,CLS,CLS,NLS,NLS,NLS,NLS,NLS,NLS,NLS
Unnamed: 0_level_1,Parameters,param_1,param_2,param_3,param_4,param_5,param_6,param_7,param_1,param_2,param_3,param_4,param_5,param_6,param_7
function,variables,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
sin_func,co1,-0.709831,0.704372,0.022482,-0.105361,-0.018576,,,-1.008332,1.002748,0.000805,-0.077334,-0.023443,,
sin_func,co2,-0.756221,0.654316,0.067078,-0.241745,0.004572,,,-0.776286,0.676577,0.066996,-0.244228,0.004087,,
sin_func,co3,-0.715438,0.698677,-0.221703,-0.790293,-0.576272,,,-0.026355,-9.4e-05,0.059019,-0.224064,-12.649602,,
sin_func,co4,-0.761685,0.647948,0.06769,-0.137607,0.026905,,,0.257725,-0.470468,0.07659,-0.090404,0.023487,,
cos_func,co1,-0.709842,0.704361,0.022494,-0.105349,-1.589442,,,-1.008559,1.00295,0.000815,-0.075824,-1.59433,,
cos_func,co2,-0.756206,0.654334,0.06692,-0.241842,-1.566229,,,-0.778349,0.679822,0.066654,-0.245015,-1.566821,,
cos_func,co3,-0.678465,0.734633,0.322686,0.325628,1.202308,,,0.026413,-1e-05,0.058954,-0.223495,1.653938,,
cos_func,co4,-0.574132,0.818762,0.079255,-0.074912,32.964214,,,-0.246581,0.458831,0.076428,-0.090107,32.963154,,
scaled_sin_func,co1,-0.709078,0.70513,0.000803,-0.076606,-0.02355,1.422405,,-1.041841,1.035985,0.000852,-0.077664,-0.023726,0.968271,
scaled_sin_func,co2,-0.75149,0.659745,0.066715,-0.248793,0.003674,1.048277,,-0.859137,0.754006,0.066726,-0.24915,0.003695,0.917307,


### GridSearch and CrossValidation

### Train_test split

In [None]:
X_train, X_test, y_train, y_test = dpr.data_split(df, 'EQP', "1987-12-01", "2018-12-01")

In [None]:
# val_length = 1
test_length = 1
step = 1
# cv_outer = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=int((12/step) * test_length), test_size=step)
# cv_inner = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=int((12/step) * val_length), test_size=step)
cv_outer = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=31*4, test_size=step)
# cv_inner = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=4, test_size=step)

In [None]:
test_length = 31
step = 1
freq = 4
cv_outer = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=test_length*freq, test_size=step)

In [None]:
# for train_index, test_index in cv_outer.split(X):
# #     print(train_index[-3:-1], test_index)
#     print(X.iloc[test_index])

In [None]:
space = dict()
space['constraints'] = [(), constraint_func(x)]
space['x0'] = [[0.01]*8,[1]*8]

In [None]:
X = co1.join(station)

In [None]:
constraints = constraint_func(X)

In [None]:
nls = CLS_Estimator(obj_func = sin_func, x0 = taylor_init['sin_func'].dropna())
cls = CLS_Estimator(obj_func = sin_func, x0 = taylor_init['sin_func'].dropna(), constraints = constraints)

In [None]:
from sklearn.model_selection import cross_validate
from sklearn.metrics import explained_variance_score

In [None]:
cv_result = cross_validate(nls, X, y, cv=cv_outer, scoring = 'neg_mean_squared_error')

In [None]:
nls_mse = cv_result['test_score']

In [None]:
# models, c, model_mse = mr.Nested_CV(X = X, y = y, model = cls, 
#                                              cv_inner = cv_inner, cv_outer = cv_outer, 
#                                              search_method = 'Grid', space = space)

### Benchmark model: sample mean

In [None]:
sm_pred, sm_mse = bench.sample_mean(y, "1988-01-01", cv_outer = cv_outer)

### $R^2$ plot

In [None]:
R2 = 1-(-nls_mse)/sm_mse

In [None]:
OOS_sin = pd.DataFrame(
    {'nls':-nls_mse, 'sm':sm_mse}
)
OOS_sin.to_excel('OOS_sin.xlsx')

In [None]:
plt.plot(-nls_mse)
plt.plot(sm_mse)[]

In [None]:
np.sum(-nls_mse<sm_mse)

In [None]:
plt.plot(R2)
plt.savefig('sin_oos.jpg')

In [None]:
R2_new = 1 / (1 + np.exp(-np.array(R2)))
plt.plot(R2_new[0:47])

In [None]:
# bench.plot_R2(y_test[::3], c, sm_pred, adjust = False, alpha = 0.8)