In [1]:
import warnings

import numpy as np
import pandas as pd

from scipy.optimize import minimize

## Try my estimator

## load data

In [226]:
# Load the monthly data set; the relative path assumes the kernel runs from the notebook's directory.
x = pd.read_excel('data/EQP_Monthly.xlsx')
# Regressor matrix: the three columns that enter the single-index.
x1 = x[['DP', 'EP', 'b/m']]
# Regression target: the `EQP` column.
y = x['EQP']

In [227]:
x1.head()

Unnamed: 0,DP,EP,b/m
0,-2.942374,-2.374773,0.443706
1,-2.979535,-2.430353,0.428501
2,-2.976535,-2.445079,0.469765
3,-2.984225,-2.471309,0.456754
4,-3.025963,-2.531446,0.434783


In [228]:
y.head()

0   -0.005469
1    0.041828
2    0.004559
3    0.010274
4    0.057665
Name: EQP, dtype: float64

##  Nonlinear function with unknown parameters

We define a linear combination of x with unknown parameter $\theta$ and we call it the "single-index":

$$
single\_index = \theta x^{T}
$$

The function below calculates the single-index and returns a function of the unknown parameter $\theta$:

We then put this single-index into nonlinear functions. 
Take sin function as an example:
$$
f = \sin\left(\theta x^{T} + \gamma\right)
$$
where $\theta$ and $\gamma$ are unknown parameters.

The function below defines the above sin function and returns a function with unknown parameters $\theta$
and $\gamma$:

## Construct loss function
The loss function is defined as the sum of squared errors:
$$
Loss = \sum\left(y-\sin\left(\theta x^{T} + \gamma\right)\right)^2
$$

The loss function below returns a function with unknown parameters $\theta$ and $\gamma$:

## run the minimizer:
We minimize the loss function and get an estimate of $\theta$ and $\gamma$.

## Using BaseEstimator:

In [8]:
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

In [219]:
def single_index(x):
    """Build the single-index map ``theta -> x @ theta`` for a fixed data matrix.

    Parameters
    ----------
    x : pandas.DataFrame or numpy.ndarray
        Two-dimensional data, one row per observation and one column per
        regressor.

    Returns
    -------
    callable
        ``u(theta)`` returning a 1-D array with one value per row of ``x``:
        the linear combination of the columns of ``x`` weighted by ``theta``.

    Raises
    ------
    TypeError
        If ``x`` is neither a DataFrame nor an ndarray.
    ValueError
        If ``x`` is not two-dimensional, or (from ``u``) if ``theta`` does not
        have exactly one entry per column of ``x``.
    """
    if isinstance(x, pd.DataFrame):
        x_values = x.values
    elif isinstance(x, np.ndarray):
        # The array branch previously left ``x_values`` unbound, so any
        # ndarray input failed with NameError inside ``u``.
        x_values = x
    else:
        raise TypeError('wrong type')

    if x_values.ndim != 2:
        raise ValueError('x must be two-dimensional (n_samples, n_features)')

    n_features = x_values.shape[1]

    def u(theta):
        if len(theta) != n_features:
            raise ValueError('wrong parameter dimension')
        # Matrix-vector product == sum_i x[:, i] * theta[i].
        return x_values @ np.asarray(theta)

    return u
    
class CLS_Estimator(BaseEstimator, RegressorMixin):
    """Least-squares estimator for a user-supplied parametric model.

    The parameters are found by minimising the sum of squared errors with
    ``scipy.optimize.minimize``.

    Parameters
    ----------
    obj_func : callable
        Model factory. ``obj_func(x)`` must return a function of the parameter
        vector that yields the model's predictions for ``x``.
    x0 : array-like
        Initial guess handed to ``minimize``.
    method : str
        Solver name handed to ``minimize``.
    constraints : dict or sequence of dict
        Constraint definitions handed to ``minimize``.

    Attributes
    ----------
    params_ : numpy.ndarray
        Parameter vector returned by the optimiser in the last ``fit`` call.
    yhat : array-like
        Predictions produced by the most recent ``predict`` call.
    """

    def __init__(self, obj_func=None, x0=0, method='SLSQP', constraints = ()):
        # Only hyper-parameters are stored here. Fitted state (``params_``) is
        # created in ``fit`` so that ``check_is_fitted`` can tell a fresh
        # estimator from a fitted one.
        self.obj_func = obj_func
        self.x0 = x0
        self.method = method
        self.constraints = constraints

    def constraint_func(self, x):
        """Return an equality constraint forcing the index weights to unit norm.

        The constraint covers the first ``x.shape[1]`` entries of the parameter
        vector: ``sum(params[j] ** 2) - 1 == 0``. The column count is taken
        from the argument ``x`` rather than from a notebook-level variable.

        Returns
        -------
        dict
            ``{'type': 'eq', 'fun': constraint}``, the dictionary form accepted
            by ``scipy.optimize.minimize``.
        """
        n_features = x.shape[1]

        def constraint(params):
            return sum(params[j] ** 2 for j in range(n_features)) - 1

        return {'type':'eq', 'fun': constraint}

    def loss(self, x, y):
        """Return the sum-of-squared-errors objective for data ``x`` and target ``y``."""
        def loss_func(params):
            residual = y - self.obj_func(x)(params)
            return np.sum(residual ** 2)
        return loss_func

    def fit(self, x, y):
        """Estimate the parameters by minimising the squared-error loss.

        If the optimiser reports failure a ``RuntimeWarning`` is emitted and
        the optimiser's last iterate is still stored. Earlier behaviour left
        ``params_`` as ``None`` (or as the value from a previous fit) without
        any notice.

        Returns
        -------
        self
        """
        self._train_data = x
        self._train_target = y

        res = minimize(
            self.loss(x, y),
            x0=self.x0,
            method=self.method,
            constraints=self.constraints,
        )

        if not res.success:
            warnings.warn(
                f"CLS_Estimator optimisation did not converge: {res.message}",
                RuntimeWarning,
            )
        self.params_ = res.x
        return self

    def predict(self, X):
        """Evaluate the fitted model on ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        check_is_fitted(self, 'params_')
        # The predictions are also kept on the instance, as before.
        self.yhat = self.obj_func(X)(self.params_)
        return self.yhat

In [220]:
def sin_func(x):
    """Return the model ``params -> sin(single_index(x)(theta) + gamma)``.

    ``params`` holds one weight per column of ``x`` followed by the shift
    ``gamma`` at position ``x.shape[1]``.
    """
    def objective_func(params):
        index_of = single_index(x)
        n_features = x.shape[1]
        linear_part = index_of(params[:n_features])
        return np.sin(linear_part + params[n_features])

    return objective_func

In [221]:
def constraint_func(x):
    """Build a unit-norm equality constraint on the single-index weights.

    Parameters
    ----------
    x : pandas.DataFrame or numpy.ndarray
        Data matrix; only ``x.shape[1]`` (the number of regressors) is used.

    Returns
    -------
    dict
        ``{'type': 'eq', 'fun': constraint}`` where ``constraint(params)``
        returns ``sum(params[j] ** 2 for j < x.shape[1]) - 1``. Entries after
        the first ``x.shape[1]`` (e.g. the shift parameter) are ignored.
    """
    # The column count comes from the argument, not from the notebook-level
    # ``x1``, so the constraint matches whatever data it is built for.
    n_features = x.shape[1]

    def constraint(params):
        return sum(params[j] ** 2 for j in range(n_features)) - 1

    return {'type':'eq', 'fun': constraint}

In [222]:
# x0 has 4 entries: one weight per regressor (3 columns) plus the shift read by sin_func.
cls = CLS_Estimator(obj_func = sin_func, x0 = [0.001]*4)

In [223]:
# Fit on the frame loaded above; `y` is the target column taken from the same frame as `x1`.
cls.fit(x1, y)

CLS_Estimator(obj_func=<function sin_func at 0x000001AB0B16ED90>,
              x0=[0.001, 0.001, 0.001, 0.001])

In [224]:
# x1[-1:] is a row slice, so predict receives a one-row DataFrame (the last observation).
yhat = cls.predict(x1[-1:])

In [225]:
yhat

array([0.01107173])

In [230]:
import DataPreparation as dpr
import ModelRun as mr

from sklearn.model_selection import TimeSeriesSplit

In [248]:
# Load and clean the data through the project helper module.
# NOTE(review): presumably read_data resolves 'EQP_Monthly' to the Excel file read earlier and
# data_clean treats '1927-01-01' as the sample start — confirm in DataPreparation.
df = dpr.read_data('EQP_Monthly')
df = dpr.data_clean(df, '1927-01-01')

In [249]:
df.head()

Unnamed: 0_level_0,EQP,DP,DY,EP,DE,svar,b/m,ntis,tbl,lty,ltr,TMS,DFR,DFY,infl
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1927-01-01,-0.005469,-2.942374,-2.963349,-2.374773,-0.567601,0.00047,0.443706,0.050833,0.0307,0.0351,0.0075,0.0044,-0.0019,0.0095,-0.011299
1927-02-01,0.041828,-2.979535,-2.932946,-2.430353,-0.549182,0.000287,0.428501,0.051681,0.0323,0.0347,0.0088,0.0024,-0.0019,0.0092,-0.005714
1927-03-01,0.004559,-2.976535,-2.970053,-2.445079,-0.531456,0.000924,0.469765,0.04637,0.0329,0.0331,0.0253,0.0002,-0.017,0.0092,-0.005747
1927-04-01,0.010274,-2.984225,-2.967143,-2.471309,-0.512916,0.000603,0.456754,0.050518,0.032,0.0333,-0.0005,0.0013,0.006,0.009,0.0
1927-05-01,0.057665,-3.025963,-2.975058,-2.531446,-0.494518,0.000392,0.434783,0.055279,0.0339,0.0327,0.0109,-0.0012,-0.012,0.0093,0.00578


In [250]:
# Keep the three regressors plus the target column `EQP` (the same columns used for `x1` and `y`).
df = df[['DP','EP','b/m','EQP']]

In [251]:
# NOTE(review): presumably 'EQP' names the target column and the two dates bound the
# train/test windows — confirm against DataPreparation.data_split.
X_train, X_test, y_train, y_test = dpr.data_split(df, 'EQP', "1987-12-01", "2019-12-01")

In [253]:
X_train.head(3)

Unnamed: 0_level_0,DP,EP,b/m
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1927-01-01,-2.942374,-2.374773,0.443706
1927-02-01,-2.979535,-2.430353,0.428501
1927-03-01,-2.976535,-2.445079,0.469765


In [243]:
# Walk-forward splitters: every fold tests on `step` observation(s).
# The outer loop has 12 * test_length folds and the inner loop 12 * val_length folds.
val_length, test_length, step = 1, 32, 1
cv_outer = TimeSeriesSplit(
    n_splits=12 * test_length,
    test_size=step,
    gap=0,
    max_train_size=None,
)
cv_inner = TimeSeriesSplit(
    n_splits=12 * val_length,
    test_size=step,
    gap=0,
    max_train_size=None,
)

In [262]:
# Fresh estimator for the train/test workflow; 4 start values = 3 regressor weights + the shift.
cls = CLS_Estimator(obj_func = sin_func, x0 = [0.001]*4)

In [263]:
# `cls` was built without `constraints`, so this fit runs with the default empty tuple.
cls.fit(X_train,y_train)

CLS_Estimator(obj_func=<function sin_func at 0x000001AB0B16ED90>,
              x0=[0.001, 0.001, 0.001, 0.001])

In [267]:
# Search space for the nested CV: a single candidate value for `constraints`.
space = {'constraints': [constraint_func(X_train)]}

In [269]:
# Nested cross-validation over `space` with the splitters defined above.
# The recorded run below was interrupted manually (KeyboardInterrupt).
models, model_pred, model_mse = mr.Nested_CV(
    X=X_train,
    y=y_train,
    model=cls,
    cv_inner=cv_inner,
    cv_outer=cv_outer,
    search_method='Grid',
    space=space,
)

mse=0.0066, best=-0.0210, cfg={'constraints': {'type': 'eq', 'fun': <function constraint_func.<locals>.constraint at 0x000001AB0C712E18>}}
mse=0.0233, best=-0.0207, cfg={'constraints': {'type': 'eq', 'fun': <function constraint_func.<locals>.constraint at 0x000001AB0C712E18>}}
mse=0.0441, best=-0.0219, cfg={'constraints': {'type': 'eq', 'fun': <function constraint_func.<locals>.constraint at 0x000001AB0C712E18>}}
mse=0.0155, best=-0.0249, cfg={'constraints': {'type': 'eq', 'fun': <function constraint_func.<locals>.constraint at 0x000001AB0C712E18>}}
mse=0.0023, best=-0.0246, cfg={'constraints': {'type': 'eq', 'fun': <function constraint_func.<locals>.constraint at 0x000001AB0C712E18>}}
mse=0.0201, best=-0.0237, cfg={'constraints': {'type': 'eq', 'fun': <function constraint_func.<locals>.constraint at 0x000001AB0C712E18>}}
mse=0.0227, best=-0.0220, cfg={'constraints': {'type': 'eq', 'fun': <function constraint_func.<locals>.constraint at 0x000001AB0C712E18>}}
mse=0.0026, best=-0.0207, c

KeyboardInterrupt: 