In [4]:
import warnings

import numpy as np
import pandas as pd

from scipy.optimize import minimize

## Try my estimator

## load data

In [5]:
# Read the full workbook once, then split it into the regression target
# (column 'EQP') and the three regressor columns used by the model.
x = pd.read_excel('x1.xlsx')
y_train = x['EQP']
x1 = x[['DP', 'EP', 'b/m']]

In [6]:
x1.head()

Unnamed: 0,DP,EP,b/m
0,-2.942374,-2.374773,0.443706
1,-2.979535,-2.430353,0.428501
2,-2.976535,-2.445079,0.469765
3,-2.984225,-2.471309,0.456754
4,-3.025963,-2.531446,0.434783


In [7]:
y_train.head()

0   -0.005469
1    0.041828
2    0.004559
3    0.010274
4    0.057665
Name: EQP, dtype: float64

##  Nonlinear function with unknown parameters

We define a linear combination of x with unknown parameter $\theta$ and we call it the "single-index":

$$
single\_index = \theta x^{T}
$$

The function below calculates the single-index and returns a function of the unknown parameter $\theta$:

In [8]:
def single_index(x):
    """Build the single-index function ``u(theta)`` for a fixed data matrix.

    Parameters
    ----------
    x : pandas.DataFrame or numpy.ndarray
        Two-dimensional data with one column per regressor.

    Returns
    -------
    callable
        ``u(theta)``, which takes one coefficient per column of ``x`` and
        returns, for every row, the sum of that row's entries weighted by
        ``theta``.

    Raises
    ------
    TypeError
        If ``x`` is neither a DataFrame nor an ndarray.
    """
    if isinstance(x, pd.DataFrame):
        x_values = x.values
    elif isinstance(x, np.ndarray):
        # ndarray input is used as-is; without this assignment the closure
        # below would fail with a NameError on ``x_values``.
        x_values = x
    else:
        raise TypeError('wrong type')

    def u(theta):
        """Return the per-row linear combination of the columns of ``x``.

        Raises ``ValueError`` when ``theta`` does not have exactly one entry
        per column of ``x``.
        """
        if len(theta) != x_values.shape[1]:
            raise ValueError('wrong parameter dimension')
        # Matrix-vector product == sum_i x[:, i] * theta[i].
        return x_values @ np.asarray(theta)
    return u

We then put this single-index into nonlinear functions. 
Take sin function as an example:
$$
f = \sin\left(\left(\theta x^{T}\right) + \gamma\right)
$$
where $\theta$ and $\gamma$ are unknown parameters.

The function below defines the above sin function and returns a function of the unknown parameters $\theta$
and $\gamma$:

In [9]:
def sin_func(x):
    """Return the model ``params -> sin(single_index(x)(theta) + gamma)``.

    ``params`` is laid out as ``[theta_0, ..., theta_{k-1}, gamma]`` where
    ``k = x.shape[1]``; entries beyond position ``k`` are not read.
    """
    def objective_func(params):
        # Build the index function first, then split the parameter vector.
        index_of = single_index(x)
        n_coef = x.shape[1]
        linear_part = index_of(params[0:n_coef])
        shifted = linear_part + params[n_coef]
        return np.sin(shifted)
    return objective_func

In [41]:
# Smoke check: one prediction per row. The model reads x1.shape[1] coefficients
# plus one shift (4 values here); any extra entries would be silently ignored.
len(sin_func(x1)([1, 2, 3, 4]))

732

## Construct loss function
Loss function is defined as the sum of squared errors:
$$
\mathrm{Loss} = \sum\left(y-\sin\left(\left(\theta x^{T}\right) + \gamma\right)\right)^2
$$

The loss function below returns a function of the unknown parameters $\theta$ and $\gamma$:

In [10]:
def loss(x, y, obj_func):
    """Return the sum-of-squared-errors objective for a model factory.

    ``obj_func(x)`` must yield a callable mapping a parameter vector to
    predictions; the returned ``loss_func(params)`` evaluates
    ``sum((y - prediction) ** 2)``.
    """
    def loss_func(params):
        residuals = y - obj_func(x)(params)
        return np.sum(residuals ** 2)
    return loss_func

## run the minimizer:
We minimize the loss function and get an estimate of $\theta$ and $\gamma$

In [25]:
# One starting value per column of x1 plus one for the shift term gamma,
# derived from the data instead of hard-coding the count.
minimize(loss(x1, y_train, sin_func), x0=[0.001] * (x1.shape[1] + 1), method='SLSQP')

     fun: 2.570908341721376
     jac: array([0.00020206, 0.00107372, 0.00558054, 0.00164834])
 message: 'Optimization terminated successfully.'
    nfev: 48
     nit: 6
    njev: 6
  status: 0
 success: True
       x: array([-0.0211188 ,  0.00024749, -0.01008116, -0.05366567])

## But when I try to wrap this into sklearn BaseEstimator, it does not work any more:

In [30]:
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

In [42]:
class CLS_Estimator(RegressorMixin, BaseEstimator):
    """Least-squares estimator for a user-supplied parametric model.

    The model is passed as a factory ``obj_func``: ``obj_func(X)`` must return
    a callable that maps a parameter vector to predictions for ``X``.
    ``fit`` minimizes the sum of squared residuals with
    ``scipy.optimize.minimize``.

    Parameters
    ----------
    obj_func : callable, default=None
        Model factory as described above. Must be set before calling ``fit``.
    x0 : array-like or scalar, default=0
        Initial guess handed to the optimizer.
    method : str, default='SLSQP'
        Solver name forwarded to ``scipy.optimize.minimize``.

    Attributes
    ----------
    params_ : ndarray
        Optimizer solution. Only present after a ``fit`` in which the
        optimizer reported success.
    """

    def __init__(self, obj_func=None, x0=0, method='SLSQP'):
        # Store constructor arguments only. Fitted state (trailing underscore)
        # must not exist before ``fit`` so the estimator is not mistaken for a
        # fitted one.
        self.obj_func = obj_func
        self.x0 = x0
        self.method = method

    def loss(self, x, y):
        """Return ``loss_func(params)``: the sum of squared errors on ``(x, y)``."""
        def loss_func(params):
            error = np.sum((y - self.obj_func(x)(params)) ** 2)
            return error
        return loss_func

    def fit(self, x, y):
        """Estimate the model parameters by minimizing the squared error.

        Parameters
        ----------
        x : object accepted by ``obj_func``
            Training inputs.
        y : array-like
            Training targets.

        Returns
        -------
        self

        Raises
        ------
        TypeError
            If ``obj_func`` is not callable.

        Warns
        -----
        RuntimeWarning
            If the optimizer does not report success; ``params_`` is then
            left unset.
        """
        if not callable(self.obj_func):
            raise TypeError(
                "obj_func must be a callable model factory, got %r" % (self.obj_func,)
            )

        self._train_data = x
        self._train_target = y

        # Discard the result of any earlier fit so a failed refit cannot
        # leave stale parameters behind.
        if hasattr(self, 'params_'):
            del self.params_

        res = minimize(
            self.loss(x, y),
            x0=self.x0,
            method=self.method,
        )

        if res.success:
            self.params_ = res.x
        else:
            warnings.warn(
                "Optimization did not succeed (%s); params_ was not set."
                % (res.message,),
                RuntimeWarning,
                stacklevel=2,
            )
        return self

    def predict(self, X):
        """Evaluate the fitted model on ``X``.

        Raises ``NotFittedError`` (via ``check_is_fitted``) when ``params_``
        is missing, i.e. ``fit`` was never called or did not succeed.
        """
        check_is_fitted(self, 'params_')
        # ``yhat`` is kept as an attribute for backward compatibility with
        # code that reads it after calling ``predict``.
        self.yhat = self.obj_func(X)(self.params_)
        return self.yhat

In [43]:
# Starting vector: one value per column of x1 plus one for the shift term,
# derived from the data instead of hard-coding the count.
cls = CLS_Estimator(obj_func=sin_func, x0=[0.001] * (x1.shape[1] + 1))

In [44]:
cls.fit(x1,y_train)

CLS_Estimator(obj_func=<function sin_func at 0x00000196C27DD950>,
              x0=[0.001, 0.001, 0.001, 0.001])

In [46]:
yhat = cls.predict(x1)