# Custom Estimators

## Reference: [Developing scikit-learn estimators](https://scikit-learn.org/dev/developers/develop.html)

# Function Transformer

In [4]:
from sklearn.preprocessing import FunctionTransformer
def add_two(X):
    """Forward transformation: shift every entry of X up by 2."""
    return X + 2


def subtract_two(X):
    """Inverse transformation: undo ``add_two`` by shifting every entry down by 2."""
    return X - 2


# Named functions are used instead of lambdas so that the resulting
# transformer can be pickled; a FunctionTransformer wrapping a lambda cannot.
ft = FunctionTransformer(func=add_two, inverse_func=subtract_two)

In [5]:
import numpy as np
# Toy data: the integers 0..99 arranged as rows of 4 features each
# (the row count, 25, is inferred from the total length).
X = np.arange(100).reshape(-1, 4)
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31],
       [32, 33, 34, 35],
       [36, 37, 38, 39],
       [40, 41, 42, 43],
       [44, 45, 46, 47],
       [48, 49, 50, 51],
       [52, 53, 54, 55],
       [56, 57, 58, 59],
       [60, 61, 62, 63],
       [64, 65, 66, 67],
       [68, 69, 70, 71],
       [72, 73, 74, 75],
       [76, 77, 78, 79],
       [80, 81, 82, 83],
       [84, 85, 86, 87],
       [88, 89, 90, 91],
       [92, 93, 94, 95],
       [96, 97, 98, 99]])

In [6]:
# fit() returns the estimator itself, so the transform call can be chained;
# the cell still displays the transformed array (every entry shifted by +2).
ft.fit(X).transform(X)

array([[  2,   3,   4,   5],
       [  6,   7,   8,   9],
       [ 10,  11,  12,  13],
       [ 14,  15,  16,  17],
       [ 18,  19,  20,  21],
       [ 22,  23,  24,  25],
       [ 26,  27,  28,  29],
       [ 30,  31,  32,  33],
       [ 34,  35,  36,  37],
       [ 38,  39,  40,  41],
       [ 42,  43,  44,  45],
       [ 46,  47,  48,  49],
       [ 50,  51,  52,  53],
       [ 54,  55,  56,  57],
       [ 58,  59,  60,  61],
       [ 62,  63,  64,  65],
       [ 66,  67,  68,  69],
       [ 70,  71,  72,  73],
       [ 74,  75,  76,  77],
       [ 78,  79,  80,  81],
       [ 82,  83,  84,  85],
       [ 86,  87,  88,  89],
       [ 90,  91,  92,  93],
       [ 94,  95,  96,  97],
       [ 98,  99, 100, 101]])

In [7]:
# Round trip: apply the forward function, then undo it with inverse_func.
# The displayed result should match the original X.
X_shifted = ft.transform(X)
ft.inverse_transform(X_shifted)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31],
       [32, 33, 34, 35],
       [36, 37, 38, 39],
       [40, 41, 42, 43],
       [44, 45, 46, 47],
       [48, 49, 50, 51],
       [52, 53, 54, 55],
       [56, 57, 58, 59],
       [60, 61, 62, 63],
       [64, 65, 66, 67],
       [68, 69, 70, 71],
       [72, 73, 74, 75],
       [76, 77, 78, 79],
       [80, 81, 82, 83],
       [84, 85, 86, 87],
       [88, 89, 90, 91],
       [92, 93, 94, 95],
       [96, 97, 98, 99]])

# Basic Transformer

In [9]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array

class MyTransformer(BaseEstimator, TransformerMixin):
    """Bare-bones transformer skeleton that adds 1 to every entry of X.

    Note that ``transform`` does not compare the number of features it
    receives with what ``fit`` saw.
    """

    def __init__(self, first_parameter=1, second_parameter=2):
        # Every hyperparameter must be listed in the __init__ signature,
        # and __init__ may do nothing beyond storing the given values.
        self.first_parameter = first_parameter
        self.second_parameter = second_parameter

    def fit(self, X, y=None):
        """Validate X, run the (placeholder) fitting step, and return self.

        ``fit`` takes only X and y. Even an unsupervised model has to
        accept a ``y`` argument, which is why it defaults to None here.
        """
        # Input validation only; the checked array is not needed afterwards.
        check_array(X)

        # Model fitting code goes here
        print("fitting the model right here")

        # fit always hands back the estimator itself
        return self

    def transform(self, X):
        """Return the validated X with 1 added to every entry.

        ``transform`` takes X as its only parameter.
        """
        return check_array(X) + 1



In [10]:
from sklearn.utils.estimator_checks import check_estimator
# check_estimator expects an estimator *instance*; passing the bare class is
# rejected by recent scikit-learn releases.
# This call is expected to fail: MyTransformer.transform does not verify that
# the number of features matches what was seen during fit.
check_estimator(MyTransformer())

fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here
fitting the model right here


AssertionError: ValueError not raised : The transformer MyTransformer does not raise an error when the number of features in transform is different from the number of features in fit.

In [11]:
from sklearn.base import TransformerMixin
from sklearn.utils.validation import check_X_y

class MyTransformer(BaseEstimator, TransformerMixin):
    """Toy transformer that subtracts 2 from every entry of X.

    Unlike a transformer without any fitted state, this one records the
    number of features seen in ``fit`` and rejects input with a different
    number of columns in ``transform``.

    Parameters
    ----------
    my_parameter : object, default="stuff"
        Placeholder hyperparameter; it is stored but not otherwise used.

    Attributes
    ----------
    n_features_ : int
        Number of columns of the X passed to ``fit``.
    """

    def __init__(self, my_parameter="stuff"):
        self.my_parameter = my_parameter

    def fit(self, X, y=None):
        """Validate the input and remember its number of features.

        ``y`` is optional so that unsupervised usage such as ``fit(X)`` and
        ``fit_transform(X)`` works; when given, X and y are validated
        together.

        Returns
        -------
        self
        """
        if y is None:
            # Nothing to validate against: check X on its own.
            X = check_array(X)
        else:
            X, y = check_X_y(X, y)
        self.n_features_ = X.shape[1]
        return self

    def transform(self, X):
        """Return the validated X with 2 subtracted from every entry.

        Raises
        ------
        ValueError
            If X does not have the same number of features as the data
            passed to ``fit``.
        """
        X = check_array(X)
        if X.shape[1] != self.n_features_:
            raise ValueError("Wrong number of features {} != {}".format(
                X.shape[1], self.n_features_))
        return X - 2

In [12]:
# check_estimator expects an estimator *instance*; passing the bare class is
# rejected by recent scikit-learn releases.
check_estimator(MyTransformer())

# Exercise
- Reimplement a simple version of the standard scaler (which removes the mean and scales to unit variance) with the scikit-learn interface. Can you make it pass the tests? Does it give the same result as `sklearn.preprocessing.StandardScaler`?
- Reimplement a one-nearest-neighbor classifier with the scikit-learn interface (one that memorizes the training set and assigns a new test point to the class of the closest training point). Again, try making it pass the tests.

Hint: use `sklearn.utils.validation.check_is_fitted` and `sklearn.utils.multiclass.unique_labels` (though you don't have to).

In [None]:
# %load solutions/custom_estimators.py