#Imports

In [1]:
# Based on https://towardsdatascience.com/pipelines-custom-transformers-in-scikit-learn-the-step-by-step-guide-with-python-code-4a7d9b068156

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.linear_model import LinearRegression

#Pipeline Experiment

In [2]:
# Synthetic data set in which the target is an exact, non-linear function of
# the two inputs:  y = X1 + 3 ** X2
df_actividad = pd.DataFrame(
    {
        'X1': [1, 2, 1, 5, 3, 13, 1, 34],
        'X2': [2, 6, 2, 5, 7, 6, 2, 9],
    }
).assign(y=lambda frame: frame['X1'] + 3 ** frame['X2'])
df_actividad

Unnamed: 0,X1,X2,y
0,1,2,10
1,2,6,731
2,1,2,10
3,5,5,248
4,3,7,2190
5,13,6,742
6,1,2,10
7,34,9,19717


In [3]:
# Train/test split: the first six rows are used for training, the rest for testing.
train_actividad, test_actividad = df_actividad.iloc[:6], df_actividad.iloc[6:]

# Separate the feature columns from the target column in each partition.
train_X_actividad, train_y_actividad = train_actividad.drop(columns='y'), train_actividad['y']
test_X_actividad, test_y_actividad = test_actividad.drop(columns='y'), test_actividad['y']

In [4]:
# Show the test-set feature frame (a bare last expression is rendered as the cell output).
test_X_actividad

Unnamed: 0,X1,X2
6,1,2
7,34,9


In [5]:
# Baseline: check whether a plain linear regression on the raw features
# can predict the target correctly.

m1 = LinearRegression()
fit1 = m1.fit(train_X_actividad, train_y_actividad)
preds = fit1.predict(test_X_actividad)

# Root mean squared error of the predictions on the test rows.
rmse = np.sqrt(mean_squared_error(test_y_actividad, preds))
print(f"\n{preds}")
print(f"RMSE: {rmse}\n")


[-128.76248904  552.45836985]
RMSE: 13551.73256307251



In [6]:
# Apply the known non-linearity (3 ** X2) to the feature by hand, then refit.
# The transformed frames get their own names: the original split is left
# untouched, so re-running this cell does not exponentiate X2 a second time
# and the frames displayed by earlier cells stay accurate.
train_X_pow = train_X_actividad.assign(X2=lambda frame: 3 ** frame['X2'])
test_X_pow = test_X_actividad.assign(X2=lambda frame: 3 ** frame['X2'])

print(test_X_pow)

m2 = LinearRegression()
fit2 = m2.fit(train_X_pow, train_y_actividad)
preds = fit2.predict(test_X_pow)

print(f"\n{preds}")
print(f"RMSE: {np.sqrt(mean_squared_error(test_y_actividad, preds))}\n")

   X1     X2
6   1      9
7  34  19683

[1.0000e+01 1.9717e+04]
RMSE: 1.0048591735576161e-14



In [7]:
# Custom transformers through a pipeline.
# Rebuild the train/test split from df_actividad so the cells below start
# from the raw, untransformed feature columns.
train_actividad = df_actividad.head(6)
test_actividad = df_actividad.iloc[6:]

train_y_actividad = train_actividad['y']
train_X_actividad = train_actividad.drop(columns=['y'])

test_y_actividad = test_actividad['y']
test_X_actividad = test_actividad.drop(columns=['y'])

In [8]:
class ExperimentalTransformerAcitividad(BaseEstimator, TransformerMixin):
    """Replace one column of a DataFrame with ``base ** column``.

    The ``print`` calls in every method are intentional: they trace when
    ``__init__``, ``fit`` and ``transform`` are invoked.

    Parameters
    ----------
    column : str, default='X2'
        Name of the column to exponentiate.
    base : int or float, default=3
        Base that is raised to the power of the column's values.
    """

    def __init__(self, column='X2', base=3):
        print('\n>>>>>>>init() called.\n')
        # Constructor arguments are stored unmodified under their own names,
        # which is what BaseEstimator.get_params()/set_params() rely on.
        self.column = column
        self.base = base

    def fit(self, X, y=None):
        """Learn nothing from the data; return ``self`` so calls can be chained."""
        print('\n>>>>>>>fit() called.\n')
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` whose ``column`` holds ``base ** X[column]``.

        ``X`` must support ``.copy()`` and label-based column access
        (e.g. a pandas DataFrame); ``y`` is accepted but ignored.
        """
        print('\n>>>>>>>transform() called.\n')
        X_ = X.copy()  # work on a copy so the caller's data set is not modified
        # Bracket indexing works for any column label, unlike attribute access
        # (X_.X2), which is limited to labels that are valid identifiers.
        X_[self.column] = self.base ** X_[self.column]
        return X_

In [9]:
# No input transformation: a pipeline that wraps only the regressor, used to
# validate that we get the same results as before.

print("create pipeline 1")
pipe1 = Pipeline(steps=[('linear_model', LinearRegression())])

print("fit pipeline 1")
pipe1.fit(train_X_actividad, train_y_actividad)

print("predict via pipeline 1")
preds1 = pipe1.predict(test_X_actividad)

# Root mean squared error of the pipeline's predictions on the test rows.
rmse1 = np.sqrt(mean_squared_error(test_y_actividad, preds1))
print(f"\n{preds1}")
print(f"RMSE: {rmse1}\n")

create pipeline 1
fit pipeline 1
predict via pipeline 1

[-128.76248904  552.45836985]
RMSE: 13551.73256307251



In [10]:
# With input transformation: the custom transformer runs before the regressor.

print("create pipeline 2")
pipe2 = Pipeline(
    steps=[
        # Instantiating the transformer here triggers a call to its __init__.
        ('experimental_trans', ExperimentalTransformerAcitividad()),
        ('linear_model', LinearRegression()),
    ]
)

print("fit pipeline 2")
pipe2.fit(train_X_actividad, train_y_actividad)

print("predict via pipeline 2")
preds2 = pipe2.predict(test_X_actividad)

# Expected predictions: [1.0000e+01 1.9717e+04]
rmse2 = np.sqrt(mean_squared_error(test_y_actividad, preds2))
print(f"\n{preds2}")
print(f"RMSE: {rmse2}\n")

create pipeline 2

>>>>>>>init() called.

fit pipeline 2

>>>>>>>fit() called.


>>>>>>>transform() called.

predict via pipeline 2

>>>>>>>transform() called.


[1.0000e+01 1.9717e+04]
RMSE: 1.0048591735576161e-14

