In [1]:

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.linear_model import LinearRegression

In [2]:
# Seed the legacy global RNG so the two draws below are reproducible.
np.random.seed(0)
X1 = np.random.rand(10, 1)
X2 = np.random.rand(10, 1)

# Target: y is the SQUARE of (X1 + 2 * sqrt(X2)),
# i.e. sqrt(y) = X1 + 2 * sqrt(X2) is linear in X1 and sqrt(X2).
y = (X1 + 2.0 * np.sqrt(X2)) ** 2

# Engineered design matrix: first column X1, second column sqrt(X2).
X_combined = np.column_stack((X1, np.sqrt(X2)))
X_combined

array([[0.5488135 , 0.88978932],
       [0.71518937, 0.72725162],
       [0.60276338, 0.75368731],
       [0.54488318, 0.96207933],
       [0.4236548 , 0.26652591],
       [0.64589411, 0.29517673],
       [0.43758721, 0.14219141],
       [0.891773  , 0.91248005],
       [0.96366276, 0.88213194],
       [0.38344152, 0.93274442]])

In [3]:

# Flatten each (10, 1) array to 1-D and collect the columns under their names.
data = {
    column: values.flatten()
    for column, values in (('X1', X1), ('X2', X2), ('y', y))
}
df = pd.DataFrame(data)

In [4]:
# Preview the first rows of the frame.
print(df.head())

# Pull each column out as its own Series.
X1_column, X2_column, y_column = df['X1'], df['X2'], df['y']

         X1        X2         y
0  0.548814  0.791725  5.421410
1  0.715189  0.528895  4.707566
2  0.602763  0.568045  4.452682
3  0.544883  0.925597  6.096168
4  0.423655  0.071036  0.915288


In [5]:
# Least-squares fit of y on the engineered matrix [X1, sqrt(X2)].
lin_reg = LinearRegression()
lin_reg.fit(X=X_combined, y=y)

In [6]:
# Positional split: the first six rows are used for fitting, the rest are held out.
train, test = df.iloc[:6], df.iloc[6:]

# Separate the predictors from the target column `y` in each split.
train_X, train_y = train.drop(columns='y'), train['y']
test_X, test_y = test.drop(columns='y'), test['y']

In [7]:
# Baseline: a pipeline whose only step is a linear regression on the raw features.
print("create pipeline 1")
pipe1 = Pipeline(steps=[('linear_model', LinearRegression())])

print("fit pipeline 1")
pipe1.fit(X=train_X, y=train_y)

print("predict via pipeline 1")
preds1 = pipe1.predict(X=test_X)

# Show the held-out predictions followed by their root-mean-squared error.
print(f"\n{preds1}")
print(f"RMSE: {np.sqrt(mean_squared_error(test_y, preds1))}\n")

create pipeline 1
fit pipeline 1
predict via pipeline 1

[0.57797412 6.95816197 6.90991689 5.28936093]
RMSE: 0.3598775455477421



In [8]:
# Replace X2 by its fourth root (sqrt of sqrt) in both splits.
# The frames are rebuilt from the untouched `train` / `test` splits instead of
# being overwritten in place, so re-running this cell yields the same values
# rather than compounding the root on every execution.
train_X = train.drop('y', axis=1).assign(X2=lambda frame: np.sqrt(np.sqrt(frame['X2'])))
test_X = test.drop('y', axis=1).assign(X2=lambda frame: np.sqrt(np.sqrt(frame['X2'])))

print(test_X)

# Refit a plain linear regression on the transformed predictors.
m2 = LinearRegression()
fit2 = m2.fit(train_X, train_y)  # fit() returns the estimator itself

preds = fit2.predict(test_X)

# Held-out predictions and their root-mean-squared error.
print(f"\n{preds}")
print(f"RMSE: {np.sqrt(mean_squared_error(test_y, preds))}\n")

         X1        X2
6  0.437587  0.377083
7  0.891773  0.955238
8  0.963663  0.939219
9  0.383442  0.965787

[-0.48246294  5.90865618  5.80947481  5.54883701]
RMSE: 1.2327977166455044



In [9]:
# We create a transformer class

# __init__: this is the constructor. It is called when the pipeline is initialized.
# fit(): called when we run fit on the pipeline.
# transform(): called when we run fit or transform on the pipeline.

class ExperimentalTransformer(BaseEstimator, TransformerMixin):
  """Demo transformer that replaces column ``X2`` by its fourth root.

  Every method prints a marker when it runs, so the order in which the
  constructor, ``fit`` and ``transform`` are invoked can be observed.
  """

  def __init__(self):
    print('\n>>>>>>>init() called.\n')

  def fit(self, X, y = None):
    """Learn nothing (the transform is stateless) and return ``self``."""
    print('\n>>>>>>>fit() called.\n')
    return self

  def transform(self, X, y = None):
    """Return a copy of ``X`` whose ``X2`` column is ``sqrt(sqrt(X2))``.

    ``X`` must provide ``.copy()`` and an ``X2`` column (e.g. a pandas
    DataFrame); it is not modified. ``y`` is accepted and ignored.
    """
    print('\n>>>>>>>transform() called.\n')
    X_ = X.copy()  # work on a copy so the caller's data is left untouched
    # Transform the data that was passed in. The previous version read the
    # global `test_X` here, which ignored X and misaligned on other frames.
    X_['X2'] = np.sqrt(np.sqrt(X_['X2']))
    return X_

In [10]:
# Same single-step pipeline as before, fit on train_X / test_X as they currently
# stand (their X2 column was transformed in an earlier cell).
print("create pipeline 1")
pipe1 = Pipeline(steps=[('linear_model', LinearRegression())])

print("fit pipeline 1")
pipe1.fit(X=train_X, y=train_y)

print("predict via pipeline 1")
preds1 = pipe1.predict(X=test_X)

# Show the held-out predictions followed by their root-mean-squared error.
print(f"\n{preds1}")
print(f"RMSE: {np.sqrt(mean_squared_error(test_y, preds1))}\n")

create pipeline 1
fit pipeline 1
predict via pipeline 1

[-0.48246294  5.90865618  5.80947481  5.54883701]
RMSE: 1.2327977166455044

