Los modelos base se entrenan en paralelo, de forma independiente entre sí.

In [17]:

import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.tree import ExtraTreeClassifier, ExtraTreeRegressor, DecisionTreeRegressor
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, ExtraTreesRegressor, BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier, VotingRegressor, VotingClassifier, AdaBoostClassifier, AdaBoostRegressor, GradientBoostingClassifier, GradientBoostingRegressor, StackingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error, mean_absolute_percentage_error

In [4]:
# Load seaborn's 'mpg' dataset and discard every row that has a missing value.
df = sns.load_dataset('mpg').dropna()

# Predictor columns; the target is the 'mpg' column.
features = [
    'weight',
    'cylinders',
    'displacement',
    'horsepower',
    'acceleration',
    'model_year',
]
X, y = df[features], df['mpg']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Bagging-family regressors to compare, keyed by their class name.
# Every estimator is built with random_state=42 so results are repeatable.
models = {
    estimator_cls.__name__: estimator_cls(random_state=42)
    for estimator_cls in (
        BaggingRegressor,
        RandomForestRegressor,
        ExtraTreesRegressor,
    )
}

In [8]:
# Fit each model on the training split and report its test-set metrics
# (R2, MAE, RMSE, MAPE), one line per model.
for name, model in models.items():
    # fit() returns the fitted estimator, so predict() can be chained onto it.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    print(
        f"{name}: "
        f"R2 = {r2_score(y_test, y_pred):.3f}, "
        f"MAE = {mean_absolute_error(y_test, y_pred):.3f}, "
        f"RMSE = {root_mean_squared_error(y_test, y_pred):.3f}, "
        f"MAPE = {mean_absolute_percentage_error(y_test, y_pred):.3f}"
    )

BaggingRegressor: R2 = 0.842, MAE = 2.049, RMSE = 2.838, MAPE = 0.091
RandomForestRegressor: R2 = 0.885, MAE = 1.761, RMSE = 2.423, MAPE = 0.080
ExtraTreesRegressor: R2 = 0.896, MAE = 1.693, RMSE = 2.305, MAPE = 0.076


## Voting


In [19]:
# Voting ensemble: four heterogeneous base regressors combined by a
# VotingRegressor, fitted on the training split and scored on the test split.
#
# NOTE(review): SVR and KNeighborsRegressor are scale-sensitive and X_train
# appears to hold the raw, unscaled columns; consider wrapping them in a
# StandardScaler pipeline -- confirm whether leaving them unscaled is intentional.
model = VotingRegressor([
    ('LinearRegression', LinearRegression()),
    ('KNeighborsRegressor', KNeighborsRegressor()),
    ('SVR', SVR()),
    # Seeded so the fitted tree (and therefore the ensemble metrics) is the same
    # on every run, like the random_state=42 estimators in the other cells.
    # The estimator name is kept exactly as originally spelled.
    ('DesicionTreeRegresor', DecisionTreeRegressor(random_state=42))
])
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(f"VotingRegresor: R2 = {r2_score(y_test, y_pred):.3f}, MAE = {mean_absolute_error(y_test, y_pred):.3f}, RMSE = {root_mean_squared_error(y_test, y_pred):.3f}, MAPE = {mean_absolute_percentage_error(y_test, y_pred):.3f}")

VotingRegresor: R2 = 0.827, MAE = 2.186, RMSE = 2.973, MAPE = 0.097


## Boosting

Los modelos se entrenan secuencialmente: cada uno corrige los errores del anterior.

In [20]:
# Boosting regressors to compare, keyed by class name.
# Both are seeded with random_state=42 so repeated runs give the same metrics.
models = {
    'AdaBoostRegressor': AdaBoostRegressor(random_state=42),
    # Previously unseeded; now pinned like every other ensemble in this notebook.
    'GradientBoostingRegressor': GradientBoostingRegressor(random_state=42)
}

# Fit each model on the training split and print its test-set metrics.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name}: R2 = {r2_score(y_test, y_pred):.3f}, MAE = {mean_absolute_error(y_test, y_pred):.3f}, RMSE = {root_mean_squared_error(y_test, y_pred):.3f}, MAPE = {mean_absolute_percentage_error(y_test, y_pred):.3f}")

AdaBoostRegressor: R2 = 0.818, MAE = 2.240, RMSE = 3.048, MAPE = 0.103
GradientBoostingRegressor: R2 = 0.876, MAE = 1.807, RMSE = 2.520, MAPE = 0.081


## Stacking


In [21]:
# Stacking ensemble: the base regressors' predictions are fed to a
# RandomForestRegressor meta-model; fitted on the training split and scored
# on the test split.
#
# NOTE(review): SVR and KNeighborsRegressor are scale-sensitive and X_train
# appears to hold the raw, unscaled columns; consider wrapping them in a
# StandardScaler pipeline -- confirm whether leaving them unscaled is intentional.
model = StackingRegressor([  # list of base models
        ('LinearRegression', LinearRegression()),
        ('KNeighborsRegressor', KNeighborsRegressor()),
        ('SVR', SVR()),
        # Seeded so the base tree is the same on every run; without it the
        # stacked result can vary even though the meta-model is seeded.
        # The estimator name is kept exactly as originally spelled.
        ('DesicionTreeRegresor', DecisionTreeRegressor(random_state=42))
    ],
    final_estimator=RandomForestRegressor(random_state=42)  # meta-model
)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(f"StackingRegresor: R2 = {r2_score(y_test, y_pred):.3f}, MAE = {mean_absolute_error(y_test, y_pred):.3f}, RMSE = {root_mean_squared_error(y_test, y_pred):.3f}, MAPE = {mean_absolute_percentage_error(y_test, y_pred):.3f}")

StackingRegresor: R2 = 0.850, MAE = 2.067, RMSE = 2.768, MAPE = 0.091
