In [None]:
# 6. **Modelos de Ensamble**
   - **Utilidad:** Este grupo de métodos combina múltiples modelos base con el fin de mejorar el rendimiento y la robustez del modelo final.
   - **Funciones comunes:**
     - **BaggingClassifier/Regressor:** Ensamble de modelos usando bagging.
     - **AdaBoostClassifier/Regressor:** Ensamble de modelos usando boosting adaptativo.
     - **VotingClassifier/Regressor:** Ensamble de múltiples modelos mediante votación o promedio.

In [None]:
## Cargar y preparar los datos de regresión
Cargamos el dataset California housing y dividimos los datos en conjuntos de entrenamiento y prueba para regresión.

In [16]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Load the California housing data and pull out the feature matrix and the target
housing = fetch_california_housing()
X_reg, y_reg = housing.data, housing.target

# Hold out 20% of the samples as the test set; the fixed seed keeps the split repeatable
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg,
    y_reg,
    test_size=0.2,
    random_state=42,
)


In [None]:
## Cargar y preparar los datos de clasificación
Cargamos el dataset Wine y dividimos los datos en conjuntos de entrenamiento y prueba para clasificación.

In [17]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Load the Wine data and pull out the feature matrix and the class labels
data = load_wine()
X_clf, y_clf = data.data, data.target

# Hold out 20% of the samples as the test set; the fixed seed keeps the split repeatable
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X_clf,
    y_clf,
    test_size=0.2,
    random_state=42,
)


In [None]:
## BaggingRegressor
Entrenamos y evaluamos un regresor Bagging con un árbol de decisión como base.

In [18]:
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Bagging ensemble of 50 decision-tree regressors, seeded for repeatable results
model = BaggingRegressor(estimator=DecisionTreeRegressor(), n_estimators=50, random_state=42)

# Train on the regression split. The data-preparation cell names its outputs
# X_train_reg / y_train_reg / X_test_reg / y_test_reg, so those are the names used
# here; un-suffixed X_train / y_train are not defined by the visible cells and would
# fail (or pick up stale kernel state) on a fresh Restart & Run All.
model.fit(X_train_reg, y_train_reg)

# Predict on the held-out regression test set
y_pred = model.predict(X_test_reg)

# Report the mean squared error on the test set
mse = mean_squared_error(y_test_reg, y_pred)
print(f'Bagging Regressor MSE: {mse:.2f}')



Bagging Regressor MSE: 0.26


In [None]:
## BaggingClassifier
Entrenamos y evaluamos un clasificador Bagging con un árbol de decisión como base.

In [19]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Bagging ensemble of 50 decision-tree classifiers, seeded for repeatable results
model_clf = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42,
)

# Fit on the classification training split, then label the held-out test samples
y_pred_clf = model_clf.fit(X_train_clf, y_train_clf).predict(X_test_clf)

# Report the fraction of test samples classified correctly
accuracy = accuracy_score(y_test_clf, y_pred_clf)
print(f'Bagging Classifier Accuracy: {accuracy:.2f}')

Bagging Classifier Accuracy: 0.97


In [None]:
## AdaBoostRegressor
Entrenamos y evaluamos un regresor AdaBoost con un árbol de decisión como base.

In [20]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

# AdaBoost ensemble of 50 depth-4 decision-tree regressors, seeded for repeatable results
model = AdaBoostRegressor(estimator=DecisionTreeRegressor(max_depth=4), n_estimators=50, random_state=42)

# Train on the regression split. The data-preparation cell names its outputs
# X_train_reg / y_train_reg / X_test_reg / y_test_reg, so those are the names used
# here; un-suffixed X_train / y_train are not defined by the visible cells and would
# fail (or pick up stale kernel state) on a fresh Restart & Run All.
model.fit(X_train_reg, y_train_reg)

# Predict on the held-out regression test set
y_pred = model.predict(X_test_reg)

# Report the mean squared error on the test set
# (mean_squared_error is imported in the BaggingRegressor cell above)
mse = mean_squared_error(y_test_reg, y_pred)
print(f'AdaBoost Regressor MSE: {mse:.2f}')


AdaBoost Regressor MSE: 0.80


In [None]:
## AdaBoostClassifier
Entrenamos y evaluamos un clasificador AdaBoost con un árbol de decisión como base.

In [24]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# AdaBoost ensemble of 50 depth-1 decision trees, using the SAMME algorithm and a fixed seed.
# NOTE(review): newer scikit-learn releases may warn about the `algorithm` argument —
# confirm against the installed version.
model_clf = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    algorithm='SAMME',
    random_state=42,
)

# Fit on the classification training split, then label the held-out test samples
y_pred_clf = model_clf.fit(X_train_clf, y_train_clf).predict(X_test_clf)

# Report the fraction of test samples classified correctly
# (accuracy_score is imported in the BaggingClassifier cell above)
accuracy = accuracy_score(y_test_clf, y_pred_clf)
print(f'AdaBoost Classifier Accuracy: {accuracy:.2f}')


AdaBoost Classifier Accuracy: 0.94


In [None]:
## VotingRegressor
Entrenamos y evaluamos un VotingRegressor, que promedia las predicciones de tres regresores base (regresión lineal, árbol de decisión y k-vecinos más cercanos).

In [22]:
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Base regressors combined by the ensemble
reg1 = LinearRegression()
# Seed the tree so the cell gives the same result on every run, matching the
# random_state=42 used by the other estimators in this notebook
reg2 = DecisionTreeRegressor(random_state=42)
reg3 = KNeighborsRegressor()

# VotingRegressor over the three base regressors
model = VotingRegressor(estimators=[
    ('lr', reg1),
    ('dt', reg2),
    ('knn', reg3)])

# Train on the regression split. The data-preparation cell names its outputs
# X_train_reg / y_train_reg / X_test_reg / y_test_reg, so those are the names used
# here; un-suffixed X_train / y_train are not defined by the visible cells and would
# fail (or pick up stale kernel state) on a fresh Restart & Run All.
model.fit(X_train_reg, y_train_reg)

# Predict on the held-out regression test set
y_pred = model.predict(X_test_reg)

# Report the mean squared error on the test set
mse = mean_squared_error(y_test_reg, y_pred)
print(f'Voting Regressor MSE: {mse:.2f}')


Voting Regressor MSE: 0.46


In [None]:
## VotingClassifier
Entrenamos y evaluamos un clasificador de votación con tres clasificadores base.
- Utilizamos StandardScaler para escalar los datos en los clasificadores LogisticRegression y SVC.
- Aumentamos max_iter en LogisticRegression para asegurar la convergencia.

In [26]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Base classifiers. LogisticRegression and SVC are wrapped in a pipeline that
# standardizes the features first; max_iter is raised for LogisticRegression.
clf1 = make_pipeline(StandardScaler(), LogisticRegression(max_iter=500))
# NOTE(review): KNN is distance-based but receives the raw, unscaled features here —
# consider wrapping it in the same StandardScaler pipeline.
clf2 = KNeighborsClassifier(n_neighbors=3)
# probability=True is needed for soft voting; the seed makes the SVC probability
# estimates repeatable from run to run.
clf3 = make_pipeline(StandardScaler(), SVC(kernel='linear', probability=True, random_state=42))

# Soft-voting ensemble over the three base classifiers
model_clf = VotingClassifier(estimators=[
    ('lr', clf1),
    ('knn', clf2),
    ('svc', clf3)], voting='soft')

# Train on the classification training split
model_clf.fit(X_train_clf, y_train_clf)

# Label the held-out test samples
y_pred_clf = model_clf.predict(X_test_clf)

# Report the fraction of test samples classified correctly
# (accuracy_score is imported in the BaggingClassifier cell above)
accuracy = accuracy_score(y_test_clf, y_pred_clf)
print(f'Voting Classifier Accuracy: {accuracy:.2f}')


Voting Classifier Accuracy: 1.00
