In [40]:
import numpy as np
import pandas as pd

# Build a synthetic regression dataset; the fixed seed makes it reproducible
np.random.seed(42)
X = np.random.rand(100, 5)  # feature matrix: 100 samples x 5 features
# Target: linear in the first two features only, plus Gaussian noise scaled by 0.5
y = 5 * X[:, 0] + 3 * X[:, 1] + 0.5 * np.random.randn(100)

# **Bagging**

## **Random Forest**

In [41]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Hold out 20 % of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest and fit it on the training split in one step
# (fit() returns the estimator itself)
rf_regressor = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict the held-out samples
y_pred = rf_regressor.predict(X_test)

# Report the mean squared error of those predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 0.36825592950989705


# **Boosting**

## **Gradient Boosting**

In [42]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting baseline: 100 trees of depth 3 with learning rate 0.1.
# X_train / X_test / y_train / y_test are not created here; they must already
# exist in the notebook namespace from an earlier cell.
gb_regressor = GradientBoostingRegressor(
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
    random_state=42,
)
gb_regressor.fit(X_train, y_train)

# Predict the held-out samples and report the mean squared error
y_pred = gb_regressor.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 0.43164180033636235


In [43]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Hold out 20 % of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Gradient boosting with adjusted hyper-parameters, fitted in the same statement
# (fit() returns the estimator itself)
gb_regressor = GradientBoostingRegressor(
    random_state=42,
    n_estimators=200,
    learning_rate=0.05,
    max_depth=4,
    max_features='sqrt',
    subsample=0.8,
    min_samples_leaf=3,
    min_samples_split=5,
).fit(X_train, y_train)

# Predict the held-out samples
y_pred = gb_regressor.predict(X_test)

# Report the mean squared error of those predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 0.39914679875600256


## **XGBoost**

In [44]:
import xgboost as xgb

# XGBoost regressor: 100 trees of depth 3 with learning rate 0.1.
# The train/test arrays are not created here; they must already exist in the
# notebook namespace from an earlier cell.
xgb_regressor = xgb.XGBRegressor(
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
    random_state=42,
)
xgb_regressor.fit(X_train, y_train)

# Predict the held-out samples and report the mean squared error
y_pred = xgb_regressor.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 0.3790853970503959


## **AdaBoost**

In [45]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Hold out 20 % of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Weak learner to be boosted: a regression tree limited to depth 4
base_regressor = DecisionTreeRegressor(max_depth=4)

# AdaBoost ensemble with adjusted hyper-parameters.
# The weak learner is passed positionally on purpose: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was removed
# in 1.4, so `base_estimator=...` raises TypeError on current releases while
# `estimator=...` fails on older ones. The first positional slot works on both.
adaboost_regressor = AdaBoostRegressor(
    base_regressor,
    n_estimators=100,
    learning_rate=0.1,
    loss='linear',
    random_state=42
)

# Fit on the training split
adaboost_regressor.fit(X_train, y_train)

# Predict the held-out samples
y_pred = adaboost_regressor.predict(X_test)

# Report the mean squared error of those predictions
mse = mean_squared_error(y_test, y_pred)
print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 0.4221516606860476




## **CatBoost**

In [46]:
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Hold out 20 % of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# CatBoost regressor with adjusted hyper-parameters, trained on the CPU
catboost_regressor = CatBoostRegressor(
    task_type="CPU",
    random_seed=42,
    iterations=1000,
    learning_rate=0.05,
    depth=6,
    l2_leaf_reg=3.0,
    verbose=0,  # keep the training log out of the cell output
)

# Fit on the training split
catboost_regressor.fit(X_train, y_train)

# Predict the held-out samples
y_pred = catboost_regressor.predict(X_test)

# Report the mean squared error of those predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 0.3136024211223604


## **LightGBM**

In [47]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Hold out 20 % of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# LightGBM regressor with adjusted hyper-parameters
lgb_regressor = lgb.LGBMRegressor(
    num_leaves=31,
    learning_rate=0.05,
    n_estimators=200,
    max_depth=-1,  # -1 means no depth limit
    min_data_in_leaf=20,
    feature_fraction=0.9,
    bagging_fraction=0.8,
    bagging_freq=5,
    lambda_l1=0.1,
    lambda_l2=0.2,
    random_state=42,
    verbose=-1  # silence LightGBM's [Info] training log so only the metric is printed
)

# Fit on the training split
lgb_regressor.fit(X_train, y_train)

# Predict the held-out samples
y_pred = lgb_regressor.predict(X_test)

# Report the mean squared error of those predictions
mse = mean_squared_error(y_test, y_pred)
print(f'Error cuadrático medio: {mse}')

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000042 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 140
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 5
[LightGBM] [Info] Start training from score 4.082006
Error cuadrático medio: 0.34001497111291645


In [48]:
!pip install catboost



In [49]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
#from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

# Hold out 20 % of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One regressor per boosting library, all with 100 rounds and learning rate 0.1
gb_regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# XGBoost Regressor
xgb_regressor = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# LightGBM Regressor (verbose=-1 keeps its training log from interleaving with the scores)
lgbm_regressor = LGBMRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42, verbose=-1)

# CatBoost Regressor
catboost_regressor = CatBoostRegressor(iterations=100, learning_rate=0.1, depth=3, random_state=42, silent=True)

# AdaBoost Regressor (default weak learner)
adaboost_regressor = AdaBoostRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

boosting_models = {
    'Gradient Boosting': gb_regressor,
    'XGBoost': xgb_regressor,
    'LightGBM': lgbm_regressor,
    'CatBoost': catboost_regressor,
    'AdaBoost': adaboost_regressor,
}

# NOTE: .score() on these regressors is the coefficient of determination R²
# (higher is better), not the mean squared error printed by the other cells,
# so every value is labelled with both the model and the metric.
for model_name, model in boosting_models.items():
    model.fit(X_train, y_train)
    print(f'{model_name} R²: {model.score(X_test, y_test)}')

0.7533808137444382
0.7834089931302248
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000034 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 140
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 5
[LightGBM] [Info] Start training from score 4.082006
0.8630779643290661
0.8420269335531114
0.8070324816266515


# **Stacking**

In [50]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import StackingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error

# Base learners whose predictions feed the meta-model.
# The decision tree is seeded like every other stochastic model in this notebook:
# scikit-learn permutes the features at each split, so an unseeded tree may break
# ties between equally good splits differently from run to run.
estimators = [
    ('lr', LinearRegression()),
    ('dt', DecisionTreeRegressor(max_depth=4, random_state=42)),
    ('xgb', XGBRegressor(n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42))
]

# Stacking ensemble: a linear regression combines the base learners' predictions
stacking_regressor = StackingRegressor(
    estimators=estimators,
    final_estimator=LinearRegression()
)

# Fit on the training split (X_train / y_train must already exist from an earlier cell)
stacking_regressor.fit(X_train, y_train)

# Predict the held-out samples
y_pred = stacking_regressor.predict(X_test)

# Report the mean squared error of those predictions
mse = mean_squared_error(y_test, y_pred)
print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 0.2244897691072527


# **Voting**

In [51]:
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

# Hold out 20 % of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base learners.
# The decision tree is seeded like every other stochastic model in this notebook:
# scikit-learn permutes the features at each split, so an unseeded tree may break
# ties between equally good splits differently from run to run.
reg1 = LinearRegression()
reg2 = DecisionTreeRegressor(max_depth=4, random_state=42)

# Voting ensemble: averages the predictions of the two base learners
voting_regressor = VotingRegressor(estimators=[('lr', reg1), ('dt', reg2)])
voting_regressor.fit(X_train, y_train)

# Predict the held-out samples and report the mean squared error
# (mean_squared_error must already be imported by an earlier cell)
y_pred = voting_regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 0.3018821834078754
