In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Load the cleaned delivery dataset.
data = pd.read_csv("df_delivery_limpio.csv")

# Column the model learns to predict.
target_col = "subtotal"

# Preprocessing: turn each categorical column into integer codes.
# A fresh LabelEncoder is fitted per column.
categorical_cols = ["store_primary_category", "order_day", "order_period", "grouped_category", "order_size"]
for col in categorical_cols:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])

# Treat +/-inf as missing so they are handled together with NaN below.
data = data.replace([np.inf, -np.inf], np.nan)

# Rows without a target cannot be trained on or scored; filling the target
# with its mean would fabricate labels, so such rows are dropped instead.
data = data.dropna(subset=[target_col])

# Separate predictors (X) from the target (y).
X = data.drop(columns=[target_col])
y = data[target_col]

# Hold out 20% of the rows for testing (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing feature values with means computed on the TRAINING split only,
# so no test-set statistics leak into training. numeric_only=True keeps this
# from raising on pandas >= 2.0 if a non-numeric column is present.
train_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Train the Gradient Boosting model.
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is in the same units as the target

print(f"MAE: {mae}")
print(f"RMSE: {rmse}")

MAE: 77.79300456592213
RMSE: 112.97404893840562


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the cleaned delivery dataset.
data = pd.read_csv("df_delivery_limpio.csv")

# Column the model learns to predict.
target_col = "subtotal"

# Preprocessing: turn each categorical column into integer codes.
# A fresh LabelEncoder is fitted per column.
categorical_cols = ["store_primary_category", "order_day", "order_period", "grouped_category", "order_size"]
for col in categorical_cols:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])

# Treat +/-inf as missing so they are handled together with NaN below.
data = data.replace([np.inf, -np.inf], np.nan)

# Rows without a target cannot be trained on or scored; filling the target
# with its mean would fabricate labels, so such rows are dropped instead.
data = data.dropna(subset=[target_col])

# Separate predictors (X) from the target (y).
# NOTE(review): the recorded run of this cell reported R2 ~ 0.9999 on the test
# split. A score that close to 1 often means some predictor is derived from the
# target - confirm which columns of X are legitimately known before the order
# subtotal is.
X = data.drop(columns=[target_col])
y = data[target_col]

# Hold out 20% of the rows for testing (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing feature values with means computed on the TRAINING split only,
# so no test-set statistics leak into training. numeric_only=True keeps this
# from raising on pandas >= 2.0 if a non-numeric column is present.
train_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Base estimator whose hyperparameters are tuned below.
model = GradientBoostingRegressor(random_state=42)

# Hyperparameter grid: 3 x 3 x 3 = 27 combinations.
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

# Exhaustive search with 3-fold cross-validation on the training split,
# ranked by (negated) mean absolute error; n_jobs=-1 runs fits in parallel.
grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_absolute_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Estimator with the best cross-validated score.
best_model = grid_search.best_estimator_

# Evaluate on the held-out split.
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is in the same units as the target
r2 = r2_score(y_test, y_pred)

print(f"Mejores Hiperparámetros: {grid_search.best_params_}")
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print(f"R2 Score: {r2}")


Mejores Hiperparámetros: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 300}
MAE: 4.441132627950357
RMSE: 15.40755099557856
R2 Score: 0.9998617168570529


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the cleaned delivery dataset.
data = pd.read_csv("df_delivery_limpio.csv")

# Column the model learns to predict.
target_col = "delivery_duration"

# Preprocessing: turn each categorical column into integer codes.
# A fresh LabelEncoder is fitted per column.
categorical_cols = ["store_primary_category", "order_day", "order_period", "grouped_category", "order_size"]
for col in categorical_cols:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])

# Treat +/-inf as missing so they are handled together with NaN below.
data = data.replace([np.inf, -np.inf], np.nan)

# Rows without a target cannot be trained on or scored; filling the target
# with its mean would fabricate labels, so such rows are dropped instead.
data = data.dropna(subset=[target_col])

# Separate predictors (X) from the target (y).
X = data.drop(columns=[target_col])
y = data[target_col]

# Hold out 20% of the rows for testing (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing feature values with means computed on the TRAINING split only,
# so no test-set statistics leak into training. numeric_only=True keeps this
# from raising on pandas >= 2.0 if a non-numeric column is present.
train_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Base estimator whose hyperparameters are tuned below.
model = GradientBoostingRegressor(random_state=42)

# Hyperparameter grid: 3 x 3 x 3 = 27 combinations.
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

# Exhaustive search with 3-fold cross-validation on the training split,
# ranked by (negated) mean absolute error; n_jobs=-1 runs fits in parallel.
grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_absolute_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Estimator with the best cross-validated score.
best_model = grid_search.best_estimator_

# Evaluate on the held-out split.
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is in the same units as the target
r2 = r2_score(y_test, y_pred)

print(f"Mejores Hiperparámetros: {grid_search.best_params_}")
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print(f"R2 Score: {r2}")

Mejores Hiperparámetros: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 200}
MAE: 629.8207055162122
RMSE: 815.0049780899079
R2 Score: 0.28856070898579256
