In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Toy expense table: five observations of five expense columns plus a
# recorded "Total" column that serves as the regression target.
# NOTE(review): in the third observation the five expense columns add up to
# 9240 while "Total" is recorded as 9300 - confirm whether that is intended.
data = dict(
    Agua=[300, 280, 310, 290, 305],
    Gasolina=[1500, 1400, 1550, 1600, 1480],
    Luz=[500, 520, 480, 510, 495],
    Renta=[5000, 5000, 5000, 5000, 5000],
    Super=[2000, 2100, 1900, 2050, 1980],
    Total=[9300, 9300, 9300, 9450, 9260],
)
df = pd.DataFrame(data)

# Features are every column except the target; the target is "Total".
y = df["Total"]
X = df.drop(columns=["Total"])

# Split features and target into train/test parts with test_size=0.3;
# the fixed random_state keeps the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)


In [5]:
# === Block 2: Baseline (linear regression without regularization) ===
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Relies on X_train, X_test, y_train, y_test already defined in Block 1.
# fit() returns the estimator itself, so construction and fitting are chained.
lin = LinearRegression().fit(X_train, y_train)
y_pred_lin = lin.predict(X_test)

# Compute MSE first and take its square root explicitly, so the RMSE does not
# depend on any sklearn-version-specific option.
lin_mse = mean_squared_error(y_test, y_pred_lin)
lin_rmse = np.sqrt(lin_mse)

print("Lineal MSE :", lin_mse)
print("Lineal RMSE:", lin_rmse)

# (Optional) inspect the fitted coefficients and intercept.
print("Coeficientes:", lin.coef_)
print("Intercepto  :", lin.intercept_)


Lineal MSE : 1724.6066551635452
Lineal RMSE: 41.5283837292465
Coeficientes: [-0.11544755  1.1953821   0.11272044  0.          0.56360221]
Intercepto  : 6357.996485061511


In [6]:
# === Block 3: Ridge and Lasso with standardization ===
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error


def _scaled(regressor):
    """Return an unfitted pipeline: a StandardScaler step followed by `regressor`."""
    return Pipeline([
        ("scaler", StandardScaler()),
        ("model", regressor),
    ])


# Ridge: fit on the training split, score on the test split.
ridge_pipe = _scaled(Ridge(alpha=1.0)).fit(X_train, y_train)
ridge_pred = ridge_pipe.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_pred)
ridge_rmse = np.sqrt(ridge_mse)

# Lasso: same procedure, with a raised iteration cap for the solver.
lasso_pipe = _scaled(Lasso(alpha=0.1, max_iter=10000)).fit(X_train, y_train)
lasso_pred = lasso_pipe.predict(X_test)
lasso_mse = mean_squared_error(y_test, lasso_pred)
lasso_rmse = np.sqrt(lasso_mse)

print("Ridge  RMSE:", ridge_rmse)
print("Lasso  RMSE:", lasso_rmse)

# (Optional) simple coefficient table. The models were fitted after the
# scaler step, so these coefficients refer to the standardized features.
import pandas as pd
features = list(X.columns)
coef_ridge, coef_lasso = (
    pipe.named_steps["model"].coef_ for pipe in (ridge_pipe, lasso_pipe)
)

coef_table = pd.DataFrame({
    "Feature": features,
    "Ridge (std.)": coef_ridge,
    "Lasso (std.)": coef_lasso,
})
print(coef_table)


Ridge  RMSE: 14.28449046585105
Lasso  RMSE: 3.426197761950265
    Feature  Ridge (std.)  Lasso (std.)
0      Agua    -16.913715    -40.759207
1  Gasolina     34.187296     40.757640
2       Luz     10.994096      0.000000
3     Renta      0.000000      0.000000
4     Super     10.994096      0.000000


In [7]:
# === Block 4: alpha search (cross-validation) ===
from sklearn.model_selection import GridSearchCV, KFold

# One shuffled 3-fold splitter with a fixed seed, shared by both searches.
cv = KFold(n_splits=3, shuffle=True, random_state=42)


def _alpha_search(regressor):
    """Grid-search `model__alpha` for a scaler + `regressor` pipeline.

    The search is fitted on the full X, y using the shared `cv` splitter and
    is returned already fitted.
    """
    search = GridSearchCV(
        estimator=Pipeline([("scaler", StandardScaler()), ("model", regressor)]),
        param_grid={"model__alpha": [0.001, 0.01, 0.1, 1, 10]},
        # Scored as negative MSE; the sign is flipped before taking the root.
        scoring="neg_mean_squared_error",
        cv=cv,
    )
    return search.fit(X, y)


# Ridge with CV
ridge_cv = _alpha_search(Ridge())
best_ridge_alpha = ridge_cv.best_params_["model__alpha"]
best_ridge_rmse = np.sqrt(-ridge_cv.best_score_)
print("Mejor Ridge alpha:", best_ridge_alpha, " | RMSE CV:", best_ridge_rmse)

# Lasso with CV
lasso_cv = _alpha_search(Lasso(max_iter=10000))
best_lasso_alpha = lasso_cv.best_params_["model__alpha"]
best_lasso_rmse = np.sqrt(-lasso_cv.best_score_)
print("Mejor Lasso alpha:", best_lasso_alpha, " | RMSE CV:", best_lasso_rmse)


Mejor Ridge alpha: 0.001  | RMSE CV: 20.592680807621967
Mejor Lasso alpha: 0.01  | RMSE CV: 12.461700668728794


In [9]:
import joblib

# === Save the best model ===
# Pick the winner from the two grid searches instead of hard-coding one.
# Both searches were scored with "neg_mean_squared_error", so the larger
# best_score_ corresponds to the lower cross-validated MSE. lasso_cv is
# listed first so that an exact tie keeps the previous choice (Lasso).
best_search = max((lasso_cv, ridge_cv), key=lambda search: search.best_score_)
best_model = best_search.best_estimator_

# Persist the fitted pipeline to a .pkl file.
model_path = "modelo_mejor_dia13.pkl"
joblib.dump(best_model, model_path)
print(f"Modelo guardado en '{model_path}'")

# === Load the model whenever it is needed ===
# NOTE: joblib.load unpickles arbitrary objects; only load files you trust.
modelo_cargado = joblib.load(model_path)

# === Use the reloaded model to predict ===
# The new example uses the same column names, in the same order, as the
# feature frame X built in the first cell.
nuevo = pd.DataFrame({
    "Agua": [310],
    "Gasolina": [1520],
    "Luz": [505],
    "Renta": [5000],
    "Super": [2000]
})
prediccion = modelo_cargado.predict(nuevo)
print("Predicción para nuevo ejemplo:", prediccion[0])


Modelo guardado en 'modelo_mejor_dia13.pkl'
Predicción para nuevo ejemplo: 9277.264746384673
