In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Dataset: one row per recorded expense (concept label and its amount).
data = {
    "Concepto": ["Renta", "Super", "Gasolina", "Super", "Gasolina", "Luz", "Agua", "Renta", "Agua"],
    "Monto": [2750, 900, 1200, 1100, 800, 600, 400, 2750, 150],
}
df = pd.DataFrame(data)

# Feature frame (double brackets keep it 2-D so the ColumnTransformer can
# select the column by name) and the regression target.
X = df[["Concepto"]]
y = df["Monto"]

# Preprocessing + model.
# The encoder deliberately does NOT use drop="first". Combined with
# handle_unknown="ignore", dropping a category makes an unseen concept encode
# to the same all-zero row as the dropped category, so the model would
# silently answer with that category's amount. Keeping every indicator column
# means an unseen concept sets no indicator and the prediction falls back to
# the fitted intercept instead of impersonating a real category.
pre = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), ["Concepto"])
])
pipe = Pipeline([
    ("prep", pre),
    ("model", LinearRegression())
])

# Train on a 70/30 split with a fixed seed for reproducibility.
# NOTE(review): X_test / y_test are not used in the cells visible here, and
# with only nine rows the hold-out can remove the single example of a concept
# (e.g. "Luz" appears once) from training -- consider evaluating on the
# hold-out or fitting on all rows once the model is validated.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
pipe.fit(X_train, y_train)
print("✅ Modelo entrenado")


✅ Modelo entrenado


In [8]:
from joblib import dump

# Persist the whole fitted pipeline (preprocessing and regressor together) in
# one file, so it can later be reloaded and used on raw concept labels.
dump(value=pipe, filename="modelo_gastos_pipeline.joblib")
print("✅ Modelo guardado como 'modelo_gastos_pipeline.joblib'")


✅ Modelo guardado como 'modelo_gastos_pipeline.joblib'


In [9]:
from joblib import load

# Restore the pipeline from the file written by the save cell.
# joblib files are pickle-based: only load files that come from a trusted source.
pipe_loaded = load(filename="modelo_gastos_pipeline.joblib")
print("✅ Modelo cargado desde disco")


✅ Modelo cargado desde disco


In [10]:
# Concepts to score, as a one-column frame named "Concepto" (the column the
# pipeline's transformer selects).
nuevo = pd.Series(["Super", "Agua", "Renta"], name="Concepto").to_frame()
pred = pipe_loaded.predict(nuevo)

# Print each concept next to its predicted amount, in input order.
for c, p in pd.Series(pred, index=nuevo["Concepto"]).items():
    print(f"Predicción para '{c}': {p:.2f}")


Predicción para 'Super': 1100.00
Predicción para 'Agua': 275.00
Predicción para 'Renta': 2750.00
