In [4]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor

# Baseline comparison of a linear regression and a decision tree on the
# training CSV: load, split, preprocess inside a pipeline, fit, and report
# MAE / RMSE / R2 on the held-out validation split.

# Load data and discard the row identifier column.
df = pd.read_csv("data/raw/train.csv")
df = df.drop(columns=["Id"])

# Features and target.
X = df.drop("SalePrice", axis=1)
Y = df["SalePrice"]

# Column groups are taken from the feature frame, so the target can never
# end up among the predictors.
num_cols = X.select_dtypes(include=["int64", "float64"]).columns
cat_cols = X.select_dtypes(include=["object"]).columns

# Train / validation split.
X_entr, X_val, Y_entr, Y_val = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Preprocessing.
# Missing values are imputed INSIDE the pipeline rather than on the full
# DataFrame before the split: the medians are then learned from the training
# rows only, so no information from the validation rows leaks into the model.
# NOTE: as a consequence `df` itself is no longer imputed in place.
num_pipeline = Pipeline(steps=[
    ("imputador", SimpleImputer(strategy="median")),
    ("escalador", StandardScaler()),
])
cat_pipeline = Pipeline(steps=[
    ("imputador", SimpleImputer(strategy="constant", fill_value="Missing")),
    ("codificador", OneHotEncoder(handle_unknown="ignore")),
])

preprocessor = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, num_cols),
        ("cat", cat_pipeline, cat_cols),
    ]
)

# Models.
# The linear model keeps the `preprocessor` object itself (so it ends up
# fitted, as before); the tree gets its own unfitted copy so that fitting one
# pipeline never refits the transformer owned by the other.
reglin = Pipeline(steps=[("preprocesador", preprocessor),
                         ("modelo", LinearRegression())])

arbol = Pipeline(steps=[("preprocesador", clone(preprocessor)),
                        ("modelo", DecisionTreeRegressor(random_state=42))])

# Training and evaluation.
modelos = {"Regresión lineal": reglin,
           "Árbol de decisión": arbol}

resultados = []

for nombre, modelo in modelos.items():
    modelo.fit(X_entr, Y_entr)
    Y_pred = modelo.predict(X_val)

    mae = mean_absolute_error(Y_val, Y_pred)
    # RMSE is the square root of the MSE, in the same units as the target.
    rmse = np.sqrt(mean_squared_error(Y_val, Y_pred))
    r2 = r2_score(Y_val, Y_pred)

    resultados.append({"Modelo": nombre, "MAE": mae, "RMSE": rmse, "R2": r2})

resultados_df = pd.DataFrame(resultados)
print(resultados_df)

              Modelo           MAE          RMSE        R2
0   Regresión lineal  21126.276571  65389.059987  0.442562
1  Árbol de decisión  28720.746575  43808.958004  0.749786
