In [None]:
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression

# Target and features: predict "Price" from the remaining columns.
y = house_prices_ds["Price"]
X = house_prices_ds.drop(columns=["Price"])

# Columns with a numeric dtype; these go through the numeric branch below.
numeric_cols = X.select_dtypes(include=[np.number]).columns.tolist()

# Numeric pipeline: impute missing values with the column median, then convert
# the result to CSR so that this branch also emits a sparse matrix.
# `sparse.csr_matrix` is passed directly rather than wrapped in a lambda:
# a FunctionTransformer built from a lambda cannot be pickled, which would
# prevent persisting the fitted preprocessor (pickle / joblib).
num_pipe = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("to_sparse", FunctionTransformer(sparse.csr_matrix, accept_sparse=True)),
])

# Categorical pipeline, applied only to "Location": impute with the most
# frequent value, then one-hot encode with sparse output. The first category
# is dropped, and categories unseen during fit are ignored at transform time.
cat_pipe = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ohe", OneHotEncoder(drop="first", sparse_output=True, handle_unknown="ignore")),
])

# ColumnTransformer combining both branches. Every branch outputs a sparse
# matrix; columns not listed here are dropped.
pre = ColumnTransformer(
    transformers=[
        ("num", num_pipe, numeric_cols),
        ("loc", cat_pipe, ["Location"]),
    ],
    remainder="drop",
    sparse_threshold=1.0,  # force sparse output whenever possible
)

# Fit the preprocessor and transform the features into a sparse (CSR) matrix,
# then fit an ordinary least-squares model on it.
X_sparse = pre.fit_transform(X)
linreg = LinearRegression()
linreg.fit(X_sparse, y)