In [5]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
import joblib
from google.colab import files

# Load the raw listings CSV directly from GitHub.
url = "https://raw.githubusercontent.com/gbergamo74/airbnb-italy-analysis/main/Airbnb%20in%20Italy.csv"
df = pd.read_csv(url)

# Clean 'price': strip '$' and ',' and convert to float.
# pd.to_numeric(errors='coerce') maps any value that still cannot be parsed
# to NaN instead of raising, so a single malformed cell does not abort the
# whole run. The trailing astype(float) keeps the column float-typed even
# when every remaining price happens to be an integer.
df['price'] = pd.to_numeric(
    df['price']
      .astype(str)
      .str.replace(r'[\$,]', '', regex=True),
    errors='coerce',
).astype(float)
# Drop rows without a valid price (missing or unparseable).
df = df.dropna(subset=['price'])

# Build X and y.
# leak_cols are excluded from the features together with the target itself
# (presumably because they are derived from the price -- confirm against
# the dataset). errors='ignore' tolerates a CSV where such a column is absent.
leak_cols = ['price_per_person']
target   = 'price'
# Only numeric columns are kept as features; the pipeline below has no
# encoder for categorical/text columns.
X = df.drop(columns=leak_cols + [target], errors='ignore') \
       .select_dtypes(include=[np.number])
y = df[target]

# Define and fit the pipeline: median imputation -> standardization -> Ridge.
pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler',  StandardScaler()),
    ('model',   Ridge(alpha=1.0, random_state=42))
])
pipeline.fit(X, y)

# Persist the fitted pipeline (preprocessing + model) as a single artifact.
joblib.dump(pipeline, 'ridge_pipeline.pkl')

# Trigger a browser download of the artifact from the Colab runtime.
# `files` is already imported at the top of this cell.
files.download('ridge_pipeline.pkl')

print("ridge_pipeline.pkl generado correctamente")




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

ridge_pipeline.pkl generado correctamente
