# In[1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

# Read the training dataset from a CSV file in the working directory.
df = pd.read_csv('house_prices_hanoi_50000.csv')

# Feature matrix (ten input columns) and the regression target ('price').
feature_columns = [
    'square_meters', 'num_floors', 'num_bedrooms', 'num_bathrooms',
    'garage', 'main_road', 'near_school', 'near_market',
    'location', 'year_built',
]
X = df[feature_columns]
y = df['price']

# One-hot encode 'location'; every other column is passed through untouched.
# handle_unknown='ignore' lets the encoder accept categories at predict time
# that were not present during fitting instead of raising an error.
location_encoder = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[('location', location_encoder, ['location'])],
    remainder='passthrough',
)

# Chain preprocessing and the regressor so the saved artifact accepts the
# same raw columns as X. random_state is fixed for reproducible fits.
forest = RandomForestRegressor(n_estimators=100, random_state=42)
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', forest),
])

# Fit on the full dataset (no hold-out split is made here).
model_pipeline.fit(X, y)

# Persist the fitted pipeline to disk and report success.
joblib.dump(model_pipeline, 'house_price_model.pkl')
print("✅ Model saved to house_price_model.pkl")


# Output: ✅ Model saved to house_price_model.pkl
