In [4]:
import sys
import os
# Make the project root (the parent of the current working directory)
# importable so that `src.*` modules can be loaded from this notebook.
# The membership check keeps the cell idempotent: re-running it does not
# append the same directory to sys.path again.
PROJECT_ROOT = os.path.abspath("..")
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import root_mean_squared_error, r2_score
import numpy as np

In [5]:
from src.preprocessing import build_preprocessing

In [6]:
# Load the training data.
# The path is built relative to the parent of the working directory (the same
# directory this notebook adds to sys.path) instead of a machine-specific
# absolute path, so the notebook runs on any checkout of the project.
# NOTE(review): assumes the notebook is run from a direct subfolder of the
# project root and that the CSV lives in <project root>/data/ - confirm.
DATA_PATH = os.path.join(os.path.abspath(".."), 'data', 'train.csv')
df = pd.read_csv(DATA_PATH)

In [7]:
# The target is skewed, so it is replaced by log1p(SalePrice).
# The assignment reads and overwrites the same column, so running this cell
# twice would log-transform the target twice. A flag stored on the frame
# itself (df.attrs) makes the cell idempotent; reloading df from the CSV
# yields a fresh frame without the flag, so the transform is applied again.
if not df.attrs.get('saleprice_log1p', False):
    df['SalePrice'] = np.log1p(df['SalePrice'])
    df.attrs['saleprice_log1p'] = True

In [8]:
# Separate the target from the features, then hold out 20% of the rows
# for evaluation. 'Id' is dropped together with the target column.
y = df['SalePrice']
X = df.drop(['SalePrice', 'Id'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
)


In [9]:
# Build the preprocessing object with the project helper imported from
# src.preprocessing, passing only the training features. The result is used
# below as the 'preprocessor' step of every model pipeline.
# NOTE(review): presumably the helper inspects X_train's columns/dtypes to
# decide how each column is handled - confirm in src/preprocessing.py.
preprocessor = build_preprocessing(X_train)

In [13]:
# Candidate regressors, all trained and scored on the same train/test split.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(n_estimators=200, random_state=42),
    'XGBoost': XGBRegressor(n_estimators=500, learning_rate=0.05, max_depth=4, random_state=42)
}

# Keep every fitted pipeline and its scores, keyed by model name. Without
# these, `pipe`, `rmse` and `r2` are overwritten on each iteration and only
# the last model in the dict remains available after the loop.
fitted_pipelines = {}
results = {}

for name, model in models.items():
    # Every pipeline reuses the same `preprocessor` object; it is refit on
    # X_train each time pipe.fit() runs.
    pipe = Pipeline(steps=[('preprocessor', preprocessor),
                           ('model', model)])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    # y holds log1p(SalePrice), so both metrics are on the log scale.
    rmse = root_mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    fitted_pipelines[name] = pipe
    results[name] = {'RMSE': rmse, 'R2': r2}

    print(f"=== {name} ===")
    print(f"RMSE: {rmse:.4f}")
    print(f"R²: {r2:.4f}\n")

=== Linear Regression ===
RMSE: 0.1281
R²: 0.9121

=== Random Forest ===
RMSE: 0.1462
R²: 0.8854

=== XGBoost ===
RMSE: 0.1359
R²: 0.9011



## Model Terbaik  
Linear Regression menunjukkan performa terbaik dengan:  
RMSE terendah (0.1281) - artinya error prediksi pada data uji paling kecil (diukur pada skala log, karena target adalah log1p(SalePrice))  
R² tertinggi (0.9121) - model ini mampu menjelaskan 91.21% variasi log harga rumah pada data uji

Namun, perbedaan RMSE antar model tidak terlalu besar, yang menunjukkan bahwa semua model cukup kompeten untuk tugas prediksi harga rumah ini.