In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# 1) Load data
# Read the housing CSV straight from GitHub and discard every row that
# contains a missing value before any modelling happens.
DATA_URL = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/refs/heads/main/boston.csv"
df = pd.read_csv(DATA_URL).dropna()

# PRICE is the regression target; every other column is used as a feature.
y = df["PRICE"]
X = df.drop(columns=["PRICE"])

# 2) Train-test split
# 80/20 hold-out split. The same seed is reused for the split and for the
# tree-based models so the whole cell is repeatable.
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# 3) Scale features for LinearRegression
# The scaler is fitted on the training rows only and then applied to both
# partitions, so no test-set statistics are used during fitting.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4) Model initialization and 5) training
# fit() returns the estimator itself, so construction and training are chained.
# The two tree models are trained on the raw features.
dt = DecisionTreeRegressor(random_state=SEED).fit(X_train, y_train)
rf = RandomForestRegressor(n_estimators=200, random_state=SEED).fit(X_train, y_train)
lr = LinearRegression().fit(X_train_scaled, y_train)  # uses the scaled features

# 6) Predict
# Each model is scored on the same representation it was trained on.
dt_pred, rf_pred = dt.predict(X_test), rf.predict(X_test)
lr_pred = lr.predict(X_test_scaled)

# 7) Evaluate
# RMSE is the square root of the test-set MSE; R2 is computed on the same
# predictions. Both are evaluated in the order: tree, forest, linear.
dt_rmse, rf_rmse, lr_rmse = (
    np.sqrt(mean_squared_error(y_test, pred))
    for pred in (dt_pred, rf_pred, lr_pred)
)
dt_r2, rf_r2, lr_r2 = (
    r2_score(y_test, pred) for pred in (dt_pred, rf_pred, lr_pred)
)

print("=== Test Evaluation ===")
print(f"Decision Tree : RMSE={dt_rmse:.2f}, R2={dt_r2:.4f}")
print(f"Random Forest : RMSE={rf_rmse:.2f}, R2={rf_r2:.4f}")
print(f"Linear Regression : RMSE={lr_rmse:.2f}, R2={lr_r2:.4f}")


=== Test Evaluation ===
Decision Tree : RMSE=0.69, R2=0.6323
Random Forest : RMSE=0.49, R2=0.8168
Linear Regression : RMSE=0.74, R2=0.5767
