In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# -----------------------------
# 1) Data preparation
# -----------------------------
# Tunable settings gathered in one place so the split and every seeded model
# share the same values instead of repeating literals.
TARGET_COL = "PRICE"
TEST_SIZE = 0.2
RANDOM_STATE = 42

path = "/content/drive/MyDrive/ML 2025-2/week2/boston.csv"
df = pd.read_csv(path)
# NOTE(review): the recorded run of this cell shows 20,640 rows and 9 feature
# columns, which does not look like the classic Boston housing data the file
# name suggests -- confirm which dataset this CSV holds, and whether one of the
# 9 columns is a saved index column that should not be used as a feature.

# Separate features and target
X = df.drop(columns=[TARGET_COL])
y = df[TARGET_COL]

# Train / test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, shuffle=True, random_state=RANDOM_STATE
)
print("Train:", X_train.shape, y_train.shape, " | Test:", X_test.shape, y_test.shape)

# -----------------------------
# 2) Model configuration
# -----------------------------
# Linear regression and SVR are wrapped in a pipeline with StandardScaler so
# they are trained on standardized features; the tree-based models use the
# raw features.
models = {
    "LinearRegression": make_pipeline(StandardScaler(), LinearRegression()),
    "DecisionTree":     DecisionTreeRegressor(random_state=RANDOM_STATE),
    "RandomForest":     RandomForestRegressor(n_estimators=300, random_state=RANDOM_STATE),
    "SVR(RBF)":         make_pipeline(StandardScaler(), SVR(kernel="rbf", C=3.0, gamma="scale", epsilon=0.1)),
}


Train: (16512, 9) (16512,)  | Test: (4128, 9) (4128,)


In [2]:
# -----------------------------
# 3) Model training
# -----------------------------
# fit() trains each estimator in place, so the objects already stored in
# `models` are the fitted ones; nothing needs to be written back to the dict.
for name, model in models.items():
    model.fit(X_train, y_train)

# -----------------------------
# 4) Prediction
# -----------------------------
# Test-set predictions for every fitted model, keyed by model name.
preds = {name: mdl.predict(X_test) for name, mdl in models.items()}

# -----------------------------
# 5) Evaluation
# -----------------------------
def evaluate(y_true, y_pred):
    """Compute regression metrics for one set of predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        A tuple ``(mse, rmse, mae, r2)``.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return (
        squared_error,
        # Square root taken manually for environments where
        # mean_squared_error(squared=False) is not supported.
        np.sqrt(squared_error),
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )

# Report the four test-set metrics for each model, formatted to four decimals.
print("\n=== Test Metrics ===")
for name, yhat in preds.items():
    mse, rmse, mae, r2 = evaluate(y_test, yhat)
    # Assemble the per-model report first, then emit it with a single print;
    # the text written to stdout is the same as printing line by line.
    report = [
        f"\n[{name}]",
        f" MSE : {mse:.4f}",
        f" RMSE: {rmse:.4f}",
        f" MAE : {mae:.4f}",
        f" R^2 : {r2:.4f}",
    ]
    print("\n".join(report))


=== Test Metrics ===

[LinearRegression]
 MSE : 0.5547
 RMSE: 0.7448
 MAE : 0.5323
 R^2 : 0.5767

[DecisionTree]
 MSE : 0.4818
 RMSE: 0.6941
 MAE : 0.4438
 R^2 : 0.6323

[RandomForest]
 MSE : 0.2387
 RMSE: 0.4886
 MAE : 0.3144
 R^2 : 0.8178

[SVR(RBF)]
 MSE : 0.3239
 RMSE: 0.5691
 MAE : 0.3772
 R^2 : 0.7528
