In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression


# -----------------------------
# 1) 데이터 준비
# -----------------------------

df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/boston.csv").dropna()



X = df.drop(columns=["PRICE"])
y = df["PRICE"].astype(float)


num_cols = [c for c in X.columns if pd.api.types.is_numeric_dtype(X[c])]
cat_cols = [c for c in X.columns if c not in num_cols]

preprocess = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ],
    remainder="drop"
)

X_tr, X_te, y_tr, y_te = train_test_split(

    X, y, test_size=0.2, stratify=y, random_state=42

)

# -----------------------------
# 2) 모델 구성
# -----------------------------
dt = DecisionTreeRegressor(random_state=42)
rf = RandomForestRegressor(n_estimators=200, random_state=42)


# -----------------------------
# 3) 모델 학습
# -----------------------------
def evaluate(pipe, X_te, y_te):
    pred = pipe.predict(X_te)
    mae = mean_absolute_error(y_te, pred)
    rmse = mean_squared_error(y_te, pred, squared=False)
    r2 = r2_score(y_te, pred)
    return mae, rmse, r2

# -----------------------------
# 4) 모델 평가
# -----------------------------
results = []
for name, pipe in models.items():
    pipe.fit(X_tr, y_tr)
    mae, rmse, r2 = evaluate(pipe, X_te, y_te)
    results.append((name, mae, rmse, r2))

print("=== Test Metrics (lower MAE/RMSE better, higher R2 better) ===")
for name, mae, rmse, r2 in results:
    print(f"{name:22s} | MAE: {mae:.3f} | RMSE: {rmse:.3f} | R2: {r2:.3f}")



ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

In [9]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# -----------------------------
# 1) 데이터 준비 (회귀용)
# -----------------------------

df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/boston.csv").dropna()


X = df.drop(columns=["PRICE"])
y = df["PRICE"]


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42

)

# -----------------------------
# 2) 모델 구성 (회귀 모델)
# -----------------------------

dt = DecisionTreeRegressor(random_state=42)
rf = RandomForestRegressor(n_estimators=300, random_state=42, n_jobs=-1)


# 선형회귀는 스케일링과 함께 파이프라인 구성 권장

lr = make_pipeline(StandardScaler(with_mean=True, with_std=True), LinearRegression())


# -----------------------------
# 3) 모델 학습
# -----------------------------

dt.fit(X_train, y_train)
rf.fit(X_train, y_train)
lr.fit(X_train, y_train)


# -----------------------------
# 4) 모델 평가 함수
# -----------------------------

def eval_reg(y_true, y_pred):
    mae  = mean_absolute_error(y_true, y_pred)
    mse  = mean_squared_error(y_true, y_pred)  # squared 인자 제거
    rmse = np.sqrt(mse)                        # 직접 제곱근
    r2   = r2_score(y_true, y_pred)
    return mae, rmse, r2



dt_mae, dt_rmse, dt_r2 = eval_reg(y_test, dt.predict(X_test))
rf_mae, rf_rmse, rf_r2 = eval_reg(y_test, rf.predict(X_test))
lr_mae, lr_rmse, lr_r2 = eval_reg(y_test, lr.predict(X_test))


print("=== Test Metrics (Regression) ===")

print("[Decision Tree]")
print(f"MAE: {dt_mae:.3f} | RMSE: {dt_rmse:.3f} | R^2: {dt_r2:.3f}")

print("[Random Forest]")
print(f"MAE: {rf_mae:.3f} | RMSE: {rf_rmse:.3f} | R^2: {rf_r2:.3f}")

print("[Linear Regression]")
print(f"MAE: {lr_mae:.3f} | RMSE: {lr_rmse:.3f} | R^2: {lr_r2:.3f}")

=== Test Metrics (Regression) ===
[Decision Tree]
MAE: 0.444 | RMSE: 0.694 | R^2: 0.632
[Random Forest]
MAE: 0.314 | RMSE: 0.489 | R^2: 0.818
[Linear Regression]
MAE: 0.532 | RMSE: 0.745 | R^2: 0.577
