In [None]:
from sklearn.model_selection import cross_validate, KFold
import numpy as np

# 1) Metrics to collect and the fold layout used for cross-validation.
#    The error metrics use scikit-learn's "neg_*" scorers, so their raw values
#    are negated errors; the sign is flipped back when reporting below.
scoring = dict(
    rmse='neg_root_mean_squared_error',
    mae='neg_mean_absolute_error',
    r2='r2',
)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 2) Run cross-validation. Replace `rf` with the estimator being evaluated
#    (e.g. `stack` if that is the final model, otherwise best_rf / best_xgb).
cv_results = cross_validate(
    rf, X, y,
    scoring=scoring,
    cv=kf,
    n_jobs=-1,
    return_train_score=False,
)

# 3) Summarise each metric as mean ± standard deviation across the folds.
for _label, _key, _negated in (
    ("RMSE:", 'test_rmse', True),
    ("MAE: ", 'test_mae', True),
    ("R2:  ", 'test_r2', False),
):
    _fold_scores = cv_results[_key]
    _mean = np.mean(_fold_scores)
    print(_label, -_mean if _negated else _mean, "±", np.std(_fold_scores))


In [None]:
import matplotlib.pyplot as plt


# 1) Predict on the held-out test set.
#    Replace `stack` with the fitted model being evaluated (e.g. rf, xgb).
y_pred = stack.predict(X_test)

# 2) Scatter plot: true vs. predicted values.
plt.figure()
plt.scatter(y_test, y_pred, alpha=0.3)
# Perfect-prediction diagonal. Its endpoints are taken from the values that are
# actually plotted (test targets and predictions) rather than from the full
# target `y`, so the line always spans the scatter and this cell does not
# depend on a variable unrelated to the plot.
_plotted = np.concatenate([np.asarray(y_test).ravel(), np.asarray(y_pred).ravel()])
_lo, _hi = _plotted.min(), _plotted.max()
plt.plot([_lo, _hi], [_lo, _hi], 'k--')
plt.xlabel("True")
plt.ylabel("Pred")
plt.title("True vs Pred")
plt.show()

# 3) Histogram of residuals (true minus predicted).
residuals = y_test - y_pred
plt.figure()
plt.hist(residuals, bins=30)
plt.xlabel("Residual")
plt.title("Residual Distribution")
plt.show()

In [None]:
from sklearn.dummy import DummyRegressor

# Baseline: a DummyRegressor with strategy="mean", scored with the same folds
# (`kf`) and metrics (`scoring`) as the real model so the numbers are directly
# comparable.
dummy = DummyRegressor(strategy="mean")
dummy_scores = cross_validate(dummy, X, y, cv=kf, scoring=scoring, n_jobs=-1)
# The "neg_*" scorers yield negated errors, so flip the sign for RMSE and MAE.
# MAE is reported too: `scoring` already computes it, and without this line the
# model's MAE would have no baseline to be compared against.
print("Dummy RMSE:", -np.mean(dummy_scores['test_rmse']))
print("Dummy MAE: ", -np.mean(dummy_scores['test_mae']))
print("Dummy R2:  ",  np.mean(dummy_scores['test_r2']))


In [None]:
from sklearn.model_selection import learning_curve

# Learning curve: RMSE on the training and validation folds as a function of
# the number of training examples. Replace `stack` with the model being
# evaluated.
_size_fractions = np.linspace(0.1, 1.0, 5)
train_sizes, train_scores, test_scores = learning_curve(
    stack, X, y,
    train_sizes=_size_fractions,
    cv=kf,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
)

# The scorer returns negated RMSE; average along axis 1 (per training size)
# and flip the sign to get positive RMSE values.
train_rmse = -train_scores.mean(axis=1)
test_rmse = -test_scores.mean(axis=1)

plt.figure()
for _curve, _label in ((train_rmse, "Train RMSE"), (test_rmse, "Test RMSE")):
    plt.plot(train_sizes, _curve, label=_label)
plt.xlabel("Training examples")
plt.ylabel("RMSE")
plt.legend()
plt.show()