**앞서 EDA의 결과로 상관관계가 가장 뚜렷한 soxdiff를 예측하는 모델을 만든다.**

## GBR 모델

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Predictor columns and the regression target (SoxDiff_Unit3).
soxdiff_feature_cols = ['Power_Unit3', 'InletSox_Unit3', 'Limestone_Unit3']
X_soxdiff = df[soxdiff_feature_cols]
y_soxdiff = df['SoxDiff_Unit3']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
(
    X_train_soxdiff,
    X_test_soxdiff,
    y_train_soxdiff,
    y_test_soxdiff,
) = train_test_split(X_soxdiff, y_soxdiff, test_size=0.2, random_state=42)

# Gradient boosting settings. These values equal the "Best Parameters"
# printed by the GridSearchCV cell of this notebook.
soxdiff_gbr_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'min_samples_split': 5,
    'min_samples_leaf': 2,
    'random_state': 42,
}

# fit() returns the estimator itself, so construction and training are chained.
model_soxdiff = GradientBoostingRegressor(**soxdiff_gbr_params).fit(
    X_train_soxdiff, y_train_soxdiff
)

# Score the held-out rows.
y_pred_soxdiff = model_soxdiff.predict(X_test_soxdiff)
mse_soxdiff = mean_squared_error(y_test_soxdiff, y_pred_soxdiff)
r2_soxdiff = r2_score(y_test_soxdiff, y_pred_soxdiff)

print(f'MSE: {mse_soxdiff}')
print(f'R2: {r2_soxdiff}')


MSE: 73.756663438496
R2: 0.9744931022955279


## 하이퍼파라미터 튜닝 (GridSearchCV)

In [19]:
 from sklearn.model_selection import GridSearchCV

 # NOTE(review): every line of this cell carries one leading space, which plain
 # Python rejects as an unexpected indent -- presumably a notebook-export
 # artifact; confirm before running this outside a notebook.

 # Re-split the data so the training portion has the previously chosen size.
 # optimal_train_size is not defined in this cell; it must already exist.
 optimal_X_train, _, optimal_y_train, _ = train_test_split(
     X_soxdiff,
     y_soxdiff,
     train_size=optimal_train_size,
     random_state=42,
 )

 # Candidate values per hyperparameter: 3 values x 5 parameters = 243 combinations.
 param_grid = dict(
     n_estimators=[100, 200, 300],
     learning_rate=[0.01, 0.1, 0.2],
     max_depth=[3, 4, 5],
     min_samples_split=[2, 5, 10],
     min_samples_leaf=[1, 2, 4],
 )

 # Exhaustive search over the grid, scored by R2 with 5-fold cross-validation.
 # fit() returns the search object itself, so construction and fitting are chained.
 grid_search = GridSearchCV(
     GradientBoostingRegressor(random_state=42),
     param_grid,
     scoring='r2',
     cv=5,
     n_jobs=-1,
 ).fit(optimal_X_train, optimal_y_train)

 # Report the winning combination and its cross-validation score.
 print(f'Best Parameters: {grid_search.best_params_}')
 print(f'Best Cross-Validation R2 Score: {grid_search.best_score_}')

Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Best Cross-Validation R2 Score: 0.9695203451723536
