In [2]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import lightgbm as lgb
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, r2_score

In [3]:
# 데이터 로드 및 전처리
data = pd.read_csv('merged_data.csv', encoding='utf-8')

In [4]:
selected_features = ['stn_id', 'borrowed_hour', 'borrowed_day', 'is_holiday', 'borrowed_num_nearby', '강수량(mm)', 'wind_chill', 'stn_gu', 'nearby_id', 'borrowed_date', 'borrowed_num']
data = data[selected_features]
categorical_features = ['stn_id', 'stn_gu', 'nearby_id']
for feature in categorical_features:
    data[feature] = pd.factorize(data[feature])[0]

X = data.drop('borrowed_num', axis=1)
y = data['borrowed_num']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
# LightGBM 그리드 서치
lgb_model = lgb.LGBMRegressor()

lgb_param_grid = {
    'num_leaves': [31, 50, 80],
    'learning_rate': [0.01, 0.05, 0.1],
    'feature_fraction': [0.8, 0.9, 1.0],
}

lgb_grid = GridSearchCV(lgb_model, param_grid=lgb_param_grid, scoring=make_scorer(r2_score), cv=3)
lgb_grid.fit(X_train, y_train)

print("LightGBM 최적 파라미터:", lgb_grid.best_params_)
print("LightGBM 최적 R-squared:", lgb_grid.best_score_)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.926359 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1136
[LightGBM] [Info] Number of data points in the train set: 5145300, number of used features: 10
[LightGBM] [Info] Start training from score 1.482044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.324779 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1136
[LightGBM] [Info] Number of data points in the train set: 5145300, number of used features: 10
[LightGBM] [Info] Start training from score 1.484790
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.290188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not e

In [6]:
# XGBoost 랜덤 서치
xgb_model = xgb.XGBRegressor()

xgb_param_dist = {
    'max_depth': [3, 6, 9],
    'learning_rate': [0.01, 0.05, 0.1],
    'subsample': [0.8, 0.9, 1.0],
}

xgb_random = RandomizedSearchCV(xgb_model, param_distributions=xgb_param_dist, n_iter=10, scoring=make_scorer(r2_score), cv=3, random_state=42)
xgb_random.fit(X_train, y_train)

print("XGBoost 최적 파라미터:", xgb_random.best_params_)
print("XGBoost 최적 R-squared:", xgb_random.best_score_)

XGBoost 최적 파라미터: {'subsample': 0.8, 'max_depth': 9, 'learning_rate': 0.1}
XGBoost 최적 R-squared: 0.512741606246932
