# 0. Import

In [1]:
!pip install optuna tqdm



In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor
import optuna
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



# 1. *Dataset*

In [3]:

# 1. 데이터 로드 및 Feature Engineering
# 데이터셋 로드
data = pd.read_csv('/content/drive/MyDrive/ETTm2.csv')
data['date'] = pd.to_datetime(data['date'])

# Feature Engineering
data['hour'] = data['date'].dt.hour
data['dayofweek'] = data['date'].dt.dayofweek
data['month'] = data['date'].dt.month
data['lag_1'] = data['OT'].shift(1)
data['lag_2'] = data['OT'].shift(2)
data['rolling_mean'] = data['OT'].rolling(window=3).mean()
data = data.dropna().reset_index(drop=True)

# Train-Test Split
X = data.drop(columns=['date', 'OT'])
y = data['OT']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)


# 2. LightGBM

In [4]:
def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'num_leaves': trial.suggest_int('num_leaves', 20, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'verbose': -1,
    }
    model = LGBMRegressor(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    return mean_squared_error(y_test, preds, squared=False)

# Optuna를 사용한 최적화
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)
best_params = study.best_params
print("Best Params for LightGBM:", best_params)

# 최적 파라미터로 LightGBM 모델 학습
lgb_model = LGBMRegressor(**best_params)
lgb_model.fit(X_train, y_train)
lightgbm_preds = lgb_model.predict(X_test)

[I 2024-12-17 09:06:50,689] A new study created in memory with name: no-name-15088995-5b0b-442f-9ae9-2d593cf592a7
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-12-17 09:07:00,291] Trial 0 finished with value: 0.19185755977001387 and parameters: {'learning_rate': 0.2377736752948565, 'n_estimators': 427, 'num_leaves': 179, 'max_depth': 15}. Best is trial 0 with value: 0.19185755977001387.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-12-17 09:07:11,971] Trial 1 finished with value: 0.17695079210977516 and parameters: {'learning_rate': 0.022983031782596794, 'n_estimators': 551, 'num_leaves': 203, 'max_depth': 14}. Best is trial 1 with value: 0.17695079210977516.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
[I 2024-12-17 09:07:18,578] Trial 2 finished with value: 0.18435106109516722 and parameters: {'learning_rate': 0.18852709715683907, 'n_estimators': 924, 'num_leaves': 182, 'max_depth': 10}.

Best Params for LightGBM: {'learning_rate': 0.056868137624476345, 'n_estimators': 160, 'num_leaves': 226, 'max_depth': 9}


# 3. GRU

In [5]:
# 3. GRU 모델 구현 및 학습
class OilTempDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X.values, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.float32)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# GRU 모델 정의
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        _, hidden = self.gru(x)
        out = self.fc(hidden[-1])
        return out

# Dataset 준비
gru_train = OilTempDataset(X_train, y_train)
gru_test = OilTempDataset(X_test, y_test)
train_loader = DataLoader(gru_train, batch_size=64, shuffle=True)
test_loader = DataLoader(gru_test, batch_size=64, shuffle=False)

# GRU 모델 학습
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gru_model = GRUModel(input_size=X_train.shape[1], hidden_size=64, num_layers=2, output_size=1).to(device)
optimizer = torch.optim.Adam(gru_model.parameters(), lr=0.001)
criterion = nn.MSELoss()

for epoch in tqdm(range(100)):
    gru_model.train()
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device).unsqueeze(1)
        optimizer.zero_grad()
        outputs = gru_model(X_batch.unsqueeze(1))
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

# GRU 모델 평가
gru_model.eval()
preds = []
with torch.no_grad():
    for X_batch, _ in test_loader:
        X_batch = X_batch.to(device)
        outputs = gru_model(X_batch.unsqueeze(1))
        preds.extend(outputs.cpu().numpy())
gru_preds = np.array(preds).flatten()

100%|██████████| 100/100 [04:11<00:00,  2.51s/it]


# 4. 앙상블

In [6]:
rmse = mean_squared_error(y_test, lightgbm_preds, squared=False)
print(f"LGBM RMSE: {rmse:.4f}")

LGBM RMSE: 0.1691




In [7]:
rmse = mean_squared_error(y_test, gru_preds, squared=False)
print(f"GRU RMSE: {rmse:.4f}")

GRU RMSE: 0.0721




In [8]:
# 4. 앙상블 예측 수행
ensemble_preds = (lightgbm_preds + gru_preds) / 2
rmse = mean_squared_error(y_test, ensemble_preds, squared=False)
print(f"Ensemble RMSE: {rmse:.4f}")

Ensemble RMSE: 0.0995


