In [None]:
# -*- coding: utf-8 -*-
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from pathlib import Path

# Hyperparameters
window_size = 24  # not referenced elsewhere in this script; presumably the sequence length of X
batch_size = 64
epochs = 50
learning_rate = 1e-3

# Early-stopping state: training stops after `patience` consecutive epochs
# whose validation loss fails to beat the best one by more than `min_delta`.
patience = 10
min_delta = 1e-3
best_val_loss = float("inf")
patience_counter = 0

# meter_type lookup tables.
# The position of a label in this tuple is its integer code.
_METER_LABELS = (
    "electricity",
    "chilledwater",
    "steam",
    "hotwater",
    "gas",
    "water",
    "irrigation",
    "solar",
)
# Accept either the label ("steam") or the code written as a string ("2").
meter_type_mapping = {
    alias: code
    for code, label in enumerate(_METER_LABELS)
    for alias in (label, str(code))
}
# Reverse lookup: integer code -> canonical label.
name_map = dict(enumerate(_METER_LABELS))

# File locations. The input paths are relative, so they are resolved against
# the current working directory, not against the location of this file.
X_PATH = Path("../data/processed/X_lstm.npy")
Y_PATH = Path("../data/processed/y_lstm.npy")
MODEL_DIR = Path("models")
MODEL_DIR.mkdir(exist_ok=True)

# Fail early with the fully resolved location: a bare relative path in the
# error message does not tell the user which directory was actually searched.
for _required in (X_PATH, Y_PATH):
    if not _required.is_file():
        raise FileNotFoundError(
            f"Required input file not found: {_required.resolve()} "
            f"(relative paths are resolved from the working directory {Path.cwd()})"
        )

# Load the input sequences and the targets
X = np.load(X_PATH).astype(np.float32)
y = np.load(Y_PATH).astype(np.float32)

# Optional meter_type filtering, selected through the METER_TYPE environment
# variable. Column 0 of y is used as the regression target and column 1 is
# compared against the meter-type code.
meter_arg = os.environ.get("METER_TYPE", "all")
if meter_arg != "all":
    if meter_arg not in meter_type_mapping:
        raise ValueError(f"Invalid meter_type: {meter_arg}")
    raw_code = meter_type_mapping[meter_arg]
    canonical_name = name_map[raw_code]
    print(f"Training for meter_type = '{meter_arg}' (code: {raw_code})")
    # Build the row mask once and reuse it for both arrays.
    meter_mask = y[:, 1] == raw_code
    if not meter_mask.any():
        # Without this guard the failure surfaces much later as an obscure
        # DataLoader error about an empty dataset.
        raise ValueError(
            f"No samples found for meter_type '{meter_arg}' (code: {raw_code})"
        )
    X = X[meter_mask]
    y = y[meter_mask, 0]
else:
    canonical_name = "all"
    y = y[:, 0]
    print("Training on all meter_type data")

# Train/validation split: the first 80% of the rows (in stored order, no
# shuffling) are used for training and the remaining rows for validation.
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_val, y_val = X[split_idx:], y[split_idx:]

# Wrap the NumPy arrays as tensors; only the training loader shuffles.
train_dataset = TensorDataset(*map(torch.from_numpy, (X_train, y_train)))
val_dataset = TensorDataset(*map(torch.from_numpy, (X_val, y_val)))
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size)

# Model definition
class LSTMModel(nn.Module):
    """Stacked LSTM with a linear head that emits one scalar per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability passed to ``nn.LSTM``. PyTorch applies it
            only between stacked layers, so it is replaced by 0 when
            ``num_layers`` is 1; this avoids the ``UserWarning`` PyTorch emits
            for a non-zero dropout on a single-layer LSTM.
    """

    def __init__(self, input_size=1, hidden_size=96, num_layers=2, dropout=0.2):
        super().__init__()
        # Dropout has no effect on a single-layer LSTM; pass 0 to stay silent.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return a 1-D tensor with one prediction per sequence in the batch.

        Args:
            x: Batch-first input of shape ``(batch, seq_len, input_size)``.
        """
        out, _ = self.lstm(x)
        # Only the output at the last time step feeds the linear head.
        return self.fc(out[:, -1, :]).view(-1)

# Network, loss function and optimizer
model = LSTMModel()
criterion = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

train_losses, val_losses = [], []
best_model_path = MODEL_DIR / f"lstm_best_{canonical_name}.pth"


def _run_epoch(loader, training):
    """Run one full pass over ``loader`` and return the per-sample mean loss.

    When ``training`` is true the model is put in train mode and the optimizer
    takes one step per batch; otherwise the model is put in eval mode and the
    pass runs with gradient tracking disabled.
    """
    model.train(training)
    running_total = 0.0
    with torch.set_grad_enabled(training):
        for features, targets in loader:
            # Append a trailing feature axis so each time step has one feature.
            features = features.unsqueeze(-1)
            batch_loss = criterion(model(features), targets)
            if training:
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()
            # Weight by batch size so the final average is per sample.
            running_total += batch_loss.item() * features.size(0)
    return running_total / len(loader.dataset)


# Training loop with early stopping on the validation loss
for epoch in range(epochs):
    train_loss = _run_epoch(train_loader, training=True)
    val_loss = _run_epoch(val_loader, training=False)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.5f} - Val Loss: {val_loss:.5f}")

    if best_val_loss - val_loss > min_delta:
        # Improvement beyond min_delta: checkpoint and reset the counter.
        best_val_loss = val_loss
        torch.save(model.state_dict(), best_model_path)
        patience_counter = 0
        continue

    patience_counter += 1
    if patience_counter >= patience:
        print(f"Early stopping triggered at epoch {epoch+1}")
        break

# Save the weights the model holds when training ends (the last epoch run)
final_model_path = MODEL_DIR / f"lstm_model_{canonical_name}.pth"
torch.save(model.state_dict(), final_model_path)
print(f"Model saved to {final_model_path}")
print(f"Best model saved to {best_model_path}")

# Save the loss curves as an image
fig, ax = plt.subplots(figsize=(12, 5))
for series, caption in ((train_losses, "Train Loss"), (val_losses, "Validation Loss")):
    ax.plot(series, label=caption)
ax.set_title("LSTM Training Curve")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig(MODEL_DIR / f"lstm_loss_curve_{canonical_name}.png")


FileNotFoundError: [Errno 2] No such file or directory: 'data/processed/X_lstm.npy'