In [5]:
# ==========================================
# CNN-LSTM 통합 파이프라인 + SHAP 시각화 (시간별/특징별 히트맵)
# ==========================================
import os, sys, random, numpy as np, pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
from pathlib import Path
from datetime import datetime
import optuna
import shap
import seaborn as sns

# -------------------------
# 로깅 & 폰트 설정
# -------------------------
sys.path.append(r"C:\ESG_Project1\util")
from logger import setup_logger
logger = setup_logger(__name__)

def setup_font():
    font_path = "C:/Windows/Fonts/malgun.ttf"
    if os.path.exists(font_path):
        font_name = font_manager.FontProperties(fname=font_path).get_name()
        rc('font', family=font_name)
    else:
        rc('font', family='AppleGothic')  # MacOS 예시
    plt.rcParams['axes.unicode_minus'] = False

# -------------------------
# 재현성
# -------------------------
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# -------------------------
# CNN-LSTM 모델 정의
# -------------------------
class Seq2SeqCNNLSTM(nn.Module):
    def __init__(self, input_size=1, conv_channels=(32,16), lstm_hidden=64, output_steps=24, dropout=0.2):
        super().__init__()
        self.conv1 = nn.Conv1d(input_size, conv_channels[0], 3, padding=1)
        self.bn1   = nn.BatchNorm1d(conv_channels[0])
        self.conv2 = nn.Conv1d(conv_channels[0], conv_channels[1], 3, padding=1)
        self.bn2   = nn.BatchNorm1d(conv_channels[1])
        self.relu  = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.encoder_lstm = nn.LSTM(conv_channels[1], lstm_hidden, batch_first=True)
        self.decoder_lstm = nn.LSTM(1, lstm_hidden, batch_first=True)
        self.fc = nn.Linear(lstm_hidden,1)
        self.output_steps = output_steps

    def forward(self, x, y=None, teacher_forcing_ratio=0.0):
        x = self.relu(self.bn1(self.conv1(x.transpose(1,2))))
        x = self.relu(self.bn2(self.conv2(x))).transpose(1,2)
        _, (hidden, cell) = self.encoder_lstm(x)
        decoder_input = x[:,-1,0].unsqueeze(-1)
        outputs = []
        for t in range(self.output_steps):
            decoder_output, (hidden, cell) = self.decoder_lstm(decoder_input.unsqueeze(1), (hidden,cell))
            out = self.fc(decoder_output).squeeze(1)
            outputs.append(out)
            decoder_input = y[:,t] if (y is not None and torch.rand(1).item() < teacher_forcing_ratio) else out
        return torch.stack(outputs, dim=1)

# -------------------------
# 시퀀스 생성
# -------------------------
def create_sequences(data, input_steps=168, output_steps=24):
    total_steps = len(data) - input_steps - output_steps + 1
    X = np.array([data[i:i+input_steps] for i in range(total_steps)])
    y = np.array([data[i+input_steps:i+input_steps+output_steps] for i in range(total_steps)])
    return X, y

# -------------------------
# Optuna objective
# -------------------------
def objective(trial, X_tensor, y_tensor, input_steps, output_steps, device):
    sample_ratio = 0.2
    total_len = len(X_tensor)
    sample_size = int(total_len * sample_ratio)
    indices = np.random.choice(total_len, sample_size, replace=False)
    X_sample = X_tensor[indices]
    y_sample = y_tensor[indices]

    val_ratio = 0.1
    val_size = int(sample_size*val_ratio)
    train_size = sample_size - val_size
    train_dataset, val_dataset = random_split(TensorDataset(X_sample, y_sample), [train_size,val_size])
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader   = DataLoader(val_dataset, batch_size=128, shuffle=False)

    conv1_ch = trial.suggest_int("conv1_ch", 16, 48, step=16)
    conv2_ch = trial.suggest_int("conv2_ch", 8, 32, step=8)
    lstm_hidden = trial.suggest_int("lstm_hidden", 32, 128, step=32)
    dropout = trial.suggest_float("dropout", 0.1, 0.5, step=0.1)
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)

    model = Seq2SeqCNNLSTM(conv_channels=(conv1_ch, conv2_ch), lstm_hidden=lstm_hidden,
                           output_steps=output_steps, dropout=dropout).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    best_val = np.inf
    no_improve = 0
    epochs_trial = 5
    early_patience_trial = 2

    for epoch in range(epochs_trial):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            y_pred = model(xb, yb, teacher_forcing_ratio=0.5)
            loss = criterion(y_pred, yb)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                val_loss += criterion(model(xb), yb).item()*xb.size(0)
        avg_val = val_loss / len(val_loader.dataset)

        trial.report(avg_val, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        if avg_val < best_val - 1e-6:
            best_val = avg_val
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= early_patience_trial:
                break
    return best_val

# -------------------------
# 파이프라인 + SHAP 히트맵
# -------------------------
def run_pipeline_shap_heatmap(train_csv, test_csv, target_col='합산발전량(MWh)',
                              input_steps=168, output_steps=24,
                              batch_size=128, epochs=120, n_trials=10,
                              output_dir='output', shap_sample_size=500):

    setup_font()
    set_seed()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info(f"💻 사용 디바이스: {device}")

    OUTPUT_DIR = Path(output_dir) / datetime.now().strftime("%Y%m%d_%H%M%S")
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # 데이터 로딩
    train_df = pd.read_csv(train_csv, index_col=0, parse_dates=True)
    test_df  = pd.read_csv(test_csv, index_col=0, parse_dates=True)

    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_df[[target_col]].values)
    test_scaled  = scaler.transform(test_df[[target_col]].values)

    X_all, y_all = create_sequences(train_scaled, input_steps, output_steps)
    X_tensor = torch.tensor(X_all, dtype=torch.float32)
    y_tensor = torch.tensor(y_all, dtype=torch.float32)

    # Optuna 최적화
    study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner())
    study.optimize(lambda trial: objective(trial, X_tensor, y_tensor, input_steps, output_steps, device), n_trials=n_trials)
    best_params = study.best_params
    logger.info(f"🏆 최적 하이퍼파라미터: {best_params}")

    # 최종 학습
    dataset = TensorDataset(X_tensor, y_tensor)
    val_ratio = 0.1
    val_size = int(len(dataset)*val_ratio)
    train_size = len(dataset) - val_size
    train_dataset, val_dataset = random_split(dataset, [train_size,val_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    model = Seq2SeqCNNLSTM(conv_channels=(best_params["conv1_ch"], best_params["conv2_ch"]),
                           lstm_hidden=best_params["lstm_hidden"],
                           output_steps=output_steps,
                           dropout=best_params["dropout"]).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=best_params["lr"])

    best_val_loss = np.inf
    no_improve = 0
    early_patience = 15
    checkpoint_path = OUTPUT_DIR / "best_model.pt"

    for epoch in range(1, epochs+1):
        model.train()
        train_loss = 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            y_pred = model(xb, yb, teacher_forcing_ratio=0.5)
            loss = criterion(y_pred, yb)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()*xb.size(0)
        avg_train = train_loss/len(train_loader.dataset)

        model.eval()
        val_loss=0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                y_pred = model(xb)
                val_loss += criterion(y_pred,yb).item()*xb.size(0)
        avg_val = val_loss/len(val_loader.dataset)

        if avg_val < best_val_loss-1e-6:
            best_val_loss = avg_val
            torch.save(model.state_dict(), checkpoint_path)
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= early_patience:
                logger.info(f"✅ Early stopping at epoch {epoch}")
                break
        logger.info(f"Epoch {epoch}/{epochs} | Train Loss:{avg_train:.6f} | Val Loss:{avg_val:.6f}")

    # -------------------------
    # 학습셋 평가
    # -------------------------
    model.eval()
    train_preds_scaled = []
    with torch.no_grad():
        for xb, yb in train_loader:
            xb = xb.to(device)
            y_pred = model(xb, teacher_forcing_ratio=0.0).cpu().numpy()
            train_preds_scaled.append(y_pred)
    train_preds_scaled = np.vstack(train_preds_scaled).reshape(-1,1)
    train_true = scaler.inverse_transform(y_tensor.numpy().reshape(-1,1))
    train_pred = scaler.inverse_transform(train_preds_scaled)

    train_mae = mean_absolute_error(train_true, train_pred)
    train_rmse = np.sqrt(mean_squared_error(train_true, train_pred))
    train_r2 = r2_score(train_true, train_pred)

    logger.info(f"[Train] MAE={train_mae:.4f} | RMSE={train_rmse:.4f} | R2={train_r2:.4f}")

    # -------------------------
    # 테스트 예측
    # -------------------------
    model.load_state_dict(torch.load(checkpoint_path))
    model.eval()
    rolling_input = train_scaled[-input_steps:].tolist()
    predicted_scaled = []

    with torch.no_grad():
        i=0
        while i<len(test_scaled):
            steps_remaining = len(test_scaled)-i
            steps_to_predict = min(output_steps,steps_remaining)
            X_input = torch.tensor(rolling_input[-input_steps:],dtype=torch.float32).unsqueeze(0).to(device)
            y_pred = model(X_input,teacher_forcing_ratio=0.0).cpu().numpy().flatten()
            for step in range(steps_to_predict):
                rolling_input.append([y_pred[step]])
                predicted_scaled.append(y_pred[step])
            i += steps_to_predict

    predicted_scaled = np.array(predicted_scaled).reshape(-1,1)
    predicted_generation = scaler.inverse_transform(predicted_scaled)
    y_true = test_df[[target_col]].values
    test_mae = mean_absolute_error(y_true, predicted_generation)
    test_rmse = np.sqrt(mean_squared_error(y_true,predicted_generation))
    test_r2   = r2_score(y_true,predicted_generation)
    logger.info(f"[Test] MAE={test_mae:.4f} | RMSE={test_rmse:.4f} | R2={test_r2:.4f}")

    # -------------------------
    # SHAP 영향도 계산 (시간별)
    # -------------------------
    shap_sample = min(len(X_tensor), shap_sample_size)
    X_shap = X_tensor[:shap_sample].to(device)
    model.eval()
    explainer = shap.DeepExplainer(model, X_shap)
    shap_values = explainer.shap_values(X_shap)  # (샘플, 시간, 1)
    shap_values_np = np.array(shap_values).squeeze()  # (샘플, 시간)

    plt.figure(figsize=(12,6))
    sns.heatmap(shap_values_np.T, cmap='viridis', cbar_kws={'label':'SHAP value'})
    plt.xlabel("샘플")
    plt.ylabel("시간 step")
    plt.title("시간별 SHAP 영향도 히트맵")
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR / "shap_heatmap.png")
    plt.close()
    logger.info(f"✅ SHAP 히트맵 저장 완료: {OUTPUT_DIR / 'shap_heatmap.png'}")

    return model, predicted_generation, OUTPUT_DIR


In [None]:
# -------------------------
# 실행 예제
# -------------------------
if __name__ == "__main__":
    train_csv = "C:/ESG_Project1/file/merge_data/train_data.csv"
    test_csv  = "C:/ESG_Project1/file/merge_data/test_data.csv"
    output_dir = "C:/ESG_Project1/cnn_lstm/output"

    model, predicted_generation, output_path = run_pipeline_shap_heatmap(
        train_csv=train_csv,
        test_csv=test_csv,
        target_col='합산발전량(MWh)',
        input_steps=168,
        output_steps=24,
        batch_size=128,
        epochs=120,
        n_trials=10,
        output_dir=output_dir
    )

    logger.info(f"✅ 통합 파이프라인 + SHAP 히트맵 완료. 결과 디렉토리: {output_path}")


[2025-10-27 12:03:00,913]✅ INFO - 💻 사용 디바이스: cuda


[I 2025-10-27 12:03:01,993] A new study created in memory with name: no-name-659b1a60-7948-40fe-a7f3-3de28469e0ab
[I 2025-10-27 12:04:04,445] Trial 0 finished with value: 0.000941793060030295 and parameters: {'conv1_ch': 32, 'conv2_ch': 8, 'lstm_hidden': 32, 'dropout': 0.4, 'lr': 0.0020348928640631417}. Best is trial 0 with value: 0.000941793060030295.
[I 2025-10-27 12:05:03,280] Trial 1 finished with value: 0.0012896537355396257 and parameters: {'conv1_ch': 32, 'conv2_ch': 24, 'lstm_hidden': 96, 'dropout': 0.5, 'lr': 0.008185642803864832}. Best is trial 0 with value: 0.000941793060030295.
[I 2025-10-27 12:05:59,637] Trial 2 finished with value: 0.0009204673577003106 and parameters: {'conv1_ch': 32, 'conv2_ch': 16, 'lstm_hidden': 32, 'dropout': 0.30000000000000004, 'lr': 0.0017884859099584099}. Best is trial 2 with value: 0.0009204673577003106.
[I 2025-10-27 12:06:52,997] Trial 3 finished with value: 0.0009158133731046799 and parameters: {'conv1_ch': 16, 'conv2_ch': 16, 'lstm_hidden': 