In [1]:
# 세팅
!pip install numpy pandas datetime torch torchvision scikit-learn matplotlib pytz google

Collecting datetime
  Downloading datetime-6.0-py3-none-any.whl.metadata (34 kB)
Collecting zope.interface (from datetime)
  Downloading zope_interface-8.1.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Downloading datetime-6.0-py3-none-any.whl (52 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading zope_interface-8.1.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl (264 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.7/264.7 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: zope.interface, datetime
Successfully installed datetime-6.0 zope.interface-8.1.1


In [24]:
# 라이브러리
import numpy as np
import pandas as pd
# from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from pytz import timezone
import os
from google.colab import drive
import copy

In [25]:
# 데이터 가져오기
drive.mount('/content/drive')

path = "/content/drive/MyDrive/4-1_다변량통계분석/final_dataset_complete.csv"
df = pd.read_csv(path)

# drive.mount('VGG')

Mounted at /content/drive


In [29]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
WINDOW_SIZE = 24
BATCH_SIZE = 128
LEARNING_RATE = 1e-4
EPOCHS = 100
PATIENCE = 10

In [30]:
print(df.iloc[:168].isnull().values.any()) # nan 존재
print(df.iloc[169:].isnull().values.any()) # 그다음 행부터는 nan 존재하지 않음 확인

False
False


In [31]:
if 'datetime' in df.columns: df = df.drop(columns=['datetime'])
Y = '청주 지역공급량(Gcal)'

# 1-1. 첫 168행 삭제
df = df.iloc[168:].reset_index(drop=True)
print(f"Nan 존재여부: {df.isnull().values.any()}") # 전체 데이터에 nan 있는지 확인

# 1-2. Y-X 형태로
cols = [Y] + [c for c in df.columns if c!=Y]
data_vals = df[cols].values

# 1-3. train/val 분할
val_size = int(len(data_vals) * 0.2)
val_raw = data_vals[:val_size]
train_raw = data_vals[val_size:]

# 1-4. scaling - leakage 방지하기 위해 분할 먼저
scaler = StandardScaler()
val_data = scaler.fit_transform(val_raw)
train_data = scaler.fit_transform(train_raw)

Nan 존재여부: False


In [32]:
print(f"scaled_df 모양: {data_vals.shape}; scaled_df 길이: {data_vals.size}")
print(f"val_size: {val_size}")
print(f"cols size: {len(cols)}")

scaled_df 모양: (43488, 503); scaled_df 길이: 21874464
val_size: 8697
cols size: 503


In [33]:
# Sliding window dataset
class SlidingWindowDataset(Dataset):
  def __init__(self, data, window_size):
    self.data = data
    self.window_size = window_size

  def __len__(self):
    return len(self.data) - self.window_size

  def __getitem__(self, idx):
    # 입력 범위: idx ~ idx + window_size (과거 데이터)
    # 타겟 시점: idx + window_size (맞춰야 할 현재 시점)

    # 1. 입력 시퀀스 추출 (과거 window_size 개 + 현재 시점 1개)
    sequence = self.data[idx : idx + self.window_size + 1].copy() # shape: (window_size + 1, features)

    # 2. 정답 추출 (마지막 시점의 0번째 컬럼 = 지역공급량)
    target = sequence[-1, 0]

    # 3. 마스킹 (Masking)
    sequence[-1,0] = 0.0

    sequence_tensor = torch.tensor(sequence.transpose(), dtype=torch.float32) # PyTorch Conv1d는 (Channel, Length) 순서를 원하므로 Transpose
    target_tensor = torch.tensor(target, dtype=torch.float32) # 결과 shape: (Features, Window_Size + 1)
    # print(f"sequence_tensor: {sequence_tensor.shape}")
    # print(f"target_tensor: {target_tensor.shape}")

    return sequence_tensor, target_tensor

In [34]:
train_dataset = SlidingWindowDataset(train_data, WINDOW_SIZE)
val_dataset = SlidingWindowDataset(val_data, WINDOW_SIZE)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [35]:
# VGG 모델
class TimeSeriesVGG(nn.Module):
    def __init__(self, num_features, window_len):
        super(TimeSeriesVGG, self).__init__()

        # 입력: (Batch, Features, Time_Steps)
        # Features: 변수 개수 (500개 등)
        # Time_Steps: window_size + 1 (25개 등)

        self.features = nn.Sequential(
            # Block 1
            nn.Conv1d(in_channels=num_features, out_channels=64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            # 시계열 길이가 짧으면 Pooling은 조심해서 써야 함 (정보 손실)
            # 여기서는 차원 유지를 위해 Pooling 대신 Stride를 쓰거나 생략 가능
            # VGG 스타일을 살려 MaxPool 적용
            nn.MaxPool1d(kernel_size=2, stride=2),

            # Block 2
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),

            # Block 3
            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1) # 시간 차원을 1로 압축 (Global Pooling)
        )

        self.regressor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 1) # 최종 예측값 1개
        )

    def forward(self, x):
        x = self.features(x)
        x = self.regressor(x)
        return x

In [36]:
# Early Stopping 구현
best_val_loss = float('inf')
patience_counter = 0
best_model_weights = None
loss_history = {'train':[], 'val':[]}

In [37]:
# 모델 학습
model = TimeSeriesVGG(num_features=data_vals.shape[1], window_len=WINDOW_SIZE+1).to(DEVICE)
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

print(f"학습 시작 (Max Epochs: {EPOCHS}, Patience: {PATIENCE})")

for epoch in range(EPOCHS): # Epoch 수 조절
    model.train()
    train_loss = 0.0

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE).unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    avg_train_loss = train_loss / len(train_loader)
    loss_history['train'].append(avg_train_loss)

    # 검증
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE).unsqueeze(1)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)
    loss_history['val'].append(avg_val_loss)

    print(f"Epoch {epoch+1:03d} | Train Loss: {avg_train_loss:.5f} | Val Loss: {avg_val_loss:.5f}")

    if avg_val_loss < best_val_loss:
        # 성능이 좋아졌을 때
        best_val_loss = avg_val_loss
        patience_counter = 0
        # 현재 모델의 가중치를 깊은 복사(Deep Copy)로 저장해둠
        best_model_weights = copy.deepcopy(model.state_dict())
        # (선택) 파일로 저장하고 싶다면: torch.save(model.state_dict(), 'best_model.pth')
        print("  <-- Best Model Saved")
    else:
        # 성능이 안 좋아졌거나 같을 때
        patience_counter += 1
        print(f"  | Patience {patience_counter}/{PATIENCE}")

        if patience_counter >= PATIENCE:
            print(f"\n[Early Stopping] {epoch+1} 에폭에서 학습 조기 종료.")
            break

print("학습 완료.")
# 학습이 끝난 후(혹은 중단된 후), 저장해둔 최고의 가중치로 모델을 되돌립니다.
if best_model_weights is not None:
    model.load_state_dict(best_model_weights)
    print("최적의 검증 성능을 낸 모델로 가중치를 복구했습니다.")

학습 시작 (Max Epochs: 100, Patience: 10)
Epoch 001 | Train Loss: 0.82563 | Val Loss: 0.63463
  <-- Best Model Saved
Epoch 002 | Train Loss: 0.56854 | Val Loss: 0.71805
  | Patience 1/10
Epoch 003 | Train Loss: 0.45031 | Val Loss: 0.73317
  | Patience 2/10
Epoch 004 | Train Loss: 0.39382 | Val Loss: 0.74584
  | Patience 3/10
Epoch 005 | Train Loss: 0.35245 | Val Loss: 0.71869
  | Patience 4/10
Epoch 006 | Train Loss: 0.31929 | Val Loss: 0.74215
  | Patience 5/10
Epoch 007 | Train Loss: 0.29621 | Val Loss: 0.70925
  | Patience 6/10
Epoch 008 | Train Loss: 0.28090 | Val Loss: 0.77527
  | Patience 7/10
Epoch 009 | Train Loss: 0.26820 | Val Loss: 0.70458
  | Patience 8/10
Epoch 010 | Train Loss: 0.25921 | Val Loss: 0.72881
  | Patience 9/10
Epoch 011 | Train Loss: 0.24659 | Val Loss: 0.70328
  | Patience 10/10

[Early Stopping] 11 에폭에서 학습 조기 종료.
학습 완료.
최적의 검증 성능을 낸 모델로 가중치를 복구했습니다.
