In [10]:
# All notebook-wide imports live in this cell.
# NOTE(review): the original cell contained a dangling `from torch_geometric.datasets`
# line with no `import` clause, which is a SyntaxError and prevented the whole cell
# from running. Nothing visible in this notebook uses that module, so the line was
# dropped; re-add it with an explicit name if a dataset class is actually needed.
import torch
import torch.utils.data as data
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

In [4]:
# Load the serialized object that later cells use as the graph (`pyg_data`).
# SECURITY: weights_only=False makes torch.load perform a full unpickle, which can
# execute arbitrary code embedded in the file — only load files from a trusted source.
pyg_data = torch.load('../dataset/graph/pyg_data.pt', weights_only=False)

In [2]:
import platform

import torch

# Assemble the environment report line by line, then print it in one call.
env_report = [
    # PyTorch version
    f"PyTorch 버전: {torch.__version__}",
    # Whether CUDA is available
    f"CUDA 사용 가능 여부: {torch.cuda.is_available()}",
]
if torch.cuda.is_available():
    # CUDA version as seen by PyTorch, and the name of device 0
    env_report += [
        f"CUDA 버전 (PyTorch에서 인식한): {torch.version.cuda}",
        f"GPU 이름: {torch.cuda.get_device_name(0)}",
    ]
env_report += [
    # Operating system name and release
    f"운영체제: {platform.system()} {platform.release()}",
    # Python interpreter version
    f"Python 버전: {platform.python_version()}",
]
print("\n".join(env_report))

PyTorch 버전: 2.6.0
CUDA 사용 가능 여부: False
운영체제: Darwin 24.3.0
Python 버전: 3.10.4


In [1]:
# Use the %pip magic so the install targets the running kernel's environment, and pin
# the version (0.56.0 is the release pip resolved in the recorded run of this cell).
# NOTE(review): the recorded run failed while building the torch_sparse dependency from
# source ("Getting requirements to build wheel"); presumably torch_sparse needs to be
# installed from a prebuilt wheel matching the local torch build first — confirm.
%pip install torch-geometric-temporal==0.56.0

Collecting torch-geometric-temporal
  Using cached torch_geometric_temporal-0.56.0-py3-none-any.whl.metadata (1.9 kB)
Collecting decorator==4.4.2 (from torch-geometric-temporal)
  Using cached decorator-4.4.2-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting cython (from torch-geometric-temporal)
  Using cached Cython-3.0.12-py2.py3-none-any.whl.metadata (3.3 kB)
Collecting torch_sparse (from torch-geometric-temporal)
  Using cached torch_sparse-0.6.18.tar.gz (209 kB)
  Installing build dependencies ... done
  Getting requirements to build wheel ... error
  error: subprocess-exited-with-error
  
  × Getting requirements to build wheel did not run successfully.
  │ exit code: 1
  ╰─> [17 lines of output]
      Traceback (most recent call last):
        File "/Users/kimhakhyun/.pyenv/versions/sci2/lib/python3.10/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.p

In [5]:
class HierarchicalTimeSeriesDataset(data.Dataset):
    """Sliding-window (input, target) samples cut from one graph's feature matrix.

    With ``s = idx * stride`` and ``w = window_size``, sample ``idx`` pairs the
    input window ``x[:, s:s + w]`` with the window that immediately follows it,
    ``x[:, s + w:s + 2 * w]``. Dim 1 of ``pyg_data.x`` is therefore treated as
    the time axis. ``edge_index``, ``edge_type``, ``num_nodes`` and
    ``num_edge_types`` are passed through unchanged to every sample.

    Args:
        pyg_data: Object exposing ``x`` (at least 2-D), ``edge_index``,
            ``edge_type``, ``num_nodes`` and ``num_edge_types``.
        window_size: Length of both the input window and the target window.
        stride: Offset, in time steps, between the starts of consecutive samples.

    Raises:
        ValueError: If ``window_size`` or ``stride`` is not positive.
    """

    def __init__(self, pyg_data, window_size=28, stride=28):
        # Reject degenerate settings up front: stride == 0 would otherwise surface
        # as a ZeroDivisionError below, and window_size <= 0 yields empty windows.
        if window_size <= 0:
            raise ValueError(f"window_size must be positive, got {window_size}")
        if stride <= 0:
            raise ValueError(f"stride must be positive, got {stride}")

        self.pyg_data = pyg_data
        self.time_length = pyg_data.x.shape[1]
        self.window_size = window_size
        self.stride = stride
        # Each sample needs 2 * window_size consecutive steps (input + target);
        # clamp at 0 when the series is too short for even one sample.
        self.num_windows = max(0, (self.time_length - 2 * window_size) // stride + 1)

    def __len__(self):
        """Return the number of (input, target) window pairs available."""
        return self.num_windows

    def __getitem__(self, idx):
        """Return the ``idx``-th sample as a ``Data`` object.

        Negative indices count from the end, as for a sequence.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        # Without this check, slicing silently returns truncated or empty windows
        # for out-of-range indices, and plain iteration over the dataset never
        # stops because it relies on IndexError to terminate.
        requested = idx
        if idx < 0:
            # Not `+=`: idx may be a tensor owned by the caller.
            idx = idx + self.num_windows
        if not 0 <= idx < self.num_windows:
            raise IndexError(
                f"index {requested} is out of range for a dataset with "
                f"{self.num_windows} windows"
            )

        start_idx = idx * self.stride
        split_idx = start_idx + self.window_size
        end_idx = split_idx + self.window_size
        # Clone so that in-place edits to a sample cannot corrupt the shared series.
        input_window = self.pyg_data.x[:, start_idx:split_idx].clone()
        output_window = self.pyg_data.x[:, split_idx:end_idx].clone()

        # Named `sample` rather than `data` to avoid shadowing the module alias
        # `data` that the class statement itself uses.
        sample = Data(
            x=input_window,
            y=output_window,
            edge_index=self.pyg_data.edge_index,
            edge_type=self.pyg_data.edge_type,
            num_nodes=self.pyg_data.num_nodes,
            num_edge_types=self.pyg_data.num_edge_types
        )

        return sample

# Dataset splitting
def create_train_val_test_datasets(graph_data, window_size=28, stride=1, train_ratio=0.7, val_ratio=0.15):
    """Create chronologically ordered train / validation / test subsets.

    Wraps ``graph_data`` in a ``HierarchicalTimeSeriesDataset`` and splits its
    sample indices sequentially (no shuffling): the first
    ``int(n * train_ratio)`` samples are the training set, the next
    ``int(n * val_ratio)`` the validation set, and the remainder the test set.

    NOTE(review): every sample spans ``2 * window_size`` time steps, so when
    ``stride < 2 * window_size`` the samples on either side of a split boundary
    share time steps. Consider leaving a gap between splits if strict
    separation is required.

    Args:
        graph_data: Graph object passed to ``HierarchicalTimeSeriesDataset``.
        window_size: Length of the input and target windows.
        stride: Offset between consecutive samples.
        train_ratio: Fraction of samples used for training.
        val_ratio: Fraction of samples used for validation.

    Returns:
        Tuple ``(train_dataset, val_dataset, test_dataset)`` of
        ``torch.utils.data.Subset`` objects over the same underlying dataset.

    Raises:
        ValueError: If a ratio is negative, the ratios sum to more than 1, or
            the dataset contains no samples.
    """
    # Small tolerance so that ratio pairs such as 0.7 + 0.3 are not rejected
    # because of floating-point rounding.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got train_ratio={train_ratio}, val_ratio={val_ratio}"
        )

    dataset = HierarchicalTimeSeriesDataset(graph_data, window_size, stride)

    # Number of samples
    n_samples = len(dataset)

    if n_samples == 0:
        raise ValueError("데이터셋에 샘플이 없습니다. 윈도우 크기와 시계열 길이를 확인하세요.")

    # Size of each split; the test split takes whatever remains.
    train_size = int(n_samples * train_ratio)
    val_size = int(n_samples * val_ratio)
    val_end = train_size + val_size

    # Sequential split to respect the temporal ordering of the samples.
    indices = list(range(n_samples))
    train_indices = indices[:train_size]
    val_indices = indices[train_size:val_end]
    test_indices = indices[val_end:]

    # Build the subsets
    train_dataset = torch.utils.data.Subset(dataset, train_indices)
    val_dataset = torch.utils.data.Subset(dataset, val_indices)
    test_dataset = torch.utils.data.Subset(dataset, test_indices)

    return train_dataset, val_dataset, test_dataset

# Create the three splits using the function's default window and stride settings.
train_dataset, val_dataset, test_dataset = create_train_val_test_datasets(pyg_data)

# Create one loader per split; only the training loader shuffles.
batch_size = 1  # the whole graph forms a single batch
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=do_shuffle)
    for split, do_shuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)