In [8]:
import warnings
import pandas as pd
import numpy as np
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger

# PyTorch Forecasting 관련 import
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet, Baseline
from pytorch_forecasting.data.examples import get_stallion_data
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, QuantileLoss
from itertools import product
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
from pytorch_forecasting import (
    TemporalFusionTransformer,
    TimeSeriesDataSet,
    GroupNormalizer,
    MAE,
    QuantileLoss
)

In [None]:
# --------------------
# Load the charging-session table; the two identifier columns are read as str.
data = pd.read_csv('../data/50area_dummy_processed.csv', dtype={"evse_name": str, "station_location": str})

# UNIX timestamp (seconds) -> datetime; values that cannot be parsed become NaT.
data["connection_start_time_ts"] = pd.to_datetime(data["connection_start_time_ts"], unit='s', errors='coerce')

# Remove rows with a missing timestamp / identifier and rows dated before 2000-01-01.
# The explicit .copy() makes `data` an independent frame, so the column
# assignment below does not write into a filtered slice
# (which is what triggers pandas' SettingWithCopyWarning).
data = data.dropna(subset=["connection_start_time_ts", "evse_name", "station_location"])
data = data[data["connection_start_time_ts"] >= pd.Timestamp("2000-01-01")].copy()

# 30-minute bucket index: seconds elapsed since the earliest session, floor-divided by 1800.
data["time_idx_raw"] = ((data["connection_start_time_ts"] - data["connection_start_time_ts"].min()).dt.total_seconds() // 1800).astype(int)

# Fill in every time step for every (station_location, evse_name) pair
# (missing-timestep handling).
group_cols = ["station_location", "evse_name"]
unique_groups = data[group_cols].drop_duplicates()

# Full range of 30-minute bucket indices covered by the data.
all_time_idx = np.arange(data["time_idx_raw"].min(), data["time_idx_raw"].max() + 1)

# Every group x every time index. A cross join builds the grid inside pandas
# instead of a Python-level nested loop over groups x time steps; the result
# has the same columns and the same group-major row order, and the key columns
# keep the dtypes they have in `data`.
full_index = unique_groups.merge(
    pd.DataFrame({"time_idx_raw": all_time_idx}),
    how="cross",
)

# Left-join the observed sessions onto the grid; grid rows without a session
# get NaN in every data column.
data_full = pd.merge(full_index, data, on=group_cols + ["time_idx_raw"], how='left')

# Consecutive index per group, starting at 0.
# NOTE(review): if two sessions of one group fall into the same 30-minute
# bucket, the merge yields two rows for that bucket and cumcount() shifts all
# later indices of that group -- confirm buckets are unique per group.
data_full = data_full.sort_values(group_cols + ["time_idx_raw"])
data_full["time_idx"] = data_full.groupby(group_cols).cumcount()

# ---------------------------------------------------
# Logical gap filling starts here.
sort_cols = group_cols + ["time_idx_raw"]
data_full = data_full.sort_values(sort_cols)

# last_charge_end_time_ts: where it is missing, fall back to the previous
# row's charging_end_time_ts within the same group (shift by one row).
needed_cols = {"charging_end_time_ts", "last_charge_end_time_ts"}
if needed_cols.issubset(data_full.columns):
    previous_end = data_full.groupby(group_cols)["charging_end_time_ts"].shift(1)
    data_full["last_charge_end_time_ts"] = data_full["last_charge_end_time_ts"].combine_first(previous_end)

# Forward-fill these columns within each group, skipping any that are absent.
ffill_candidates = ["charging_end_time_ts", "connection_end_time_ts", "expected_departure_time_ts", "idle_time_ts"]
present_ffill_cols = [name for name in ffill_candidates if name in data_full.columns]
for col in present_ffill_cols:
    data_full[col] = data_full.groupby(group_cols)[col].ffill()
# ---------------------------------------------------

# Type casting: calendar features and EVSE attributes become string categories.
# NOTE(review): rows without a session timestamp presumably end up with the
# label "nan" here -- confirm this is intended.
start_ts = data_full["connection_start_time_ts"]
data_full["weekday"] = start_ts.dt.weekday.astype(str).astype("category")
data_full["month"] = start_ts.dt.month.astype(str).astype("category")
for cat_col in ("evse_type", "supports_discharge"):
    data_full[cat_col] = data_full[cat_col].astype(str).astype("category")

# Derived features: log of the requested energy (NaN treated as 0, plus a
# small epsilon so the log is defined) and the per-group mean request.
requested = data_full["requested_kwh"]
data_full["log_volume"] = np.log(requested.fillna(0) + 1e-8)
per_group = data_full.groupby(group_cols, observed=True)
data_full["avg_volume_by_evsename"] = per_group["requested_kwh"].transform("mean")

# Missing-value handling: every listed column that exists is filled with -1.
fill_minus_one_cols = ["actual_charging_duration_missing", "start_delay_duration_missing", "post_charge_departure_delay_missing"]
missing_cols = [
    "charging_end_time_ts", "last_charge_end_time_ts", "connection_end_time_ts",
    "expected_departure_time_ts", "idle_time_ts", "expected_usage_duration_ts",
    "expected_time_diff_ts", "actual_usage_duration_ts", "actual_charging_duration_ts",
    "start_delay_duration_ts", "post_charge_departure_delay_ts",
    "usage_departure_time_diff_ts", "expected_time_diff_missing"
]
# The two lists are disjoint, so a single pass gives the same result as
# filling them one list after the other.
for col in fill_minus_one_cols + missing_cols:
    if col not in data_full.columns:
        continue
    data_full[col] = data_full[col].fillna(-1)

# Drop rows that lack the target or any required key field.
data_full = data_full.dropna(subset=["requested_kwh", "station_location", "evse_name", "time_idx"])

# Window sizes and the train/validation split point: the last
# `max_prediction_length` time steps are held out of the training set.
max_prediction_length = 6
max_encoder_length = 24
training_cutoff = data_full["time_idx"].max() - max_prediction_length

# Static real-valued features are used all-or-nothing. Every one of the three
# columns is checked (not only the first), so a partially missing set falls
# back to [] instead of failing inside TimeSeriesDataSet.
static_real_cols = ["usage_departure_range", "post_charge_departure_range", "cluster"]
if not all(name in data_full.columns for name in static_real_cols):
    static_real_cols = []

# --------------------
# Build the training TimeSeriesDataSet
# --------------------
training = TimeSeriesDataSet(
    data_full[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="requested_kwh",
    group_ids=group_cols,
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["station_location", "evse_name", "evse_type", "supports_discharge"],
    static_reals=static_real_cols,
    time_varying_known_categoricals=["month", "weekday"],
    time_varying_known_reals=[],
    # Only the candidate columns actually present in the frame are passed on.
    time_varying_unknown_reals=[
        col for col in [
            "last_charge_end_time_ts", "charging_end_time_ts", "connection_end_time_ts", "expected_departure_time_ts",
            "expected_departure_time_missing", "idle_time_ts", "expected_usage_duration_ts", "expected_usage_duration_missing",
            "expected_time_diff_ts", "expected_time_diff_missing", "actual_usage_duration_ts", "actual_charging_duration_ts",
            "actual_charging_duration_missing", "start_delay_duration_ts", "start_delay_duration_missing",
            "post_charge_departure_delay_ts", "post_charge_departure_delay_missing",
            "usage_departure_time_diff_ts", "usage_departure_time_diff_missing",
            "delivered_kwh", "requested_kwh", "kwh_request_diff", "kwh_per_usage_time"
        ] if col in data_full.columns
    ],
    target_normalizer=GroupNormalizer(groups=group_cols, transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    # Rows without a target were dropped above, so each group's time index has gaps.
    allow_missing_timesteps=True,
)

[np.int64(0)]
int64
time_idx            0
requested_kwh       0
station_location    0
evse_name           0
dtype: int64


In [11]:
# Load the trained Temporal Fusion Transformer from its checkpoint file.
# No TimeSeriesDataSet is built in this cell: a dataset bound to `model` would
# be overwritten by the loaded network on the very next statement, and the
# checkpoint already carries its own `dataset_parameters` (visible in this
# cell's output).
model = TemporalFusionTransformer.load_from_checkpoint("epoch=0-step=242.ckpt")

# Switch to evaluation mode; as the last expression of the cell this also
# displays the model summary.
model.eval()

TemporalFusionTransformer(
  	"attention_head_size":               2
  	"categorical_groups":                {}
  	"causal_attention":                  True
  	"dataset_parameters":                {'time_idx': 'time_idx', 'target': 'requested_kwh', 'group_ids': ['station_location', 'evse_name'], 'weight': None, 'max_encoder_length': 24, 'min_encoder_length': 12, 'min_prediction_idx': np.int64(0), 'min_prediction_length': 1, 'max_prediction_length': 6, 'static_categoricals': ['station_location', 'evse_name', 'evse_type', 'supports_discharge'], 'static_reals': ['usage_departure_range', 'post_charge_departure_range', 'cluster'], 'time_varying_known_categoricals': ['month', 'weekday'], 'time_varying_known_reals': [], 'time_varying_unknown_categoricals': None, 'time_varying_unknown_reals': ['last_charge_end_time_ts', 'charging_end_time_ts', 'connection_end_time_ts', 'expected_departure_time_ts', 'expected_departure_time_missing', 'idle_time_ts', 'expected_usage_duration_ts', 'expected_usage