In [16]:
import pandas as pd
import numpy as np
import warnings
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
from pytorch_forecasting import (
    TemporalFusionTransformer,
    TimeSeriesDataSet,
    GroupNormalizer,
    MAE,
    QuantileLoss
)


best_model_path = "epoch=14-step=1275.ckpt"  # 실제 경로로 변경
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
group_cols = ["station_location", "evse_name"]

FileNotFoundError: [Errno 2] No such file or directory: 'c:/CODE/401_python/Ev_my/data/epoch=14-step=1275.ckpt'

In [None]:
data = pd.read_csv('../data/50area_dummy_processed.csv', dtype={"evse_name": str, "station_location": str})

# UNIX timestamp → datetime (초 단위)
data["last_charge_end_time_ts"] = pd.to_datetime(data["last_charge_end_time_ts"], unit='s', errors='coerce')

# 결측 및 이상치 제거
data = data.dropna(subset=["last_charge_end_time_ts", "evse_name", "station_location"])
data = data[data["last_charge_end_time_ts"] >= pd.Timestamp("2000-01-01")]

# 시간 인덱스 30분 단위 (초/1800)
data["time_idx_raw"] = ((data["connection_start_time_ts"] - data["connection_start_time_ts"].min()).dt.total_seconds() // 1800).astype(int)

# 조합별 모든 time_idx 채우기 (Missing timestep 처리)
group_cols = ["station_location", "evse_name"]
unique_groups = data[group_cols].drop_duplicates()

# time_idx 전체 범위
all_time_idx = np.arange(data["time_idx_raw"].min(), data["time_idx_raw"].max() + 1)

# 모든 group × 모든 time_idx 조합 생성
full_index = pd.DataFrame(
    [(loc, evse, t) for loc, evse in unique_groups.values for t in all_time_idx],
    columns=group_cols + ["time_idx_raw"]
)

# 기존 데이터와 조인하여 결측 timestep 채우기
data_full = pd.merge(full_index, data, on=group_cols + ["time_idx_raw"], how='left')

# 연속 인덱스 재설정 (group 별 0부터 순차적)
data_full = data_full.sort_values(group_cols + ["time_idx_raw"])
data_full["time_idx"] = data_full.groupby(group_cols).cumcount()


data_full = fill_nearest_within_range(
    data_full,
    group_cols=['station_location', 'evse_name'],
    target_col='last_charge_end_time_ts',
    window=100
)

# charging_end_time_ts, connection_end_time_ts, expected_departure_time_ts, idle_time_ts : forward fill
for col in ["charging_end_time_ts", "connection_end_time_ts", "expected_departure_time_ts", "idle_time_ts"]:
    if col in data_full.columns:
        data_full[col] = data_full.groupby(group_cols)[col].ffill()
# ---------------------------------------------------

# 타입 지정
data_full["weekday"] = data_full["connection_start_time_ts"].dt.weekday.astype(str).astype("category")
data_full["month"] = data_full["connection_start_time_ts"].dt.month.astype(str).astype("category")
data_full["evse_type"] = data_full["evse_type"].astype(str).astype("category")
data_full["supports_discharge"] = data_full["supports_discharge"].astype(str).astype("category")

# 추가 feature
data_full["log_volume"] = np.log(data_full["requested_kwh"].fillna(0) + 1e-8)
data_full["avg_volume_by_evsename"] = data_full.groupby(group_cols, observed=True)["requested_kwh"].transform("mean")

missing_cols = [
    "actual_charging_duration_missing", "start_delay_duration_missing", "post_charge_departure_delay_missing",
    "charging_end_time_ts", "last_charge_end_time_ts", "connection_end_time_ts", 
    "expected_departure_time_ts", "idle_time_ts", "expected_usage_duration_ts", 
    "expected_time_diff_ts", "actual_usage_duration_ts", "actual_charging_duration_ts", 
    "start_delay_duration_ts", "post_charge_departure_delay_ts", 
    "usage_departure_time_diff_ts","expected_time_diff_missing"
]
for col in missing_cols:
    if col in data_full.columns:
        data_full[col] = data_full[col].fillna(0)

# 타겟 및 필수 필드 NaN 행 제거
data_full = data_full.dropna(subset=["requested_kwh", "station_location", "evse_name", "time_idx"])

# 최대 인덱스 및 학습/검증 분할 기준
max_prediction_length = 6
max_encoder_length = 24
training_cutoff = data_full["time_idx"].max() - max_prediction_length
# --------------------
# TimeSeriesDataSet 생성
# --------------------
training = TimeSeriesDataSet(
    data_full[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="requested_kwh",
    group_ids=group_cols,
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["station_location", "evse_name", "evse_type", "supports_discharge"],
    static_reals=["usage_departure_range", "post_charge_departure_range", "cluster"] if "usage_departure_range" in data_full.columns else [],
    time_varying_known_categoricals=["month", "weekday"],
    time_varying_known_reals=[],
    time_varying_unknown_reals=[
        col for col in [
            "connection_start_time_ts","last_charge_end_time_ts", "charging_end_time_ts", "connection_end_time_ts", "expected_departure_time_ts",
            "expected_departure_time_missing", "idle_time_ts", "expected_usage_duration_ts", "expected_usage_duration_missing",
            "expected_time_diff_ts", "expected_time_diff_missing", "actual_usage_duration_ts", "actual_charging_duration_ts",
            "actual_charging_duration_missing", "start_delay_duration_ts", "start_delay_duration_missing",
            "post_charge_departure_delay_ts", "post_charge_departure_delay_missing",
            "usage_departure_time_diff_ts", "usage_departure_time_diff_missing",
            "delivered_kwh", "requested_kwh", "kwh_request_diff", "kwh_per_usage_time"
        ] if col in data_full.columns
    ],
    target_normalizer=GroupNormalizer(groups=group_cols, transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)

최소 time_idx: 0
최대 time_idx: 18294
전체 기간(시간 단위): 9147.408333333333
post_charge_departure_range NaN 개수: 18600238
post_charge_departure_range 무한대 개수: 0


In [7]:
# group_cols = ["station_location", "evse_name"]
# max_continuous_lengths = []

# for _, grp in data_full.groupby(group_cols):
#     idx = grp["time_idx"].sort_values()
#     diff = idx.diff()
#     # 연속 구간 마커
#     breaks = diff[diff != 1].index
#     # 구간별 길이 계산
#     starts = [0] + list(breaks)
#     ends = list(breaks) + [len(idx)]
#     max_run = 0
#     for s, e in zip(starts, ends):
#         run_length = e - s
#         if run_length > max_run:
#             max_run = run_length
#     max_continuous_lengths.append(max_run)

# import numpy as np
# print("그룹별 최대 연속 길이 예시:", np.percentile(max_continuous_lengths, [10, 50, 90]))


In [8]:
# --------------------
# TimeSeriesDataSet 생성
# --------------------
training = TimeSeriesDataSet(
    data_full[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="requested_kwh",
    group_ids=group_cols,
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["station_location", "evse_name", "evse_type", "supports_discharge"],
    static_reals=["usage_departure_range", "post_charge_departure_range", "cluster"] if "usage_departure_range" in data_full.columns else [],
    time_varying_known_categoricals=["month", "weekday"],
    time_varying_known_reals=[],
    time_varying_unknown_reals=[
        col for col in [
            "last_charge_end_time_ts", "charging_end_time_ts", "connection_end_time_ts", "expected_departure_time_ts",
            "expected_departure_time_missing", "idle_time_ts", "expected_usage_duration_ts", "expected_usage_duration_missing",
            "expected_time_diff_ts", "expected_time_diff_missing", "actual_usage_duration_ts", "actual_charging_duration_ts",
            "actual_charging_duration_missing", "start_delay_duration_ts", "start_delay_duration_missing",
            "post_charge_departure_delay_ts", "post_charge_departure_delay_missing",
            "usage_departure_time_diff_ts", "usage_departure_time_diff_missing",
            "delivered_kwh", "requested_kwh", "kwh_request_diff", "kwh_per_usage_time"
        ] if col in data_full.columns
    ],
    target_normalizer=GroupNormalizer(groups=group_cols, transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)


In [9]:
min_encoder_time_idx = training_cutoff - max_encoder_length + 1  # encoder 구간 최소 시작 index
future_data = data_full[data_full.time_idx >= min_encoder_time_idx]


In [13]:
validation = TimeSeriesDataSet.from_dataset(training, future_data, predict=True, stop_randomization=True)
val_dataloader = validation.to_dataloader(train=False, batch_size=64)


In [14]:
print(data_full.columns)  # 라벨 고유값 확인

Index(['station_location', 'evse_name', 'time_idx_raw',
       'last_charge_end_time_ts', 'connection_start_time_ts',
       'charging_start_time_ts', 'charging_start_time_missing',
       'charging_end_time_ts', 'charging_end_time_missing',
       'connection_end_time_ts', 'expected_departure_time_ts',
       'expected_departure_time_missing', 'idle_time_ts',
       'expected_usage_duration_ts', 'expected_usage_duration_missing',
       'expected_time_diff_ts', 'expected_time_diff_missing',
       'actual_usage_duration_ts', 'actual_charging_duration_ts',
       'actual_charging_duration_missing', 'start_delay_duration_ts',
       'start_delay_duration_missing', 'post_charge_departure_delay_ts',
       'post_charge_departure_delay_missing', 'usage_departure_time_diff_ts',
       'usage_departure_time_diff_missing', 'duration_per_kwh_ts',
       'duration_per_kwh_missing', 'delivered_kwh', 'requested_kwh',
       'kwh_request_diff', 'kwh_per_usage_time', 'kwh_per_usage_time_missing',
 

In [15]:
predictions = best_tft.predict(val_dataloader)


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
original_data = future_data.copy()
import pandas as pd

pred_array = predictions.detach().cpu().numpy()

pred_df = original_data.iloc[:pred_array.shape[0]].copy()
pred_df["predicted_requested_kwh"] = pred_array[:, 0]

pred_df.to_csv("predictions.csv", index=False)
