In [2]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Experiment 005

## Setup

In [1]:
import ctypes
import os
import pathlib
import site
# Preload libcusparseLt from the nvidia pip packages (for the GPU build of PyTorch).
_site_packages = site.getsitepackages()[0]
_cusparselt_so = os.path.join(
    _site_packages, "nvidia", "cusparselt", "lib", "libcusparseLt.so.0"
)
# Nothing is loaded when the shared library is not present at that location.
if os.path.isfile(_cusparselt_so):
    ctypes.CDLL(_cusparselt_so)

from multiprocessing import process
from athena_analyze.data.processor import DataProcessor
from utils.logging import setup_logging
from utils.plotter import Plotter
from utils.config import load_config_section, load_config

_log = setup_logging()

# Experiment name and config locations are declared once, up front, so every
# path below is derived from them instead of repeating the literals.
exp_name = "exp_005"
general_cfg_path = "../config/config.yml"
exp_cfg_path = f"../config/{exp_name}.yml"

# Data loader rooted at the "raw" entry of the config's "data" section.
data_cfg = load_config_section(general_cfg_path, "data")
processor = DataProcessor(data_folder=data_cfg["raw"])

# Load the two raw CSV files, keeping the ETTh1-then-ETTh2 order.
dfs = [processor.load_data(csv_name) for csv_name in ("ETTh1.csv", "ETTh2.csv")]

# Plotter writing into this experiment's figure folder.
plotter = Plotter(f"../reports/figures/{exp_name}")


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/teramoto/dev/github/YHTR0257/athena-assignment/.venv/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/teramoto/dev/github/YHTR0257/athena-assignment/.venv/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/teramoto/dev/github/YHTR0257/athena-assignment/.venv/lib/py

2026-02-09 10:10:15,046 - athena-assignment - DEBUG - DataProcessor initialized with data folder: ../data/raw
2026-02-09 10:10:15,123 - athena-assignment - DEBUG - Plotter initialized with output folder: ../reports/figures/exp_005


# Preprocess

MSTLを用いて周期性の分解を行う。

## EDA

exp_005の学習データについて、STL分解後の残差（`stl_*_resid`）にどのような周期性が残っているかを確認する。

In [3]:
from anyio import Path
import pandas as pd
from athena_analyze.eda.analyzer import analyze_periodicity, compute_acf, compute_pacf
from athena_analyze.eda.visualize import plot_acf, plot_pacf, plot_power_spectrum

# Folder holding the per-experiment parquet splits.
# Built with the standard-library pathlib rather than the `Path` imported from
# anyio in this cell: anyio's Path is asynchronous, so `path.exists()` returns a
# coroutine (always truthy) and a synchronous "file missing" check never fires.
# NOTE(review): presumably processor.load_data performs such a check — confirm.
experiment_dir = pathlib.Path("../data/experiment/")

# Series to analyse; the decomposition residual of each is named stl_<col>_resid.
cols = ["OT", "HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL"]

for h in ["h1"]:
    train_df = processor.load_data(f"exp_005/train_{h}.parquet", data_folder=experiment_dir)
    test_df = processor.load_data(f"exp_005/test_{h}.parquet", data_folder=experiment_dir)
    predict_df = processor.load_data(f"exp_005/predictions_tft_{h}.parquet", data_folder=experiment_dir)
    # Only the training split is analysed here; copy so train_df itself stays untouched.
    plot_df = train_df.copy()

    # Pass 1: compute periodicity, ACF and PACF of every residual column.
    results = {}
    for c in cols:
        _log.info(f"Analyzing column: {c}")
        _c = f"stl_{c}_resid"
        periods_df, power_df = analyze_periodicity(plot_df, max_period=2500, fs=1, target_col=_c)
        results[c] = {
            "periods": periods_df,
            "power": power_df,
            "acf": compute_acf(plot_df, target_col=_c),
            "pacf": compute_pacf(plot_df, target_col=_c),
        }

    # Pass 2: render and save one ACF, PACF and power-spectrum figure per column.
    for c in cols:
        _log.info(f"Top periods for column: {c}")
        acf_fig, _ = plot_acf(results[c]["acf"])
        pacf_fig, _ = plot_pacf(results[c]["pacf"])
        power_fig, _ = plot_power_spectrum(results[c]["power"])
        plotter.save_plot(acf_fig, f"{h}_{c.lower()}_acf.png")
        plotter.save_plot(pacf_fig, f"{h}_{c.lower()}_pacf.png")
        plotter.save_plot(power_fig, f"{h}_{c.lower()}_power_spectrum.png")
        # Drop the notebook-level references to the figures before the next column.
        del acf_fig, pacf_fig, power_fig

  if not file_path.exists():
  if not file_path.exists():
  if not file_path.exists():


2026-02-09 10:10:18,466 - athena-assignment - INFO - Analyzing column: OT
2026-02-09 10:10:18,471 - athena-assignment - DEBUG - Analyzing periodicity for column: stl_OT_resid with fs=1 and max_period=2500 (data length: 12934)
2026-02-09 10:10:18,486 - athena-assignment - INFO - Detected 5 significant periods for stl_OT_resid
2026-02-09 10:10:18,486 - athena-assignment - DEBUG - Top period: 1175.8 (relative power: 1.000)
2026-02-09 10:10:18,487 - athena-assignment - DEBUG - Computing ACF for column: stl_OT_resid with max_lag=6467 (data length: 12934)
2026-02-09 10:10:18,491 - athena-assignment - DEBUG - Computing PACF for column: stl_OT_resid with max_lag=1000 (data length: 12934)
2026-02-09 10:10:28,695 - athena-assignment - INFO - Analyzing column: HUFL
2026-02-09 10:10:28,698 - athena-assignment - DEBUG - Analyzing periodicity for column: stl_HUFL_resid with fs=1 and max_period=2500 (data length: 12934)
2026-02-09 10:10:28,705 - athena-assignment - INFO - Detected 5 significant perio

In [13]:
# Preview the first five rows of the test split.
test_df.head(n=5)

Unnamed: 0,date,HUFL,HULL,MUFL,MULL,LUFL,LULL,OT,year,month,...,MULL_ma_72,MULL_ma_168,sin_24,cos_24,sin_168,cos_168,sin_2160,cos_2160,sin_8760,cos_8760
0,2018-02-01 14:00:00,-2.344374,0.473496,-2.560091,0.312154,0.52085,0.602501,-0.952773,2.295863,-1.465244,...,0.606818,0.370759,-2.280865,-1.130131,-1.755178,-0.946015,1.701318,-1.736581,1.731274,-1.74156
1,2018-02-01 15:00:00,-0.26158,0.408365,-0.514791,0.36917,0.833308,0.458515,-1.214097,2.295863,-1.465244,...,0.610948,0.369199,-2.281533,-1.12998,-1.755461,-0.94594,1.70158,-1.736859,1.73155,-1.741841
2,2018-02-01 16:00:00,-0.613844,0.05014,-0.861403,0.29279,0.963072,0.21643,-1.229853,2.295863,-1.465244,...,0.614765,0.367072,-2.282035,-1.129866,-1.755674,-0.945884,1.701776,-1.737068,1.731757,-1.742051
3,2018-02-01 17:00:00,0.878287,0.310667,0.610099,0.33098,1.482983,0.023395,-1.253599,2.295863,-1.465244,...,0.620812,0.366929,-2.282703,-1.129714,-1.755957,-0.945809,1.702038,-1.737346,1.732032,-1.742332
4,2018-02-01 18:00:00,1.23055,0.408365,0.8942,0.14003,1.925206,0.168962,-1.229853,2.295863,-1.465244,...,0.625587,0.366646,-2.283372,-1.129562,-1.756241,-0.945735,1.702299,-1.737625,1.732308,-1.742613


In [None]:
from athena_analyze.eda.visualize import plot_time_series, plot_pairplot

# Folder holding the per-experiment parquet splits.
# Built with the standard-library pathlib rather than anyio's asynchronous Path,
# whose `.exists()` returns a coroutine (always truthy) instead of a bool.
experiment_dir = pathlib.Path("../data/experiment/")

cols = ["OT", "HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL"]

for h in ["h1"]:
    train_df = processor.load_data(f"exp_005/train_{h}.parquet", data_folder=experiment_dir)
    test_df = processor.load_data(f"exp_005/test_{h}.parquet", data_folder=experiment_dir)
    predict_df = processor.load_data(f"exp_005/predictions_tft_{h}.parquet", data_folder=experiment_dir)
    # NOTE(review): this assignment aligns on the index, so it assumes predict_df
    # shares test_df's index; rows without a match become NaN — confirm.
    test_df["stl_OT_pred"] = predict_df["predicted"]

    # Last training timestamp, drawn as a vertical marker on the time-series plots.
    train_end_date = train_df["date"].max()

    # Pairplots: each column's STL components together with the OT residual and trend.
    for c in cols:
        pair_cols = [col for col in train_df.columns if col.startswith(f"stl_{c}")]
        pair_cols += ["stl_OT_resid", "stl_OT_trend"]
        # Order-preserving de-duplication. For c == "OT" the two extra names are
        # already present, and repeated labels select repeated columns. Unlike
        # set(), dict.fromkeys keeps the column order identical between runs.
        pair_cols = list(dict.fromkeys(pair_cols))
        pair_df = train_df[pair_cols]
        display(pair_df.dtypes)
        display(pair_df.shape)
        pair_fig = plot_pairplot(pair_df, figsize=(8,8))
        plotter.save_plot(pair_fig, f"{h}_{c.lower()}_pairplot.png")

    # Train followed by test, so the plots span both sides of train_end_date.
    plot_df = pd.concat([train_df, test_df])

    for c in cols:
        _cols = [_col for _col in plot_df.columns if "stl" in _col and (c in _col)]
        # Trend and residual components plus the raw series itself.
        # NOTE(review): "stl_OT_pred" matches neither this filter nor the seasonal
        # one below, so the prediction column is never drawn — confirm intent.
        plot_cols = [_col for _col in _cols if "trend" in _col or "resid" in _col]
        plot_cols = plot_cols + [c]
        fig = plot_time_series(df=plot_df, date_col="date", value_cols=plot_cols, v_lines=[train_end_date])
        plotter.save_plot(fig, f"{h}_{c.lower()}_time.png")
        # Seasonal components go on a separate figure.
        plot_cols = [_col for _col in _cols if "seasonal" in _col]
        fig = plot_time_series(df=plot_df, date_col="date", value_cols=plot_cols, v_lines=[train_end_date])
        plotter.save_plot(fig, f"{h}_{c.lower()}_seasonal_time.png")

  if not file_path.exists():
  if not file_path.exists():
  if not file_path.exists():


stl_OT_trend            float64
stl_OT_seasonal_24      float64
stl_OT_seasonal_504     float64
stl_OT_seasonal_720     float64
stl_OT_seasonal_2160    float64
stl_OT_resid            float64
stl_OT_resid            float64
stl_OT_trend            float64
dtype: object

(12934, 8)

2026-02-09 12:21:51,594 - athena-assignment - DEBUG - Creating pairplot for variables: all numeric columns
2026-02-09 12:21:51,595 - athena-assignment - DEBUG - Auto-selected numeric columns: ['stl_OT_trend', 'stl_OT_seasonal_24', 'stl_OT_seasonal_504', 'stl_OT_seasonal_720', 'stl_OT_seasonal_2160', 'stl_OT_resid', 'stl_OT_resid', 'stl_OT_trend']
2026-02-09 12:21:51,598 - athena-assignment - DEBUG - Plotting 12934 rows with 12 columns
2026-02-09 12:21:51,600 - athena-assignment - ERROR - Error creating pairplot: Data must be 1-dimensional, got ndarray of shape (12934, 4) instead


ValueError: Data must be 1-dimensional, got ndarray of shape (12934, 4) instead