# 特徴量テーブル閲覧ノートブック（JupyterLab）

このノートブックは `loto_feature_system_v2` プロジェクトの出力（`artifacts/` 配下の特徴量CSV）を読み込み、
テーブルの中身を確認するための最小セットです。

- 生成対象: `features_hist.csv`, `features_futr.csv`, `features_stat.csv`
- もしファイルが存在しない場合は、**オプション**で同梱のサンプルデータからパイプラインを実行して生成できます。


In [1]:
# パス設定（このノートブックはプロジェクト直下で動かす想定）
from pathlib import Path
import pandas as pd

BASE = Path('.').resolve()
ART = BASE / 'artifacts'
CFG = BASE / 'config' / 'pipeline_config.yaml'
SAMPLE = BASE / 'examples' / 'sample_loto.csv'

HIST = ART / 'features_hist.csv'
FUTR = ART / 'features_futr.csv'
STAT = ART / 'features_stat.csv'

display({
    "BASE": str(BASE),
    "ARTIFACTS_DIR": str(ART),
    "HIST": str(HIST),
    "FUTR": str(FUTR),
    "STAT": str(STAT),
})

{'BASE': '/mnt/e/env/ts/test/loto_feature_system_v2_with_notebook/notebooks',
 'ARTIFACTS_DIR': '/mnt/e/env/ts/test/loto_feature_system_v2_with_notebook/notebooks/artifacts',
 'HIST': '/mnt/e/env/ts/test/loto_feature_system_v2_with_notebook/notebooks/artifacts/features_hist.csv',
 'FUTR': '/mnt/e/env/ts/test/loto_feature_system_v2_with_notebook/notebooks/artifacts/features_futr.csv',
 'STAT': '/mnt/e/env/ts/test/loto_feature_system_v2_with_notebook/notebooks/artifacts/features_stat.csv'}

In [7]:
%cd /mnt/e/env/ts/test

/mnt/e/env/ts/test


In [10]:
!tree

[01;34m.[0m
├── Core-Nixtla.md
├── Explore_Original_LongTable.ipynb
├── Explore_Postgres_FeatureTables.ipynb
├── INSTALL.md
├── N4.csv
├── README.md
├── [01;34m__pycache__[0m
│   ├── db_config.cpython-311.pyc
│   ├── postgres_manager.cpython-311.pyc
│   └── postgres_manager.cpython-313.pyc
├── [01;34martifacts[0m
├── [01;34mbundles[0m
│   ├── 01_NeuralForecast.md
│   ├── 02_Ray.md
│   ├── 03_Optuna.md
│   ├── 03_optunareadthedocsio.md
│   ├── 04_MLflow.md
│   ├── 05_MLflow-Databricks.md
│   ├── 06_GitHub.md
│   ├── 07_Lightning.md
│   ├── https___nixtlaverse.md
│   └── パラメータ.md
├── [01;34mcache[0m
├── [01;34mconfig[0m
│   ├── db_config.yaml.template
│   ├── default_config.yaml
│   ├── logging_config.yaml
│   └── pipeline_config.yaml
├── [01;34mconfigs[0m
│   ├── CONFIG_DESIGN.md
│   ├── OVERVIEW.md
│   ├── README.md
│   ├── config_loader.py
│   ├── configs_README.md
│   ├── default_configs.yaml
│   ├── example_config_usage.py
│   └── model_characteristics.yaml
├── [01;34

In [9]:
# 必要に応じてパイプラインを実行（サンプル）
# run_if_missing=True の場合、features_*.csv が無ければ生成します。
run_if_missing = True

def _maybe_run_pipeline():
    import importlib, sys
    from pathlib import Path
    sys.path.insert(0, str(BASE))  # src を相対import
    try:
        from src.pipelines.loto_pipeline import run as run_pipeline
    except Exception as e:
        print("パイプラインのimportに失敗:", e)
        return
    ART.mkdir(parents=True, exist_ok=True)
    print("パイプライン実行中...")
    res = run_pipeline(str(SAMPLE), str(ART), str(CFG))
    print("生成完了:", res)

if run_if_missing and not (HIST.exists() and FUTR.exists() and STAT.exists()):
    _maybe_run_pipeline()
else:
    print("features_*.csv が見つかりました（または実行スキップ）。")

パイプラインのimportに失敗: No module named 'src.pipelines.loto_pipeline'


In [None]:
# 読み込み（存在チェック付き）
def _safe_read_csv(p: Path):
    if not p.exists():
        print(f"ないよ: {p}")
        return None
    try:
        df = pd.read_csv(p)
    except Exception as e:
        print(f"読込失敗: {p} -> {e}")
        return None
    return df

df_hist = _safe_read_csv(HIST)
df_futr = _safe_read_csv(FUTR)
df_stat = _safe_read_csv(STAT)

for name, df in [("hist", df_hist), ("futr", df_futr), ("stat", df_stat)]:
    if df is not None:
        print(f"{name}: shape={df.shape}")
        display(df.head(10))

In [None]:
# ユーティリティ: フィルタ & プレビュー
import pandas as pd

def preview_hist(unique_id=None, start=None, end=None, cols=None, head=20):
    if df_hist is None:
        print("df_hist がありません")
        return None
    out = df_hist.copy()
    if unique_id is not None:
        out = out[out['unique_id'] == unique_id]
    if start is not None:
        out = out[out['ds'] >= str(start)]
    if end is not None:
        out = out[out['ds'] <= str(end)]
    if cols is not None:
        keep = ['unique_id','ds'] + [c for c in cols if c in out.columns]
        out = out[keep]
    display(out.head(head))
    return out

# 例: 指定系列の先頭20行
_ = preview_hist(unique_id='loto6', head=20)

In [None]:
# 結合ビュー（左結合でざっくり確認）
def merged_view(limit=50):
    if (df_hist is None) or (df_futr is None) or (df_stat is None):
        print("いずれかのテーブルがありません")
        return None
    m = df_hist.merge(df_futr, on=['unique_id','ds'], how='left')\
               .merge(df_stat, on=['unique_id'], how='left')
    display(m.head(limit))
    return m

_ = merged_view(30)

In [None]:
# 簡単な統計・欠損プロファイル
def profile(df, name):
    if df is None:
        return
    print(f"[{name}] shape={df.shape}")
    display(pd.DataFrame({
        "dtype": df.dtypes.astype(str),
        "n_unique": df.nunique(),
        "n_missing": df.isna().sum(),
    }))

profile(df_hist, "hist")
profile(df_futr, "futr")
profile(df_stat, "stat")

In [None]:
# （任意）可視化例: 任意列の折れ線
# 実行時に 'unique_id' と列名 'col' を指定してください。
import matplotlib.pyplot as plt

def plot_series(unique_id='loto6', col='y', head=None):
    if df_hist is None:
        print("df_hist がありません")
        return
    d = df_hist[df_hist['unique_id'] == unique_id].copy()
    if head is not None:
        d = d.head(head)
    # ds を日付に
    try:
        d['ds'] = pd.to_datetime(d['ds'])
    except Exception:
        pass
    d = d.sort_values('ds')
    if col not in d.columns:
        print(f"列がありません: {col}")
        return
    plt.figure()
    plt.plot(d['ds'], d[col])
    plt.title(f"{unique_id} - {col}")
    plt.xlabel("ds")
    plt.ylabel(col)
    plt.tight_layout()
    plt.show()

# plot_series('loto6', 'y')  # 必要ならコメントアウトを外して実行