# 特徴量テーブル閲覧ノートブック v2（自動ルート検出）

このノートブックは、**どのディレクトリから起動しても** プロジェクトルートを自動検出し、
`src.pipelines.loto_pipeline` を確実に import できるように改良されています。

- 生成対象: `artifacts/` 配下の `features_hist.csv`, `features_futr.csv`, `features_stat.csv`
- いずれかが未生成の場合は、同梱のサンプル（`examples/sample_loto.csv`）を入力としてパイプラインを自動実行します。


In [1]:
# ルート自動検出（上位へ遡り、'src' と 'config/pipeline_config.yaml' がある場所を探す）
from pathlib import Path
import sys

def find_project_root(start: Path, max_up: int = 6):
    """Walk upward from *start* looking for the project root.

    A directory counts as the root when it contains a ``src`` directory and
    a ``config/pipeline_config.yaml`` file.

    Args:
        start: Directory to begin the search from; it is resolved first.
        max_up: Maximum number of parent levels to climb above *start*.

    Returns:
        The first matching directory, or ``None`` when no directory within
        ``max_up`` levels matches.
    """
    def _has_markers(candidate):
        # Both markers must be present for a directory to qualify.
        has_src = (candidate / "src").is_dir()
        return has_src and (candidate / "config" / "pipeline_config.yaml").exists()

    current = start.resolve()
    for _level in range(max_up + 1):
        if _has_markers(current):
            return current
        current = current.parent
    return None

# Search upward starting from the current working directory.
CWD = Path.cwd()
ROOT = find_project_root(CWD)

# Fallback when no marker directory was found: use the parent of the
# current directory as the root candidate. This is not verified, so the
# later pipeline import may still fail.
if ROOT is None:
    ROOT = Path(".").resolve().parent

print("Detected ROOT:", ROOT)

# Put ROOT at the front of the import path (only once) so that the
# 'src' package located under it can be imported.
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

Detected ROOT: /mnt/e/env/ts/test


In [2]:
# 主要パス
from pathlib import Path

# Main locations, all derived from the detected project root.
BASE = ROOT
ART = BASE.joinpath('artifacts')
CFG = BASE.joinpath('config', 'pipeline_config.yaml')
SAMPLE = BASE.joinpath('examples', 'sample_loto.csv')

# Feature tables expected inside the artifact directory.
HIST = ART.joinpath('features_hist.csv')
FUTR = ART.joinpath('features_futr.csv')
STAT = ART.joinpath('features_stat.csv')

# Echo the resolved locations for a quick visual check.
for _label, _path in (("BASE:", BASE), ("ART:", ART), ("CFG:", CFG), ("SAMPLE:", SAMPLE)):
    print(_label, _path)

BASE: /mnt/e/env/ts/test
ART: /mnt/e/env/ts/test/artifacts
CFG: /mnt/e/env/ts/test/config/pipeline_config.yaml
SAMPLE: /mnt/e/env/ts/test/examples/sample_loto.csv


In [3]:
# Run the pipeline on the bundled sample when needed.
# When True, the pipeline is run if any of the three feature CSVs is missing;
# when False, the run is always skipped.
run_if_missing = True

def _maybe_run_pipeline():
    """Import the project's pipeline entry point and run it on the sample.

    If the import fails, the error and some debugging hints (the head of
    ``sys.path`` and the directory listing of ``BASE``) are printed and
    nothing is run. Otherwise ``ART`` is created if needed and the pipeline
    is called with the sample CSV, the artifact directory and the config
    path, each passed as a string.
    """
    try:
        from src.pipelines.loto_pipeline import run as run_pipeline
    except Exception as import_error:
        # Show where Python looked so that a wrong ROOT is easy to spot.
        import os
        import sys
        print("パイプラインのimportに失敗:", import_error)
        print("sys.path[0:5]:", sys.path[:5])
        print("ディレクトリ一覧（ROOT直下）:", os.listdir(str(BASE)))
    else:
        ART.mkdir(parents=True, exist_ok=True)
        print("パイプライン実行中...")
        outputs = run_pipeline(str(SAMPLE), str(ART), str(CFG))
        print("生成完了:", outputs)

# Skip the run when it is disabled or when all three feature files already
# exist; otherwise generate them from the sample.
if not run_if_missing or (HIST.exists() and FUTR.exists() and STAT.exists()):
    print("features_*.csv が見つかりました（または実行スキップ）。")
else:
    _maybe_run_pipeline()

パイプライン実行中...
生成完了: {'hist': PosixPath('/mnt/e/env/ts/test/artifacts/features_hist.csv'), 'futr': PosixPath('/mnt/e/env/ts/test/artifacts/features_futr.csv'), 'stat': PosixPath('/mnt/e/env/ts/test/artifacts/features_stat.csv')}


In [4]:
# 読み込み＆プレビュー
import pandas as pd

def _safe_read_csv(p: Path):
    if not p.exists():
        print(f"ないよ: {p}")
        return None
    try:
        return pd.read_csv(p)
    except Exception as e:
        print(f"読込失敗: {p} -> {e}")
        return None

# Load the three feature tables in hist / futr / stat order; a table that
# could not be read is None.
df_hist, df_futr, df_stat = (_safe_read_csv(path) for path in (HIST, FUTR, STAT))

# Show the shape and the first ten rows of every table that was loaded.
for name, df in zip(("hist", "futr", "stat"), (df_hist, df_futr, df_stat)):
    if df is None:
        continue
    print(f"{name}: shape={df.shape}")
    display(df.head(10))

hist: shape=(240, 13)


Unnamed: 0,unique_id,ds,y,hist_lag_1,hist_lag_7,hist_lag_14,hist_lag_30,hist_rollmean_7,hist_rollmean_30,hist_rollstd_7,hist_rollstd_30,hist_diff_1,hist_diff_2
0,loto6,2024-01-01,101.49,,,,,,,,,,
1,loto6,2024-01-02,107.4,101.49,,,,,,,,5.91,
2,loto6,2024-01-03,111.69,107.4,,,,106.86,,5.121396,,4.29,10.2
3,loto6,2024-01-04,108.91,111.69,,,,107.3725,,4.305395,,-2.78,1.51
4,loto6,2024-01-05,94.96,108.91,,,,104.89,,6.687028,,-13.95,-16.73
5,loto6,2024-01-06,89.55,94.96,,,,102.333333,,8.659812,,-5.41,-19.36
6,loto6,2024-01-07,96.92,89.55,,,,101.56,,8.165778,,7.37,1.96
7,loto6,2024-01-08,102.3,96.92,101.49,,,101.675714,,8.170359,,5.38,12.75
8,loto6,2024-01-09,106.41,102.3,107.4,,,101.534286,,8.062615,,4.11,9.49
9,loto6,2024-01-10,111.38,106.41,111.69,,,101.49,,7.998129,,4.97,9.08


futr: shape=(240, 10)


Unnamed: 0,unique_id,ds,year,month,day,weekday,week,quarter,is_month_start,is_month_end
0,loto6,2024-01-01,2024,1,1,0,1,1,1,0
1,loto6,2024-01-02,2024,1,2,1,1,1,0,0
2,loto6,2024-01-03,2024,1,3,2,1,1,0,0
3,loto6,2024-01-04,2024,1,4,3,1,1,0,0
4,loto6,2024-01-05,2024,1,5,4,1,1,0,0
5,loto6,2024-01-06,2024,1,6,5,1,1,0,0
6,loto6,2024-01-07,2024,1,7,6,1,1,0,0
7,loto6,2024-01-08,2024,1,8,0,2,1,0,0
8,loto6,2024-01-09,2024,1,9,1,2,1,0,0
9,loto6,2024-01-10,2024,1,10,2,2,1,0,0


stat: shape=(2, 2)


Unnamed: 0,unique_id,unique_id_len
0,loto6,5
1,loto7,5


In [5]:
# フィルタ/結合/プロファイル/可視化
import pandas as pd
import matplotlib.pyplot as plt

def preview_hist(unique_id=None, start=None, end=None, cols=None, head=20):
    """Display and return a filtered view of the historical feature table.

    Args:
        unique_id: Keep only rows whose ``unique_id`` equals this value.
        start: Inclusive lower bound on ``ds``; anything accepted by
            ``pd.Timestamp`` (date string, ``date``, ``datetime``, ...).
        end: Inclusive upper bound on ``ds``; same accepted types as *start*.
        cols: Extra columns to keep next to ``unique_id`` and ``ds``. Names
            missing from the table are ignored.
        head: Number of rows passed to ``display``.

    Returns:
        The filtered DataFrame (all matching rows, not only the displayed
        ones), or ``None`` when ``df_hist`` has not been loaded.
    """
    if df_hist is None:
        print("df_hist がありません")
        return None
    out = df_hist.copy()
    if unique_id is not None:
        out = out[out['unique_id'] == unique_id]
    # Compare as timestamps, not as strings: str() of a datetime-like bound
    # carries a time part ("2024-01-05 00:00:00") and would sort after the
    # plain "2024-01-05" stored in the CSV, dropping the boundary day.
    if start is not None:
        ds_values = pd.to_datetime(out['ds'], errors='coerce')
        out = out[ds_values >= pd.Timestamp(start)]
    if end is not None:
        ds_values = pd.to_datetime(out['ds'], errors='coerce')
        out = out[ds_values <= pd.Timestamp(end)]
    if cols is not None:
        key_cols = ['unique_id', 'ds']
        # De-duplicate while keeping order so that passing a key column (or
        # the same name twice) does not produce repeated columns.
        extra = [
            c for c in dict.fromkeys(cols)
            if c in out.columns and c not in key_cols
        ]
        out = out[key_cols + extra]
    display(out.head(head))
    return out

def merged_view(limit=50):
    """Display and return the three feature tables joined into one frame.

    ``df_hist`` is left-joined with ``df_futr`` on ``unique_id`` and ``ds``,
    and the result is left-joined with ``df_stat`` on ``unique_id``.

    Args:
        limit: Number of leading rows passed to ``display``.

    Returns:
        The full merged DataFrame, or ``None`` when any table is not loaded.
    """
    all_loaded = (
        df_hist is not None and df_futr is not None and df_stat is not None
    )
    if not all_loaded:
        print("いずれかのテーブルがありません")
        return None
    with_future = df_hist.merge(df_futr, on=['unique_id','ds'], how='left')
    merged = with_future.merge(df_stat, on=['unique_id'], how='left')
    display(merged.head(limit))
    return merged

def profile(df, name):
    """Print the shape of *df* and display a per-column summary.

    The summary has one row per column with its dtype (as a string), its
    number of unique values and its number of missing values.

    Args:
        df: DataFrame to summarise; ``None`` makes the call a no-op.
        name: Label printed in front of the shape.
    """
    if df is None:
        return
    print(f"[{name}] shape={df.shape}")
    summary = pd.DataFrame({
        "dtype": df.dtypes.astype(str),
        "n_unique": df.nunique(),
        "n_missing": df.isna().sum(),
    })
    display(summary)

def plot_series(unique_id='loto6', col='y', head=None):
    """Plot one column of the historical feature table for a single series.

    Args:
        unique_id: Series to plot; rows are selected by ``unique_id``.
        col: Name of the column drawn on the y axis.
        head: When given, only the first ``head`` rows of the selected series
            are used (taken in table order, before sorting by ``ds``).

    Returns:
        None. A message is printed instead of plotting when ``df_hist`` is
        not loaded or *col* is not a column of it.
    """
    if df_hist is None:
        print("df_hist がありません")
        return
    if col not in df_hist.columns:
        # Fail fast before copying or converting anything.
        print(f"列がありません: {col}")
        return
    d = df_hist[df_hist['unique_id'] == unique_id].copy()
    if head is not None:
        d = d.head(head)
    try:
        d['ds'] = pd.to_datetime(d['ds'])
    except Exception as e:
        # Best effort: keep the raw values on the x axis, but say so instead
        # of silently hiding the conversion failure.
        print(f"ds を日付に変換できません（元の値のまま描画します）: {e}")
    d = d.sort_values('ds')
    plt.figure()
    plt.plot(d['ds'], d[col])
    plt.title(f"{unique_id} - {col}")
    plt.xlabel("ds")
    plt.ylabel(col)
    plt.tight_layout()
    plt.show()

# Examples: preview one series, show the merged table, then profile each table.
_ = preview_hist(unique_id='loto6', head=20)
_ = merged_view(limit=30)
for _table, _label in ((df_hist, "hist"), (df_futr, "futr"), (df_stat, "stat")):
    profile(_table, _label)


Unnamed: 0,unique_id,ds,y,hist_lag_1,hist_lag_7,hist_lag_14,hist_lag_30,hist_rollmean_7,hist_rollmean_30,hist_rollstd_7,hist_rollstd_30,hist_diff_1,hist_diff_2
0,loto6,2024-01-01,101.49,,,,,,,,,,
1,loto6,2024-01-02,107.4,101.49,,,,,,,,5.91,
2,loto6,2024-01-03,111.69,107.4,,,,106.86,,5.121396,,4.29,10.2
3,loto6,2024-01-04,108.91,111.69,,,,107.3725,,4.305395,,-2.78,1.51
4,loto6,2024-01-05,94.96,108.91,,,,104.89,,6.687028,,-13.95,-16.73
5,loto6,2024-01-06,89.55,94.96,,,,102.333333,,8.659812,,-5.41,-19.36
6,loto6,2024-01-07,96.92,89.55,,,,101.56,,8.165778,,7.37,1.96
7,loto6,2024-01-08,102.3,96.92,101.49,,,101.675714,,8.170359,,5.38,12.75
8,loto6,2024-01-09,106.41,102.3,107.4,,,101.534286,,8.062615,,4.11,9.49
9,loto6,2024-01-10,111.38,106.41,111.69,,,101.49,,7.998129,,4.97,9.08


Unnamed: 0,unique_id,ds,y,hist_lag_1,hist_lag_7,hist_lag_14,hist_lag_30,hist_rollmean_7,hist_rollmean_30,hist_rollstd_7,...,hist_diff_2,year,month,day,weekday,week,quarter,is_month_start,is_month_end,unique_id_len
0,loto6,2024-01-01,101.49,,,,,,,,...,,2024,1,1,0,1,1,1,0,5
1,loto6,2024-01-02,107.4,101.49,,,,,,,...,,2024,1,2,1,1,1,0,0,5
2,loto6,2024-01-03,111.69,107.4,,,,106.86,,5.121396,...,10.2,2024,1,3,2,1,1,0,0,5
3,loto6,2024-01-04,108.91,111.69,,,,107.3725,,4.305395,...,1.51,2024,1,4,3,1,1,0,0,5
4,loto6,2024-01-05,94.96,108.91,,,,104.89,,6.687028,...,-16.73,2024,1,5,4,1,1,0,0,5
5,loto6,2024-01-06,89.55,94.96,,,,102.333333,,8.659812,...,-19.36,2024,1,6,5,1,1,0,0,5
6,loto6,2024-01-07,96.92,89.55,,,,101.56,,8.165778,...,1.96,2024,1,7,6,1,1,0,0,5
7,loto6,2024-01-08,102.3,96.92,101.49,,,101.675714,,8.170359,...,12.75,2024,1,8,0,2,1,0,0,5
8,loto6,2024-01-09,106.41,102.3,107.4,,,101.534286,,8.062615,...,9.49,2024,1,9,1,2,1,0,0,5
9,loto6,2024-01-10,111.38,106.41,111.69,,,101.49,,7.998129,...,9.08,2024,1,10,2,2,1,0,0,5


[hist] shape=(240, 13)


Unnamed: 0,dtype,n_unique,n_missing
unique_id,object,2,0
ds,object,120,0
y,float64,234,0
hist_lag_1,float64,232,2
hist_lag_7,float64,220,14
hist_lag_14,float64,207,28
hist_lag_30,float64,176,60
hist_rollmean_7,float64,226,4
hist_rollmean_30,float64,209,28
hist_rollstd_7,float64,236,4


[futr] shape=(240, 10)


Unnamed: 0,dtype,n_unique,n_missing
unique_id,object,2,0
ds,object,120,0
year,int64,1,0
month,int64,4,0
day,int64,31,0
weekday,int64,7,0
week,int64,18,0
quarter,int64,2,0
is_month_start,int64,2,0
is_month_end,int64,2,0


[stat] shape=(2, 2)


Unnamed: 0,dtype,n_unique,n_missing
unique_id,object,2,0
unique_id_len,int64,1,0
