In [None]:
import pandas as pd
# Preview cell: load the N1 series CSV and list its column names.
path="/mnt/e/env/ts/datas/data/data_long/ft_normal/bingo5/by_unique_id/N1.csv"
df = pd.read_csv(path)  
df.columns.tolist()

In [None]:
# Echo the future-exogenous column list. `futr_cols` is only assigned by the
# training cells further down, so this raises NameError unless one of them has run.
futr_cols

In [None]:
# ================================================
# NeuralForecast Auto(TFT / PatchTST) フル実装（seed修正）
# ================================================
import os, json, time, warnings, inspect, random
from datetime import timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import pytorch_lightning as pl

from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoTFT, AutoPatchTST
from neuralforecast.losses.pytorch import SMAPE

# ---------------------------
# User-specified parameters
# ---------------------------
DATA_CSV = "/mnt/e/env/ts/datas/data/data_long/ft_normal/bingo5/by_unique_id/N1.csv"  # input CSV read below
TRIALS   = 1  # hyperparameter-search trials per Auto model
SEED     = 1029  # seed applied to random / numpy / torch / Lightning
H        = 1  # forecast horizon passed to the Auto models as h
ARTIFACTS_ROOT = "/mnt/e/env/ts/ts-mlops-skeleton/nf_auto_runs"  # parent of the run_<timestamp> output directories
FREQ = "D"  # frequency string handed to NeuralForecast
AUTO_GENERATE_FUTR_EXOG = False  # only guards a no-op `pass` at the end of this cell

# --- Reproducibility: seed Python, NumPy, torch and Lightning with the same value ---
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
pl.seed_everything(SEED, workers=True)

# =========================================================
# ユーティリティ
# =========================================================
def ensure_dir(path: str) -> str:
    """Create ``path`` (including missing parents) if needed and return it unchanged."""
    os.makedirs(path, exist_ok=True)
    return path

def now_str():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    stamp_format = "%Y%m%d-%H%M%S"
    return time.strftime(stamp_format)

def to_datetime_col(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Return a copy of ``df`` with ``col`` parsed as datetimes (unparseable values become NaT)."""
    result = df.copy()
    parsed = pd.to_datetime(result[col], errors="coerce")
    result[col] = parsed
    return result

def to_numeric_col(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Return a copy of ``df`` with ``col`` converted to numbers (unparseable values become NaN)."""
    result = df.copy()
    numeric = pd.to_numeric(result[col], errors="coerce")
    result[col] = numeric
    return result

def preprocess_df(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of ``raw`` keyed by (unique_id, ds).

    Steps: add a constant ``unique_id`` when the column is absent, parse ``ds``
    as datetime and ``y`` as numeric, drop rows where either failed to parse,
    sort by (unique_id, ds), keep the last row of each duplicated key, and
    reset the index. A warning is issued when any within-series step between
    consecutive timestamps differs from exactly one day.
    """
    frame = raw.copy()
    if "unique_id" not in frame.columns:
        frame["unique_id"] = "series_0"
    frame = to_numeric_col(to_datetime_col(frame, "ds"), "y")

    rows_in = len(frame)
    frame = frame.dropna(subset=["ds", "y"])
    dropped = rows_in - len(frame)
    if dropped > 0:
        print(f"[info] 前処理: ds/y の変換で {dropped} 行を除去しました。")

    key = ["unique_id", "ds"]
    frame = frame.sort_values(key)
    frame = frame.drop_duplicates(subset=key, keep="last")
    frame = frame.reset_index(drop=True)

    # Per-series difference between consecutive timestamps; the first row of
    # each series has no predecessor and is removed by dropna().
    steps = frame.groupby("unique_id")["ds"].diff().dropna()
    one_day = pd.Timedelta(days=1)
    if len(steps) > 0 and (steps != one_day).any():
        warnings.warn("日次ギャップが検出されました。")
    return frame

def split_exog_by_prefix(df: pd.DataFrame):
    """Group column names by the prefixes ``futr_``, ``hist_`` and ``stat_``.

    Returns a 3-tuple of lists (future, historical, static), each in column order.
    """
    buckets = {"futr_": [], "hist_": [], "stat_": []}
    for name in df.columns:
        for prefix, members in buckets.items():
            if name.startswith(prefix):
                members.append(name)
    return buckets["futr_"], buckets["hist_"], buckets["stat_"]

# ---- 指標 ----
def _to_arr(x): return np.asarray(x, dtype=float)
def smape(y, yhat, eps=1e-8): y=_to_arr(y);yhat=_to_arr(yhat);return 100*np.mean(2*np.abs(yhat-y)/(np.abs(y)+np.abs(yhat)+eps))
def mae(y, yhat): y=_to_arr(y);yhat=_to_arr(yhat);return float(np.mean(np.abs(yhat-y)))
def mape(y, yhat, eps=1e-8): y=_to_arr(y);yhat=_to_arr(yhat);den=np.clip(np.abs(y),eps,None);return 100*np.mean(np.abs((yhat-y)/den))
def rmse(y, yhat): y=_to_arr(y);yhat=_to_arr(yhat);return float(np.sqrt(np.mean((yhat-y)**2)))

def compute_metrics(df_pred: pd.DataFrame, model_cols):
    """Score every prediction column in ``model_cols`` against ``df_pred["y"]``.

    Returns a DataFrame with one row per model and the columns
    model, SMAPE, MAE, MAPE, RMSE and n (the row count of ``df_pred``).
    """
    def _score(name):
        truth = df_pred["y"].values
        pred = df_pred[name].values
        return {
            "model": name,
            "SMAPE": smape(truth, pred),
            "MAE": mae(truth, pred),
            "MAPE": mape(truth, pred),
            "RMSE": rmse(truth, pred),
            "n": len(df_pred),
        }

    return pd.DataFrame([_score(m) for m in model_cols])

def plot_last_window(cv_df: pd.DataFrame, model_cols, out_png: str, uid: str = None):
    """Plot actuals and model predictions for the latest CV cutoff and save to ``out_png``.

    Only one series is drawn: ``uid`` when it is present in the last window,
    otherwise the first ``unique_id`` after sorting. Without a ``cutoff``
    column the function prints a warning and returns without plotting.
    """
    if "cutoff" not in cv_df.columns:
        print("[warn] cutoff がないため描画スキップ")
        return

    newest = cv_df["cutoff"].max()
    window = cv_df.loc[cv_df["cutoff"] == newest]
    if uid is not None:
        requested = window.loc[window["unique_id"] == uid]
        if len(requested) > 0:
            window = requested
    window = window.sort_values(["unique_id", "ds"])
    if window["unique_id"].nunique() > 1:
        first_id = window["unique_id"].iloc[0]
        window = window.loc[window["unique_id"] == first_id]

    plt.figure(figsize=(12, 5))
    plt.plot(window["ds"], window["y"], label="y", linewidth=2)
    for name in model_cols:
        plt.plot(window["ds"], window[name], label=name, linewidth=1)
    plt.title(f"Last CV Window @ cutoff={newest}")
    plt.xlabel("ds")
    plt.ylabel("value")
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_png)
    plt.close()
    print(f"[info] 予測可視化を保存: {out_png}")

# =========================================================
# 1) Load data & preprocess
# =========================================================
print(f"[info] CSV を読込中: {DATA_CSV}")
raw = pd.read_csv(DATA_CSV)
print(f"[info] loaded shape: {raw.shape}")
# Only the first ten column names are echoed to keep the log short.
print(f"[info] columns: {list(raw.columns)[:10]} ...")

df = preprocess_df(raw)
# Exogenous columns are identified purely by their futr_/hist_/stat_ name prefix.
futr_cols, hist_cols, stat_cols = split_exog_by_prefix(df)
print(f"[info] exog sizes (raw): futr={len(futr_cols)} hist={len(hist_cols)} stat={len(stat_cols)}")

# =========================================================
# 2) Build static_df (non-numeric -> factorize) and remove stat_* from df
# =========================================================
static_df = None
if len(stat_cols):
    static_df = df[["unique_id", *stat_cols]].drop_duplicates()
    for c in stat_cols:
        col = static_df[c]
        # The pandas dtype predicates are used instead of np.issubdtype, which
        # raises TypeError on pandas extension dtypes (category, string, ...).
        # bool is excluded so it keeps being encoded, as it was before.
        plain_numeric = pd.api.types.is_numeric_dtype(col) and not pd.api.types.is_bool_dtype(col)
        if not plain_numeric:
            # Integer-encode the categories in sorted order; missing values get code -1.
            codes, uniques = pd.factorize(col, sort=True)
            static_df[c] = codes.astype(np.int32)
            print(f"[info] static_df: 非数値列をコード化 -> {c} （{len(uniques)}カテゴリ）")
    df = df.drop(columns=stat_cols)
    print(f"[info] df から stat_* を削除: {len(stat_cols)} 列")

# Re-derive futr/hist from df (the stat columns are no longer in df).
futr_cols, hist_cols, _ = split_exog_by_prefix(df)
print(f"[info] exog sizes (after drop stat): futr={len(futr_cols)} hist={len(hist_cols)} stat={len(stat_cols)}")

# =========================================================
# 3) Make futr_/hist_ numeric (coerce non-numeric columns; drop those that are entirely unparseable)
# =========================================================
protected = {"unique_id","ds","y"}
temporal_cols = [c for c in df.columns if c not in protected]
drop_cols=[]
for c in temporal_cols:
    # pandas dtype predicates work for extension dtypes too, where
    # np.issubdtype raises TypeError; bool keeps going through the coercion path.
    plain_numeric = pd.api.types.is_numeric_dtype(df[c]) and not pd.api.types.is_bool_dtype(df[c])
    if not plain_numeric:
        coerced = pd.to_numeric(df[c], errors="coerce")
        if coerced.isna().all():
            drop_cols.append(c)
        else:
            # .ffill()/.bfill() replace the deprecated fillna(method=...) form.
            df[c] = coerced.ffill().bfill().fillna(0.0)
            print(f"[info] temporal: {c} を数値化（欠損は前後詰め→0）")
if drop_cols:
    df = df.drop(columns=drop_cols)
    print(f"[info] temporal: 数値化不能のためドロップ -> {drop_cols}")

# Final lists: keep only the columns that still exist in df.
futr_cols = [c for c in futr_cols if c in df.columns]
hist_cols = [c for c in hist_cols if c in df.columns]
print(f"[info] exog sizes (final): futr={len(futr_cols)} hist={len(hist_cols)} stat={len(stat_cols)}")
print(f"[info] 学習に使う futr = {len(futr_cols)}, hist = {len(hist_cols)}, stat = {len(stat_cols)}")

# =========================================================
# 4) Auto モデル設定（config(trial) に外生を渡す）※ seedは入れない
# =========================================================
def tft_config(trial):
    """Build the AutoTFT configuration for one Optuna ``trial``.

    Samples input_size, hidden_size, dropout and learning_rate from the trial
    and adds the fixed step budget plus the module-level exogenous column lists.
    """
    cfg = {}
    cfg["input_size"] = trial.suggest_categorical("input_size", [H*3, H*4, H*5])
    cfg["hidden_size"] = trial.suggest_categorical("hidden_size", [64, 128])
    cfg["dropout"] = trial.suggest_float("dropout", 0.0, 0.2)
    cfg["learning_rate"] = trial.suggest_float("learning_rate", 1e-4, 3e-3, log=True)
    cfg["max_steps"] = 400
    cfg["futr_exog_list"] = futr_cols
    cfg["hist_exog_list"] = hist_cols
    cfg["stat_exog_list"] = stat_cols  # matches the columns of static_df
    # Do not add "seed": SEED here (the original author notes it reaches the
    # Lightning Trainer and causes an error).
    return cfg

def patchtst_config(trial):
    """Build the AutoPatchTST configuration for one Optuna ``trial``.

    Samples the architecture and optimiser hyperparameters and fixes the step
    budget. No exogenous column lists are passed: this file's own capability
    table lists PatchTST with F/H/S all False, so forwarding non-empty lists
    would be rejected by the model.
    """
    # NOTE(review): with H = 1 the input_size choices are 3-5, smaller than
    # every patch_len choice below; confirm PatchTST accepts that combination.
    return {
        "input_size":   trial.suggest_categorical("input_size", [H*3, H*4, H*5]),
        "patch_len":    trial.suggest_categorical("patch_len",  [7, 14, 21, 28]),
        "stride":       trial.suggest_categorical("stride",     [1, 2, 4]),
        "n_layers":     trial.suggest_categorical("n_layers",   [2, 3]),
        "d_model":      trial.suggest_categorical("d_model",    [64, 128]),
        "n_heads":      trial.suggest_categorical("n_heads",    [2, 4]),
        "d_ff":         trial.suggest_categorical("d_ff",       [256, 512, 1024]),
        "dropout":      trial.suggest_float("dropout", 0.0, 0.2),
        "learning_rate":trial.suggest_float("learning_rate", 1e-4, 3e-3, log=True),
        "max_steps":    400,
        # Do not add a seed here either.
    }

# =========================================================
# 5) Route the trial count to whichever API this environment exposes
# =========================================================
# True when NeuralForecast.fit itself declares an n_trials parameter.
fit_accepts_n_trials = "n_trials" in inspect.signature(NeuralForecast.fit).parameters

if fit_accepts_n_trials:
    # The trial count goes to fit().
    models = [
        AutoTFT(h=H, loss=SMAPE(), backend="optuna", config=tft_config, verbose=True),
        AutoPatchTST(h=H, loss=SMAPE(), backend="optuna", config=patchtst_config, verbose=True),
    ]
    nf = NeuralForecast(models=models, freq=FREQ)
    print(f"[info] 学習開始: n_trials={TRIALS}, val_size={H*2}, freq={FREQ}（fit に n_trials）")
    nf.fit(df=df, static_df=static_df, val_size=H*2, n_trials=TRIALS)
else:
    # The trial count goes to each Auto model's constructor as num_samples.
    models = [
        AutoTFT(h=H, loss=SMAPE(), backend="optuna", config=tft_config, num_samples=TRIALS, verbose=True),
        AutoPatchTST(h=H, loss=SMAPE(), backend="optuna", config=patchtst_config, num_samples=TRIALS, verbose=True),
    ]
    nf = NeuralForecast(models=models, freq=FREQ)
    print(f"[info] 学習開始: num_samples={TRIALS}, val_size={H*2}, freq={FREQ}（__init__ に num_samples）")
    nf.fit(df=df, static_df=static_df, val_size=H*2)

print("[info] 学習完了")

# =========================================================
# 6) Save artifacts
# =========================================================
# Each run gets its own timestamped directory with models/, plots/ and tables/ inside.
run_dir   = ensure_dir(os.path.join(ARTIFACTS_ROOT, f"run_{now_str()}"))
models_dir= ensure_dir(os.path.join(run_dir, "models"))
plots_dir = ensure_dir(os.path.join(run_dir, "plots"))
tables_dir= ensure_dir(os.path.join(run_dir, "tables"))

# Run settings stored next to the models as meta.json.
meta = {
    "DATA_CSV": DATA_CSV, "TRIALS": TRIALS, "SEED": SEED, "H": H, "FREQ": FREQ,
    "futr_exog_list": futr_cols, "hist_exog_list": hist_cols, "stat_exog_list": stat_cols,
    "models": [m.__class__.__name__ for m in models],
    "fit_accepts_n_trials": fit_accepts_n_trials,
}
with open(os.path.join(run_dir, "meta.json"), "w", encoding="utf-8") as f:
    json.dump(meta, f, ensure_ascii=False, indent=2)

nf.save(models_dir)
print(f"[info] モデルを保存しました: {models_dir}")

# =========================================================
# 7) Reload check
# =========================================================
nf_loaded = NeuralForecast.load(models_dir)
print("[info] モデルをロードしました。")

# =========================================================
# 8) Cross-validation -> metrics & plot
# =========================================================
print("[info] 交差検証（Rolling Origin）を実行します...")
# Try the `h=` keyword first; on TypeError retry with `max_horizon=` instead.
try:
    cv_df = nf_loaded.cross_validation(df=df, static_df=static_df, n_windows=3, step_size=H, h=H)
except TypeError:
    cv_df = nf_loaded.cross_validation(df=df, static_df=static_df, n_windows=3, step_size=H, max_horizon=H)

# Every column other than these four is treated as a model prediction column.
fixed_cols = {"unique_id","ds","y","cutoff"}
model_cols = [c for c in cv_df.columns if c not in fixed_cols]
cv_path = os.path.join(tables_dir, "cv_predictions.csv")
cv_df.to_csv(cv_path, index=False); print(f"[info] CV 予測テーブルを保存: {cv_path}")

metrics_df = compute_metrics(cv_df, model_cols)
metrics_path = os.path.join(tables_dir, "metrics.csv")
metrics_df.to_csv(metrics_path, index=False); print(f"[info] 評価指標を保存: {metrics_path}")
print(metrics_df)

plot_last_window(cv_df, model_cols, out_png=os.path.join(plots_dir, "last_window.png"))

# =========================================================
# 9) Future forecasting (optional)
# =========================================================
# Placeholder: nothing is generated even when the flag is set.
if AUTO_GENERATE_FUTR_EXOG:
    pass

print(f"[done] 成果物ルート: {run_dir}")


In [None]:
# ================================================
# NeuralForecast Auto(TFT / PatchTST) 最小設定版
# ================================================
import os, json, time, warnings, inspect, random, math
from datetime import timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ---- OOM 断片化対策（torch import 前に）----
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

import torch
import pytorch_lightning as pl

from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoTFT, AutoPatchTST
from neuralforecast.losses.pytorch import SMAPE

# ---------------------------
# User-specified parameters
# ---------------------------
DATA_CSV = "/mnt/e/env/ts/datas/data/data_long/ft_normal/bingo5/by_unique_id/N1.csv"
TRIALS   = 16
SEED     = 1029
H        = 28
ARTIFACTS_ROOT = "/mnt/e/env/ts/ts-mlops-skeleton/nf_auto_runs"
FREQ = "D"

# ---------------------------
# Auto-model settings (minimal configuration)
# ---------------------------
LOSS = SMAPE()
VALID_LOSS = None
# NOTE(review): this is a plain string forwarded as search_alg; confirm the
# installed Auto models accept a name here rather than a sampler instance.
SEARCH_ALG = "TPESampler"  # Optuna: TPESampler, Ray: BasicVariantGenerator
BACKEND = "optuna"
CALLBACKS = None
LOCAL_SCALER_TYPE = "standard"
EARLY_STOP_PATIENCE_STEPS = 20  # early stopping: stop after 20 steps without improvement
VERBOSE = True

# Resource settings
CPUS = 4
GPUS = 1 if torch.cuda.is_available() else 0

# Maximum number of exogenous variables to keep (memory saving)
TOPK_HIST = 32
TOPK_FUTR = 16

# ---- Fix seeds for reproducibility ----
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
pl.seed_everything(SEED, workers=True)

# ---- 便利関数 ----
def ensure_dir(path: str) -> str:
    """Make sure directory ``path`` exists (parents included) and hand it back."""
    if not os.path.isdir(path):
        # exist_ok keeps this safe if the directory appears in the meantime.
        os.makedirs(path, exist_ok=True)
    return path

def now_str():
    """Timestamp of the current local time, ``YYYYMMDD-HHMMSS``."""
    date_part, clock_part = "%Y%m%d", "%H%M%S"
    return time.strftime(date_part + "-" + clock_part)

def to_datetime_col(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Copy ``df`` and parse column ``col`` as datetimes; bad values become NaT."""
    converted = df.copy()
    converted[col] = pd.to_datetime(converted[col], errors="coerce")
    return converted

def to_numeric_col(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Copy ``df`` and convert column ``col`` to numbers; bad values become NaN."""
    converted = df.copy()
    converted[col] = pd.to_numeric(converted[col], errors="coerce")
    return converted

def preprocess_df(raw: pd.DataFrame) -> pd.DataFrame:
    """Clean ``raw`` into one row per (unique_id, ds) and return the new frame.

    Adds ``unique_id`` = "series_0" when absent, coerces ``ds`` to datetime and
    ``y`` to numeric, drops rows where either is missing afterwards, sorts,
    keeps the last duplicate per key and resets the index. Warns when any
    consecutive within-series timestamps are not exactly one day apart.
    """
    cleaned = raw.copy()
    if "unique_id" not in cleaned.columns:
        cleaned["unique_id"] = "series_0"
    cleaned = to_datetime_col(cleaned, "ds")
    cleaned = to_numeric_col(cleaned, "y")

    n_before = len(cleaned)
    cleaned = cleaned.dropna(subset=["ds", "y"])
    removed = n_before - len(cleaned)
    if removed > 0:
        print(f"[info] 前処理: ds/y の変換で {removed} 行を除去しました。")

    cleaned = cleaned.sort_values(["unique_id", "ds"])
    cleaned = cleaned.drop_duplicates(subset=["unique_id", "ds"], keep="last")
    cleaned = cleaned.reset_index(drop=True)

    # Step between consecutive timestamps inside each series.
    deltas = cleaned.groupby("unique_id")["ds"].diff().dropna()
    if len(deltas) > 0:
        if (deltas != pd.Timedelta(days=1)).any():
            warnings.warn("日次ギャップが検出されました。")
    return cleaned

def split_exog_by_prefix(df: pd.DataFrame):
    """Return (futr_cols, hist_cols, stat_cols): column names grouped by name prefix."""
    def _with_prefix(prefix):
        return [name for name in df.columns if name.startswith(prefix)]

    return _with_prefix("futr_"), _with_prefix("hist_"), _with_prefix("stat_")

def factorize_static(static_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``static_df`` with every non-numeric column integer-encoded.

    ``unique_id`` is left untouched. Other columns that are not plain numeric
    are replaced by int32 codes from ``pd.factorize(sort=True)``, so missing
    values become -1. The input frame is not modified.
    """
    out = static_df.copy()
    for c in out.columns:
        if c == "unique_id":
            continue
        col = out[c]
        # pandas' dtype predicates are used because np.issubdtype raises
        # TypeError on extension dtypes such as category or string. bool is
        # excluded so boolean columns are still encoded, as before.
        if pd.api.types.is_numeric_dtype(col) and not pd.api.types.is_bool_dtype(col):
            continue
        codes, uniques = pd.factorize(col, sort=True)
        out[c] = codes.astype(np.int32)
        print(f"[info] static_df: 非数値列をコード化 -> {c} （{len(uniques)}カテゴリ）")
    return out

def coerce_temporal_numeric(df: pd.DataFrame, protected=("unique_id","ds","y")):
    """Force every non-protected, non-numeric column of ``df`` to numeric, in place.

    Each such column is converted with ``pd.to_numeric(errors="coerce")``.
    Columns that end up entirely NaN are dropped; the others have their gaps
    forward-filled, then back-filled, then set to 0.0.

    Parameters:
        df: frame to fix; it is modified in place and also returned.
        protected: column names that are never touched.

    Returns:
        The same ``df`` object.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    is_bool = pd.api.types.is_bool_dtype
    temporal_cols = [c for c in df.columns if c not in protected]
    drop_cols = []
    for c in temporal_cols:
        # pandas' predicates also understand extension dtypes, on which
        # np.issubdtype raises TypeError; bool still takes the coercion path.
        if is_numeric(df[c]) and not is_bool(df[c]):
            continue
        coerced = pd.to_numeric(df[c], errors="coerce")
        if coerced.isna().all():
            drop_cols.append(c)
        else:
            # .ffill()/.bfill() replace the deprecated fillna(method=...) form.
            df[c] = coerced.ffill().bfill().fillna(0.0)
            print(f"[info] temporal: {c} を数値化（欠損は前後詰め→0）")
    if drop_cols:
        df.drop(columns=drop_cols, inplace=True)
        print(f"[info] temporal: 数値化不能のためドロップ -> {drop_cols}")
    return df

def select_topk_features(df: pd.DataFrame, feature_cols, k: int, target_col="y", min_std=1e-12):
    """Pick up to ``k`` feature columns ranked by |Pearson correlation| with the target.

    Parameters:
        df: frame holding the features and ``target_col``.
        feature_cols: candidate column names.
        k: maximum number of columns to return; ``k <= 0`` returns [].
        target_col: name of the target column.
        min_std: columns whose standard deviation is not above this are ignored.

    Returns:
        A list of column names. Ranked columns come first (highest absolute
        correlation first); if fewer than ``k`` could be ranked, the remaining
        non-constant columns are appended in their original order.
    """
    if k <= 0 or len(feature_cols) == 0:
        return []
    std = df[feature_cols].std(numeric_only=True)
    keep = std[std > min_std].index.tolist()
    if not keep:
        return []
    y = df[target_col].astype(float)
    corrs = {}
    for c in keep:
        x = df[c].astype(float)
        valid = x.notna() & y.notna()
        if valid.sum() < 3:
            continue
        xc = x[valid]
        yc = y[valid]
        # Pearson r is undefined when either side is constant on the valid
        # rows; skipping keeps NaN out of the ranking, where it would make the
        # sort order arbitrary.
        if xc.std() < min_std or yc.std() < min_std:
            continue
        r = float(abs(np.corrcoef(xc, yc)[0, 1]))
        if np.isfinite(r):
            corrs[c] = r
    if not corrs:
        return keep[:k]
    ranked = sorted(corrs, key=corrs.get, reverse=True)
    picked = ranked[:k]
    if len(picked) < k:
        already = set(picked)
        picked += [c for c in keep if c not in already][: k - len(picked)]
    return picked

def lightning_precision():
    """Return the Trainer precision value matching the installed Lightning major version.

    Gives "16-mixed" for major version 2 or later (also when the version
    cannot be read) and the integer 16 otherwise.
    """
    legacy, modern = 16, "16-mixed"
    try:
        head, _, _ = pl.__version__.partition(".")
        is_modern = int(head) >= 2
    except Exception:
        is_modern = True
    return modern if is_modern else legacy

def plot_last_window(cv_df: pd.DataFrame, model_cols, out_png: str, uid: str = None):
    """Save a line chart of actuals vs. predictions for the most recent CV cutoff.

    A single series is plotted: ``uid`` if it has rows in that window,
    otherwise the first ``unique_id`` after sorting. When ``cv_df`` has no
    ``cutoff`` column a warning is printed and nothing is drawn.
    """
    has_cutoff = "cutoff" in cv_df.columns
    if not has_cutoff:
        print("[warn] cutoff がないため描画スキップ")
        return

    last_cutoff = cv_df["cutoff"].max()
    rows = cv_df[cv_df["cutoff"] == last_cutoff]
    if uid is not None:
        chosen = rows[rows["unique_id"] == uid]
        rows = rows if chosen.empty else chosen
    rows = rows.sort_values(["unique_id", "ds"])
    if rows["unique_id"].nunique() > 1:
        leading_id = rows["unique_id"].iloc[0]
        rows = rows[rows["unique_id"] == leading_id]

    plt.figure(figsize=(12, 5))
    plt.plot(rows["ds"], rows["y"], label="y", linewidth=2)
    for column in model_cols:
        plt.plot(rows["ds"], rows[column], label=column, linewidth=1)
    plt.title(f"Last CV Window @ cutoff={last_cutoff}")
    plt.xlabel("ds")
    plt.ylabel("value")
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_png)
    plt.close()
    print(f"[info] 予測可視化を保存: {out_png}")

# =========================================================
# 1) Load data & preprocess
# =========================================================
print(f"[info] CSV を読込中: {DATA_CSV}")
raw = pd.read_csv(DATA_CSV)
print(f"[info] loaded shape: {raw.shape}")
# Only the first ten column names are echoed.
print(f"[info] columns: {list(raw.columns)[:10]} ...")

df = preprocess_df(raw)
# Exogenous columns are recognised by their futr_/hist_/stat_ prefix.
futr_cols, hist_cols, stat_cols = split_exog_by_prefix(df)
print(f"[info] exog sizes (raw): futr={len(futr_cols)} hist={len(hist_cols)} stat={len(stat_cols)}")

# =========================================================
# 2) Build static_df (non-numeric -> factorize) and remove stat_* from df
# =========================================================
static_df = None
if len(stat_cols):
    # Distinct (unique_id, stat_*) rows form the static table.
    static_df = df[["unique_id", *stat_cols]].drop_duplicates()
    static_df = factorize_static(static_df)
    df = df.drop(columns=stat_cols)
    print(f"[info] df から stat_* を削除: {len(stat_cols)} 列")

# Re-derive futr/hist from df now that the stat columns are gone.
futr_cols, hist_cols, _ = split_exog_by_prefix(df)

# =========================================================
# 3) Convert non-numeric time-varying columns to numeric
# =========================================================
df = coerce_temporal_numeric(df)

# Keep only the columns that survived the coercion step.
futr_cols = [c for c in futr_cols if c in df.columns]
hist_cols = [c for c in hist_cols if c in df.columns]
print(f"[info] exog sizes (final): futr={len(futr_cols)} hist={len(hist_cols)} stat={(0 if static_df is None else static_df.shape[1]-1)}")

# =========================================================
# 4) Top-K exogenous selection (memory saving)
# =========================================================
hist_sel = select_topk_features(df, hist_cols, k=TOPK_HIST, target_col="y")
futr_sel = select_topk_features(df, futr_cols, k=TOPK_FUTR, target_col="y")
print(f"[info] 選抜: hist={len(hist_sel)}/{len(hist_cols)} → {len(hist_sel)} 列使用")
print(f"[info] 選抜: futr={len(futr_sel)}/{len(futr_cols)} → {len(futr_sel)} 列使用")

# Every static_df column except unique_id is used as a static exogenous feature.
stat_sel = [] if static_df is None else [c for c in static_df.columns if c!="unique_id"]

# =========================================================
# 5) Auto model settings (minimal configuration)
# =========================================================
prec = lightning_precision()
# Keyword arguments intended for the Lightning Trainer.
# NOTE(review): `prec` is a 16-bit setting even when GPUS == 0, and `devices`
# is None on CPU; confirm the installed Lightning version accepts both.
trainer_kwargs = dict(
    accelerator="gpu" if GPUS > 0 else "cpu",
    devices=GPUS if GPUS > 0 else None,
    precision=prec,
    enable_checkpointing=False,
    logger=False,
    enable_progress_bar=VERBOSE,
)

# AutoModelの設定は最小限に（h以外）
def tft_config(trial):
    """Return the fixed AutoTFT configuration; ``trial`` is accepted but not used.

    Every value comes from module-level settings, so no hyperparameter is sampled.
    """
    training = {
        "loss": LOSS,
        "valid_loss": VALID_LOSS,
        "scaler_type": LOCAL_SCALER_TYPE,
        "early_stop_patience_steps": EARLY_STOP_PATIENCE_STEPS,
    }
    exogenous = {
        "futr_exog_list": futr_sel,
        "hist_exog_list": hist_sel,
        "stat_exog_list": stat_sel,
    }
    return {**training, **exogenous, "trainer_kwargs": trainer_kwargs}

def patchtst_config(trial):
    """Return the fixed AutoPatchTST configuration; ``trial`` is accepted but not used.

    Unlike the TFT configuration, no exogenous column lists are included:
    PatchTST does not support future, historical or static exogenous inputs,
    and forwarding non-empty lists makes model construction fail.
    """
    return {
        "loss": LOSS,
        "valid_loss": VALID_LOSS,
        "scaler_type": LOCAL_SCALER_TYPE,
        "early_stop_patience_steps": EARLY_STOP_PATIENCE_STEPS,
        "trainer_kwargs": trainer_kwargs,
    }

# Branch on whether NeuralForecast.fit declares an n_trials parameter.
fit_accepts_n_trials = "n_trials" in inspect.signature(NeuralForecast.fit).parameters

def build_nf(trials=TRIALS):
    """Create a NeuralForecast object holding AutoTFT and AutoPatchTST.

    Parameters:
        trials: number of search trials.

    Returns:
        ``(nf, fit_kwargs)``. When ``fit_accepts_n_trials`` is true the trial
        count is returned as ``{"n_trials": trials}`` for ``fit``; otherwise it
        is given to each model as ``num_samples`` and ``fit_kwargs`` is empty.
    """
    shared = dict(
        h=H,  # required argument
        backend=BACKEND,
        search_alg=SEARCH_ALG,
        callbacks=CALLBACKS,
        verbose=VERBOSE,
    )
    if fit_accepts_n_trials:
        fit_extra = dict(n_trials=trials)
    else:
        shared["num_samples"] = trials
        fit_extra = {}

    auto_models = [
        AutoTFT(config=tft_config, **shared),
        AutoPatchTST(config=patchtst_config, **shared),
    ]
    forecaster = NeuralForecast(models=auto_models, freq=FREQ)
    return forecaster, fit_extra

# =========================================================
# 6) 学習（OOM 時は自動フォールバック）
# =========================================================
def try_fit(nf, fit_kwargs):
    """Log the run settings and fit ``nf`` on the module-level ``df`` / ``static_df``.

    ``fit_kwargs`` is forwarded to ``nf.fit`` only when ``fit_accepts_n_trials``
    is true. The validation size is always ``H*2``.
    """
    print(f"[info] 学習開始: trials={TRIALS}, val_size={H*2}, freq={FREQ}")
    print(f"[info] early_stop={EARLY_STOP_PATIENCE_STEPS}, scaler={LOCAL_SCALER_TYPE}")
    extra = fit_kwargs if fit_accepts_n_trials else {}
    nf.fit(df=df, static_df=static_df, val_size=H*2, **extra)

nf, fit_kwargs = build_nf(TRIALS)
try:
    try_fit(nf, fit_kwargs)
except Exception as e:
    msg = str(e)
    # Only CUDA out-of-memory failures trigger the retry; anything else is re-raised.
    if "CUDA out of memory" in msg or isinstance(e, torch.cuda.OutOfMemoryError):
        print("[warn] CUDA OOM 検出。縮小構成でリトライします...")
        torch.cuda.empty_cache()

        # Fallback: cut the exogenous variables further. The config functions
        # read hist_sel / futr_sel at call time, so rebinding them here takes effect.
        TOPK_HIST_FALLBACK = 16
        TOPK_FUTR_FALLBACK = 8
        hist_sel = select_topk_features(df, hist_cols, k=TOPK_HIST_FALLBACK, target_col="y")
        futr_sel = select_topk_features(df, futr_cols, k=TOPK_FUTR_FALLBACK, target_col="y")
        print(f"[info] フォールバック選抜: hist={len(hist_sel)} / futr={len(futr_sel)}")

        # Rebuild the models with half the trials, but never fewer than 4.
        # try_fit's log line still prints TRIALS, not this reduced count.
        nf, fit_kwargs = build_nf(max(4, TRIALS//2))
        try_fit(nf, fit_kwargs)
    else:
        raise

print("[info] 学習完了")

# =========================================================
# 7) Save artifacts
# =========================================================
# Each run gets its own timestamped directory with models/, plots/ and tables/ inside.
run_dir   = ensure_dir(os.path.join(ARTIFACTS_ROOT, f"run_{now_str()}"))
models_dir= ensure_dir(os.path.join(run_dir, "models"))
plots_dir = ensure_dir(os.path.join(run_dir, "plots"))
tables_dir= ensure_dir(os.path.join(run_dir, "tables"))

# Run settings stored as meta.json; LOSS and VALID_LOSS are saved via str().
meta = {
    "DATA_CSV": DATA_CSV,
    "TRIALS": TRIALS,
    "SEED": SEED,
    "H": H,
    "FREQ": FREQ,
    "BACKEND": BACKEND,
    "SEARCH_ALG": SEARCH_ALG,
    "LOSS": str(LOSS),
    "VALID_LOSS": str(VALID_LOSS),
    "LOCAL_SCALER_TYPE": LOCAL_SCALER_TYPE,
    "EARLY_STOP_PATIENCE_STEPS": EARLY_STOP_PATIENCE_STEPS,
    "CPUS": CPUS,
    "GPUS": GPUS,
    "futr_exog_list": futr_sel,
    "hist_exog_list": hist_sel,
    "stat_exog_list": stat_sel,
    "fit_accepts_n_trials": fit_accepts_n_trials,
    "precision": prec,
}
with open(os.path.join(run_dir, "meta.json"), "w", encoding="utf-8") as f:
    json.dump(meta, f, ensure_ascii=False, indent=2)

nf.save(models_dir)
print(f"[info] モデルを保存しました: {models_dir}")

# =========================================================
# 8) Reload check
# =========================================================
nf_loaded = NeuralForecast.load(models_dir)
print("[info] モデルをロードしました。")

# =========================================================
# 9) Cross-validation -> metrics & plot
# =========================================================
print("[info] 交差検証（Rolling Origin）を実行します...")
# Try the `h=` keyword first; on TypeError retry with `max_horizon=` instead.
try:
    cv_df = nf_loaded.cross_validation(df=df, static_df=static_df, n_windows=3, step_size=H, h=H)
except TypeError:
    cv_df = nf_loaded.cross_validation(df=df, static_df=static_df, n_windows=3, step_size=H, max_horizon=H)

# Every column other than these four is treated as a model prediction column.
fixed_cols = {"unique_id","ds","y","cutoff"}
model_cols = [c for c in cv_df.columns if c not in fixed_cols]
cv_path = os.path.join(tables_dir, "cv_predictions.csv")
cv_df.to_csv(cv_path, index=False)
print(f"[info] CV 予測テーブルを保存: {cv_path}")

# Simple metrics
def _to_arr(x):
    """Convert ``x`` to a float ndarray."""
    return np.asarray(x, dtype=float)


def smape_np(y, yhat, eps=1e-8):
    """Symmetric MAPE in percent; ``eps`` is added to the denominator."""
    truth, guess = _to_arr(y), _to_arr(yhat)
    numerator = 2 * np.abs(guess - truth)
    denominator = np.abs(truth) + np.abs(guess) + eps
    return 100 * np.mean(numerator / denominator)


def mae_np(y, yhat):
    """Mean absolute error as a Python float."""
    truth, guess = _to_arr(y), _to_arr(yhat)
    return float(np.mean(np.abs(guess - truth)))


def mape_np(y, yhat, eps=1e-8):
    """MAPE in percent; |y| is clipped from below at ``eps`` before dividing."""
    truth, guess = _to_arr(y), _to_arr(yhat)
    floor_abs = np.clip(np.abs(truth), eps, None)
    return 100 * np.mean(np.abs((guess - truth) / floor_abs))


def rmse_np(y, yhat):
    """Root mean squared error as a Python float."""
    truth, guess = _to_arr(y), _to_arr(yhat)
    return float(np.sqrt(np.mean((guess - truth) ** 2)))

# One metrics row per prediction column, scored over all CV rows together.
rows=[]
for m in model_cols:
    y=cv_df["y"].values; yhat=cv_df[m].values
    rows.append({
        "model":m,
        "SMAPE":smape_np(y,yhat),
        "MAE":mae_np(y,yhat),
        "MAPE":mape_np(y,yhat),
        "RMSE":rmse_np(y,yhat),
        "n":len(cv_df)
    })
metrics_df = pd.DataFrame(rows)
metrics_path = os.path.join(tables_dir, "metrics.csv")
metrics_df.to_csv(metrics_path, index=False)
print(f"[info] 評価指標を保存: {metrics_path}")
print(metrics_df)

plot_last_window(cv_df, model_cols, out_png=os.path.join(plots_dir, "last_window.png"))

print(f"[done] 成果物ルート: {run_dir}")

In [1]:
# NF Auto — 汎用EXOG適応＆署名フィルタ改修版（CVでval_size明示＋ES一時無効フォールバック付き）
# - モデル能力に応じて F/H/S を自動で付与/除去
# - モデル __init__ 署名＋Trainer 署名で未知引数を除去（安全）
# - 主要パラの簡易シノニム変換でバージョン差に耐性
# - 交差検証でも val_size を明示、必要時は EarlyStopping を一時無効化

import os, json, time, warnings, random, inspect
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

import torch
import pytorch_lightning as pl
import optuna

from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoTFT, AutoPatchTST
from neuralforecast.models import TFT, PatchTST
from neuralforecast.losses.pytorch import SMAPE

# =============================
# 0) Run parameters
# =============================
# Default CSV location, used when NF_DATA_CSV is not set.
path= "/mnt/e/env/ts/datas/data/data_long/ft_normal/bingo5/by_unique_id/N1.csv"

# Each setting can be overridden through the named environment variable.
DATA_CSV = os.environ.get("NF_DATA_CSV", path)
TRIALS   = int(os.environ.get("NF_TRIAL_NUM_SAMPLES", 1))
SEED     = int(os.environ.get("NF_SEED", 1029))
H        = int(os.environ.get("NF_H", 1))
ARTIFACTS_ROOT = os.environ.get("NF_ARTIFACTS_ROOT", "nf_auto_runs")
FREQ = os.environ.get("NF_FREQ", "D")

LOSS = SMAPE()
BACKEND = "optuna"
# Seeded sampler instance rather than a sampler name.
SEARCH_ALG = optuna.samplers.TPESampler(seed=SEED)
EARLY_STOP_PATIENCE_STEPS = int(os.environ.get("NF_EARLY_STOP", 2))
VERBOSE = True
TOPK_HIST = int(os.environ.get("NF_TOPK_HIST", 32))
TOPK_FUTR = int(os.environ.get("NF_TOPK_FUTR", 16))

# NOTE(review): -1 presumably means "use all available" — confirm where these are consumed.
CPUS = -1
GPUS = -1

random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED); pl.seed_everything(SEED, workers=True)
# Best effort: any failure to set the matmul precision is silently ignored.
try:
    torch.set_float32_matmul_precision("high")
except Exception:
    pass

# =============================
# 1) 前処理ユーティリティ
# =============================

def ensure_dir(p):
    """Create directory ``p`` (with parents) when missing and return ``p``."""
    os.makedirs(p, exist_ok=True)
    return p

def now_str():
    """Current local time as ``YYYYMMDD-HHMMSS``."""
    layout = "%Y%m%d-%H%M%S"
    return time.strftime(layout)

def to_datetime_col(df, col):
    """Return a copy of ``df`` whose ``col`` is parsed to datetime (failures become NaT)."""
    out = df.copy()
    out[col] = pd.to_datetime(out[col], errors="coerce")
    return out

def to_numeric_col(df, col):
    """Return a copy of ``df`` whose ``col`` is converted to numeric (failures become NaN)."""
    out = df.copy()
    out[col] = pd.to_numeric(out[col], errors="coerce")
    return out


def preprocess_df(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of ``raw`` with one row per (unique_id, ds).

    Steps: add ``unique_id`` = "series_0" when the column is absent, coerce
    ``ds`` to datetime and ``y`` to numeric, drop rows where either is missing
    afterwards (reporting how many), sort by (unique_id, ds), keep the last
    row of each duplicated key and reset the index. When the module-level
    ``FREQ`` is "D", a warning is issued if any consecutive within-series
    timestamps are not one whole day apart.
    """
    df = raw.copy()
    if "unique_id" not in df.columns:
        df["unique_id"] = "series_0"
    # df is already a private copy, so the columns are converted directly.
    df["ds"] = pd.to_datetime(df["ds"], errors="coerce")
    df["y"] = pd.to_numeric(df["y"], errors="coerce")

    before = len(df)
    df = df.dropna(subset=["ds", "y"])
    dropped = before - len(df)
    if dropped > 0:
        # This used to be print(f("...")), which called an undefined name `f`
        # and raised NameError whenever a row was dropped.
        print(f"[info] 前処理: ds/y の変換で {dropped} 行を除去しました。")

    df = (
        df.sort_values(["unique_id", "ds"])
        .drop_duplicates(subset=["unique_id", "ds"], keep="last")
        .reset_index(drop=True)
    )
    # Whole-day difference between consecutive timestamps in each series.
    gap = df.groupby("unique_id")["ds"].diff().dropna()
    if not gap.empty and (gap.dt.days != 1).any() and FREQ == "D":
        warnings.warn("等間隔でないタイムスタンプが検出されました。freq 指定または欠損補完を確認してください。")
    return df


def split_exog_by_prefix(df):
    """Return three lists of column names: those starting with futr_, hist_ and stat_."""
    names = list(df.columns)
    futr, hist, stat = (
        [name for name in names if name.startswith(prefix)]
        for prefix in ("futr_", "hist_", "stat_")
    )
    return futr, hist, stat


def factorize_static(static_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``static_df`` with each non-numeric column replaced by int32 codes.

    ``unique_id`` is skipped. Codes come from ``pd.factorize(sort=True)``, so
    missing values are encoded as -1. The input frame is left unchanged.
    """
    out = static_df.copy()
    for c in out.columns:
        if c == "unique_id":
            continue
        col = out[c]
        # np.issubdtype raises TypeError on pandas extension dtypes (category,
        # string, ...), so the pandas predicates are used; bool is excluded so
        # boolean columns are still encoded, as before.
        if pd.api.types.is_numeric_dtype(col) and not pd.api.types.is_bool_dtype(col):
            continue
        codes, _ = pd.factorize(col, sort=True)
        out[c] = codes.astype(np.int32)
        print(f"[info] static_df: 非数値列をコード化 -> {c}")
    return out


def coerce_temporal_numeric(df: pd.DataFrame, protected=("unique_id","ds","y")):
    """Convert every non-protected, non-numeric column of ``df`` to numeric, in place.

    Columns are converted with ``pd.to_numeric(errors="coerce")``. A column
    that becomes entirely NaN is dropped; otherwise its gaps are forward-filled,
    back-filled and finally set to 0.0.

    Parameters:
        df: frame to fix; modified in place and returned.
        protected: column names that are left untouched.

    Returns:
        The same ``df`` object.
    """
    def _is_plain_numeric(series):
        # pandas' predicates handle extension dtypes, where np.issubdtype
        # raises TypeError; bool still goes through the coercion path.
        return pd.api.types.is_numeric_dtype(series) and not pd.api.types.is_bool_dtype(series)

    drop = []
    for c in [name for name in df.columns if name not in protected]:
        if _is_plain_numeric(df[c]):
            continue
        coerced = pd.to_numeric(df[c], errors="coerce")
        if coerced.isna().all():
            drop.append(c)
            continue
        # .ffill()/.bfill() replace the deprecated fillna(method=...) form.
        df[c] = coerced.ffill().bfill().fillna(0.0)
        print(f"[info] temporal: {c} を数値化（欠損は前後詰め→0）")
    if drop:
        df.drop(columns=drop, inplace=True)
        print(f"[info] temporal: 数値化不能のためドロップ -> {drop}")
    return df


def select_topk_features(df, feature_cols, k: int, target_col="y", min_std=1e-12):
    """Choose up to ``k`` columns from ``feature_cols`` by |Pearson correlation| with the target.

    Parameters:
        df: frame holding the features and ``target_col``.
        feature_cols: candidate column names.
        k: maximum number of names to return; ``k <= 0`` returns [].
        target_col: name of the target column.
        min_std: columns whose standard deviation is not above this are ignored.

    Returns:
        A list of names, best-correlated first. If fewer than ``k`` columns
        could be ranked, the other non-constant columns follow in their
        original order.
    """
    if k <= 0 or len(feature_cols) == 0:
        return []
    std = df[feature_cols].std(numeric_only=True)
    keep = std[std > min_std].index.tolist()
    if not keep:
        return []

    y = df[target_col].astype(float)
    corrs = {}
    for c in keep:
        x = df[c].astype(float)
        valid = x.notna() & y.notna()
        if valid.sum() < 3:
            continue
        xc, yc = x[valid], y[valid]
        # A constant feature or target on the valid rows gives an undefined
        # (NaN) correlation, which would make the ranking order arbitrary.
        if xc.std() < min_std or yc.std() < min_std:
            continue
        score = float(abs(np.corrcoef(xc, yc)[0, 1]))
        if np.isfinite(score):
            corrs[c] = score
    if not corrs:
        return keep[:k]

    picked = sorted(corrs, key=corrs.get, reverse=True)[:k]
    if len(picked) < k:
        taken = set(picked)
        picked += [c for c in keep if c not in taken][: k - len(picked)]
    return picked


def lightning_precision():
    """Return the mixed-precision setting matching the installed Lightning major version.

    Returns:
        ``"16-mixed"`` when the major version parsed from ``pl.__version__``
        is 2 or higher (also used when the version cannot be parsed),
        otherwise the integer ``16``.
    """
    try:
        major = int(pl.__version__.split(".")[0])
    except (AttributeError, ValueError):
        # Missing or non-numeric version string: assume a 2.x-style API.
        major = 2
    return "16-mixed" if major >= 2 else 16

# =============================
# 2) モデル能力→EXOG 自動適応
# =============================

# Fallback table of exogenous-input support, keyed by model name.
# Flags: F = future exogenous, H = historical exogenous, S = static exogenous.
# exog_capabilities() uses an entry only when the model class itself does not
# expose the matching EXOGENOUS_FUTR / EXOGENOUS_HIST / EXOGENOUS_STAT attribute.
MODEL_EXOG_FALLBACK = {
    "Autoformer": dict(F=True, H=False, S=False),
    "BiTCN": dict(F=True, H=True, S=True),
    "DeepAR": dict(F=True, H=False, S=True),
    "DeepNPTS": dict(F=True, H=True, S=True),
    "DilatedRNN": dict(F=True, H=True, S=True),
    "FEDformer": dict(F=True, H=False, S=False),
    "GRU": dict(F=True, H=True, S=True),
    "HINT": dict(F=True, H=True, S=True),
    "Informer": dict(F=True, H=False, S=False),
    "iTransformer": dict(F=False, H=False, S=False),
    "KAN": dict(F=True, H=True, S=True),
    "LSTM": dict(F=True, H=True, S=True),
    "MLP": dict(F=True, H=True, S=True),
    "MLPMultivariate": dict(F=True, H=True, S=True),
    "NBEATS": dict(F=False, H=False, S=False),
    "NBEATSx": dict(F=True, H=True, S=True),
    "NHITS": dict(F=True, H=True, S=True),
    "NLinear": dict(F=False, H=False, S=False),
    "PatchTST": dict(F=False, H=False, S=False),
    "RMoK": dict(F=False, H=False, S=False),
    "RNN": dict(F=True, H=True, S=True),
    "SOFTS": dict(F=False, H=False, S=False),
    "StemGNN": dict(F=False, H=False, S=False),
    "TCN": dict(F=True, H=True, S=True),
    "TFT": dict(F=True, H=True, S=True),
    "TiDE": dict(F=True, H=True, S=True),
    "TimeMixer": dict(F=False, H=False, S=False),
    "TimeLLM": dict(F=False, H=False, S=False),
    "TimesNet": dict(F=True, H=False, S=False),
    "TimeXer": dict(F=True, H=False, S=False),
    "TSMixer": dict(F=False, H=False, S=False),
    "TSMixerx": dict(F=True, H=True, S=True),
    "VanillaTransformer": dict(F=True, H=False, S=False),
}


def exog_capabilities(model_cls, fallback_key: str):
    """Report which exogenous inputs *model_cls* supports.

    Each flag is read from the class attribute ``EXOGENOUS_FUTR`` /
    ``EXOGENOUS_HIST`` / ``EXOGENOUS_STAT``; when the attribute is missing the
    value comes from ``MODEL_EXOG_FALLBACK[fallback_key]`` (default ``False``).

    Returns:
        ``{"F": bool, "H": bool, "S": bool}``.
    """
    defaults = MODEL_EXOG_FALLBACK.get(fallback_key, {})
    attr_by_flag = (
        ("F", "EXOGENOUS_FUTR"),
        ("H", "EXOGENOUS_HIST"),
        ("S", "EXOGENOUS_STAT"),
    )
    caps = {}
    for flag, attr in attr_by_flag:
        caps[flag] = bool(getattr(model_cls, attr, defaults.get(flag, False)))
    return caps


def attach_exog(config: dict, model_cls, fallback_key: str, futr_sel, hist_sel, stat_sel):
    """Write the three exogenous lists into *config* according to model support.

    A list is replaced by ``[]`` when ``exog_capabilities`` reports that the
    model does not support that kind of input.  *config* is updated in place.

    Returns:
        ``(config, caps)`` where *caps* is the capability dict that was used.
    """
    caps = exog_capabilities(model_cls, fallback_key)
    config["futr_exog_list"] = futr_sel if caps["F"] else []
    config["hist_exog_list"] = hist_sel if caps["H"] else []
    config["stat_exog_list"] = stat_sel if caps["S"] else []
    return config, caps

# --- 署名フィルタとシノニム補正 ---

# Parameter names of pl.Trainer.__init__ (without "self"); empty set if pl has no Trainer.
TRAINER_ALLOWED = (set(inspect.signature(pl.Trainer.__init__).parameters.keys()) - {"self"}) if hasattr(pl, "Trainer") else set()
# Exogenous-list keys that filter_kwargs_by_signature() keeps regardless of the model signature.
ALWAYS_ALLOWED = {"futr_exog_list","hist_exog_list","stat_exog_list"}

def _apply_param_synonyms(cfg: dict, model_cls) -> dict:
    """Return a copy of *cfg* with keys renamed to the spelling *model_cls* accepts.

    A key is renamed only when the model's ``__init__`` does not accept the
    current name but does accept the alternative one.  If the signature
    cannot be inspected, nothing is renamed.  *cfg* itself is not modified.
    """
    try:
        accepted = set(inspect.signature(model_cls.__init__).parameters) - {"self"}
    except Exception:
        accepted = set()

    # (current name, alternative name), applied in this order.
    synonym_pairs = (
        ("n_heads", "n_head"),
        ("n_head", "n_heads"),
        ("d_model", "d"),
        ("d_ff", "ff_dim"),
        ("learning_rate", "lr"),
    )

    renamed = dict(cfg)
    for old, new in synonym_pairs:
        if old in renamed and old not in accepted and new in accepted:
            renamed[new] = renamed.pop(old)
    return renamed


def filter_kwargs_by_signature(config: dict, model_cls):
    """Keep only the entries of *config* that the model or the trainer accepts.

    Keys are first normalised with ``_apply_param_synonyms``.  An entry
    survives when its key is a parameter of ``model_cls.__init__``, is in
    ``TRAINER_ALLOWED``, or is in ``ALWAYS_ALLOWED``.

    Returns:
        A new filtered dict; *config* is not modified.
    """
    try:
        signature_names = set(inspect.signature(model_cls.__init__).parameters)
    except Exception:
        signature_names = set()
    signature_names -= {"self"}

    normalized = _apply_param_synonyms(config, model_cls)

    permitted = signature_names | TRAINER_ALLOWED | ALWAYS_ALLOWED
    return {key: normalized[key] for key in normalized if key in permitted}


# =============================
# 3) データ読み込みと外生選抜
# =============================
# Load the CSV and normalise it with preprocess_df().
print(f"[info] CSV を読込中: {DATA_CSV}")
raw = pd.read_csv(DATA_CSV)
print(f"[info] loaded shape: {raw.shape}")

df = preprocess_df(raw)
futr_cols, hist_cols, stat_cols = split_exog_by_prefix(df)
print(f"[info] exog sizes (raw): futr={len(futr_cols)} hist={len(hist_cols)} stat={len(stat_cols)}")

# Move the stat_ columns into a separate static frame and integer-code them.
# NOTE(review): drop_duplicates() leaves several rows per unique_id if a stat_
# column changes over time within a series — confirm static_df is one row per id.
static_df=None
if len(stat_cols):
    static_df = df[["unique_id", *stat_cols]].drop_duplicates()
    static_df = factorize_static(static_df)
    df = df.drop(columns=stat_cols)

# Type clean-up: make the remaining temporal columns numeric.
futr_cols, hist_cols, _ = split_exog_by_prefix(df)
df = coerce_temporal_numeric(df)

# Re-check: coerce_temporal_numeric() may have dropped columns.
futr_cols = [c for c in futr_cols if c in df.columns]
hist_cols = [c for c in hist_cols if c in df.columns]
stat_sel = [] if static_df is None else [c for c in static_df.columns if c!="unique_id"]

# Top-K selection by absolute correlation with y.
hist_sel = select_topk_features(df, hist_cols, k=TOPK_HIST, target_col="y")
futr_sel = select_topk_features(df, futr_cols, k=TOPK_FUTR, target_col="y")
print(f"[info] 選抜: hist={len(hist_sel)}/{len(hist_cols)} | futr={len(futr_sel)}/{len(futr_cols)} | stat={len(stat_sel)}")

# =============================
# 4) Auto 構成（PL引数はフラットで渡す→内部で安全に抽出）
# =============================
# Mixed-precision setting chosen from the installed Lightning version.
prec = lightning_precision()
# Trainer-level options kept as a flat dict; the config functions spread it
# into each model config with **PL_FLAT.
PL_FLAT = dict(
    accelerator="auto",
    devices="auto",
    precision=prec,
    enable_checkpointing=False,
    logger=False,
    enable_progress_bar=True,
)

def tft_config(trial: optuna.trial.Trial):
    """Build one TFT configuration for an Optuna *trial*.

    Samples the tunable hyper-parameters from *trial*, adds the step budget
    (overridable through ``NF_MAX_STEPS`` / ``NF_VAL_CHECK_STEPS``) and the
    flat trainer options, attaches the exogenous lists the model supports and
    finally drops keys neither the model nor the trainer accepts.

    Returns:
        The filtered configuration dict.
    """
    # Sampled values; keyword arguments are evaluated left to right, so the
    # suggest_* calls happen in the listed order.
    sampled = dict(
        input_size=trial.suggest_categorical("input_size", [2*H, 3*H, 4*H]),
        learning_rate=trial.suggest_float("learning_rate", 1e-4, 3e-3, log=True),
        hidden_size=trial.suggest_categorical("hidden_size", [64, 128, 256]),
        n_head=trial.suggest_categorical("n_head", [2, 4, 8]),
        dropout=trial.suggest_float("dropout", 0.0, 0.3),
        batch_size=trial.suggest_categorical("batch_size", [64, 128]),
    )
    budget = dict(
        max_steps=int(os.environ.get("NF_MAX_STEPS", 800)),
        val_check_steps=int(os.environ.get("NF_VAL_CHECK_STEPS", 50)),
        early_stop_patience_steps=EARLY_STOP_PATIENCE_STEPS,
    )
    params = {**sampled, **budget, **PL_FLAT}
    params, caps = attach_exog(params, TFT, "TFT", futr_sel, hist_sel, stat_sel)
    params = filter_kwargs_by_signature(params, TFT)
    print(f"[cap] TFT exog -> F={caps['F']} H={caps['H']} S={caps['S']}")
    return params


def patchtst_config(trial: optuna.trial.Trial):
    """Build one PatchTST configuration for an Optuna *trial*.

    Samples the tunable hyper-parameters from *trial*, adds the step budget
    (overridable through ``NF_MAX_STEPS`` / ``NF_VAL_CHECK_STEPS``) and the
    flat trainer options, attaches the exogenous lists the model supports and
    finally drops keys neither the model nor the trainer accepts.

    Returns:
        The filtered configuration dict.
    """
    # Sampled values; keyword arguments are evaluated left to right, so the
    # suggest_* calls happen in the listed order.
    sampled = dict(
        input_size=trial.suggest_categorical("input_size", [2*H, 3*H, 4*H]),
        d_model=trial.suggest_categorical("d_model", [64, 128, 192]),
        n_heads=trial.suggest_categorical("n_heads", [2, 4, 8]),
        d_ff=trial.suggest_categorical("d_ff", [128, 256, 512]),
        patch_len=trial.suggest_categorical("patch_len", [8, 16, 32]),
        stride=trial.suggest_categorical("stride", [8, 16]),
        dropout=trial.suggest_float("dropout", 0.0, 0.2),
        learning_rate=trial.suggest_float("learning_rate", 1e-4, 3e-3, log=True),
        batch_size=trial.suggest_categorical("batch_size", [64, 128]),
    )
    budget = dict(
        max_steps=int(os.environ.get("NF_MAX_STEPS", 800)),
        val_check_steps=int(os.environ.get("NF_VAL_CHECK_STEPS", 50)),
        early_stop_patience_steps=EARLY_STOP_PATIENCE_STEPS,
    )
    params = {**sampled, **budget, **PL_FLAT}
    params, caps = attach_exog(params, PatchTST, "PatchTST", futr_sel, hist_sel, stat_sel)
    params = filter_kwargs_by_signature(params, PatchTST)
    print(f"[cap] PatchTST exog -> F={caps['F']} H={caps['H']} S={caps['S']}")
    return params


# One Auto* wrapper per architecture; each receives its config function and
# runs TRIALS search samples.
models = [
    AutoTFT(h=H, loss=LOSS, backend=BACKEND, config=tft_config, search_alg=SEARCH_ALG, num_samples=TRIALS, verbose=VERBOSE),
    AutoPatchTST(h=H, loss=LOSS, backend=BACKEND, config=patchtst_config, search_alg=SEARCH_ALG, num_samples=TRIALS, verbose=VERBOSE),
]

# Forecasting container for both models, created with "standard" local scaling.
nf = NeuralForecast(models=models, freq=FREQ, local_scaler_type="standard")

# =============================
# 5) 学習
# =============================

def try_fit():
    """Fit all models in ``nf`` on the module-level ``df`` / ``static_df``."""
    # Validation window of max(2*H, H) steps, to leave some slack.
    holdout = max(2*H, H)
    print(f"[info] 学習開始: trials={TRIALS}, val_size={holdout}, freq={FREQ}")
    nf.fit(df=df, static_df=static_df, val_size=holdout)

# Fit once; on a CUDA out-of-memory error retry a single time with roughly
# half as many exogenous features. Any other exception is re-raised.
try:
    try_fit()
except Exception as e:
    msg=str(e)
    if "CUDA out of memory" in msg or isinstance(e, torch.cuda.OutOfMemoryError):
        print("[warn] CUDA OOM 検出。縮小構成でリトライします...")
        torch.cuda.empty_cache()
        # NOTE(review): max(8, ...) / max(4, ...) can exceed the original K
        # when TOPK_HIST < 8 or TOPK_FUTR < 4 — confirm that is intended.
        hist_small = select_topk_features(df, hist_cols, k=max(8, TOPK_HIST//2), target_col="y")
        futr_small = select_topk_features(df, futr_cols, k=max(4, TOPK_FUTR//2), target_col="y")
        # Rebind the module-level selections; the config functions read
        # hist_sel / futr_sel when they are called, so the retry picks them up.
        hist_sel, futr_sel = hist_small, futr_small
        try_fit()
    else:
        raise

print("[info] 学習完了")

# =============================
# 6) 成果物保存・CV・指標
# =============================
# Create a timestamped run directory with models/, plots/ and tables/ inside.
run_dir   = ensure_dir(os.path.join(ARTIFACTS_ROOT, f"run_{now_str()}"))
models_dir= ensure_dir(os.path.join(run_dir, "models"))
plots_dir = ensure_dir(os.path.join(run_dir, "plots"))
tables_dir= ensure_dir(os.path.join(run_dir, "tables"))

# Run settings written next to the artifacts as meta.json.
meta = {
    "DATA_CSV": DATA_CSV, "TRIALS": TRIALS, "SEED": SEED, "H": H, "FREQ": FREQ,
    "BACKEND": BACKEND, "SEARCH_ALG": type(SEARCH_ALG).__name__, "LOSS": str(LOSS),
    "LOCAL_SCALER_TYPE": "standard", "EARLY_STOP_PATIENCE_STEPS": EARLY_STOP_PATIENCE_STEPS,
    "CPUS": CPUS, "GPUS": GPUS,
    "futr_exog_list": futr_sel, "hist_exog_list": hist_sel, "stat_exog_list": stat_sel,
    "precision": prec,
}
with open(os.path.join(run_dir, "meta.json"), "w", encoding="utf-8") as f:
    json.dump(meta, f, ensure_ascii=False, indent=2)

# Save the fitted models, then load them back; the loaded copy (nf_loaded)
# is what the cross-validation below runs on.
nf.save(models_dir)
print(f"[info] モデルを保存しました: {models_dir}")

nf_loaded = NeuralForecast.load(models_dir)
print("[info] モデルをロードしました。")

print("[info] 交差検証（Rolling Origin）を実行します...")
val_size_cv = max(2*H, H)

# Substring of the RuntimeError raised when EarlyStopping cannot find its
# monitored metric.
_ES_METRIC_ERROR = "Early stopping conditioned on metric"


def _cv_once(horizon_kw: str):
    """Run one rolling-origin cross-validation on ``nf_loaded``.

    *horizon_kw* is the keyword under which the horizon is passed (``"h"`` or
    ``"max_horizon"``); the name differs between library versions.
    ``val_size`` is always passed explicitly so a validation loader exists
    for the early-stopping metric.
    """
    return nf_loaded.cross_validation(
        df=df, static_df=static_df,
        n_windows=3, step_size=H,
        val_size=val_size_cv,
        **{horizon_kw: H},
    )


def _disable_early_stopping(models):
    """Best-effort removal of early stopping from every model in *models*."""
    for m in models:
        if hasattr(m, "early_stop_patience_steps"):
            try:
                m.early_stop_patience_steps = None
            except Exception:
                pass  # deliberate best effort: attribute may be read-only
        if hasattr(m, "trainer_kwargs"):
            try:
                m.trainer_kwargs.pop("early_stop_patience_steps", None)
            except Exception:
                pass  # deliberate best effort: trainer_kwargs may not be a dict


def _cv_retry_without_es(horizon_kw: str, err: RuntimeError):
    """Retry the CV with early stopping disabled, or re-raise *err*.

    Only the "monitored metric missing" RuntimeError triggers the retry; any
    other RuntimeError is propagated unchanged.
    """
    if _ES_METRIC_ERROR not in str(err):
        raise err
    print("[warn] CV中の EarlyStopping 監視メトリクス未検出。CVに限り EarlyStopping を一時無効化して再実行します。")
    _disable_early_stopping(nf_loaded.models)
    return _cv_once(horizon_kw)


try:
    cv_df = _cv_once("h")
except RuntimeError as e:
    cv_df = _cv_retry_without_es("h", e)
except TypeError:
    # Version difference: fall back to the max_horizon keyword name.
    try:
        cv_df = _cv_once("max_horizon")
    except RuntimeError as e:
        cv_df = _cv_retry_without_es("max_horizon", e)

# Every cv_df column that is not one of these bookkeeping columns is treated
# as a model's prediction column.
fixed_cols = {"unique_id","ds","y","cutoff"}
model_cols = [c for c in cv_df.columns if c not in fixed_cols]
cv_path = os.path.join(tables_dir, "cv_predictions.csv")
cv_df.to_csv(cv_path, index=False)
print(f"[info] CV 予測テーブルを保存: {cv_path}")

# 指標
def _def_to_arr(x):
    """Convert *x* to a float ``numpy`` array."""
    return np.asarray(x, dtype=float)


def smape_np(y, yhat, eps=1e-8):
    """Return the symmetric MAPE in percent; *eps* guards against a zero denominator."""
    y = _def_to_arr(y)
    yhat = _def_to_arr(yhat)
    return float(100 * np.mean(2 * np.abs(yhat - y) / (np.abs(y) + np.abs(yhat) + eps)))


def mae_np(y, yhat):
    """Return the mean absolute error."""
    y = _def_to_arr(y)
    yhat = _def_to_arr(yhat)
    return float(np.mean(np.abs(yhat - y)))


def mape_np(y, yhat, eps=1e-8):
    """Return the MAPE in percent; ``|y|`` is clipped to at least *eps*."""
    y = _def_to_arr(y)
    yhat = _def_to_arr(yhat)
    den = np.clip(np.abs(y), eps, None)
    return float(100 * np.mean(np.abs((yhat - y) / den)))


def rmse_np(y, yhat):
    """Return the root mean squared error."""
    y = _def_to_arr(y)
    yhat = _def_to_arr(yhat)
    return float(np.sqrt(np.mean((yhat - y) ** 2)))

# One metrics row per prediction column, computed over all CV rows at once.
rows=[]
for m in model_cols:
    y=cv_df["y"].values; yhat=cv_df[m].values
    rows.append({"model":m, "SMAPE":smape_np(y,yhat), "MAE":mae_np(y,yhat), "MAPE":mape_np(y,yhat), "RMSE":rmse_np(y,yhat), "n":len(cv_df)})
metrics_df = pd.DataFrame(rows)
metrics_path = os.path.join(tables_dir, "metrics.csv")
metrics_df.to_csv(metrics_path, index=False)
print(f"[info] 評価指標を保存: {metrics_path}\n{metrics_df}")

# 描画
# Plot actuals and predictions for the last CV cutoff (first series only when
# several ids are present).  Plotting is best effort: a failure is reported
# with its cause and the run continues.
if not cv_df.empty:
    fig = None
    try:
        last = cv_df["cutoff"].max()
        sub = cv_df[cv_df["cutoff"] == last]
        if sub["unique_id"].nunique() > 1:
            sub = sub[sub["unique_id"] == sub["unique_id"].iloc[0]]
        fig = plt.figure(figsize=(12,5))
        plt.plot(sub["ds"], sub["y"], label="y", linewidth=2)
        for m in model_cols:
            plt.plot(sub["ds"], sub[m], label=m, linewidth=1)
        plt.title(f"Last CV Window @ cutoff={last}")
        plt.xlabel("ds"); plt.ylabel("value"); plt.legend(); plt.tight_layout()
        out_png = os.path.join(plots_dir, "last_window.png")
        plt.savefig(out_png)
        print(f"[info] 予測可視化を保存: {out_png}")
    except Exception as e:
        # Include the cause so a failed plot can be diagnosed from the log.
        print(f"[warn] 可視化に失敗しました（処理を継続します）。 {type(e).__name__}: {e}")
    finally:
        # Close the figure on failure too, so it is not left open.
        if fig is not None:
            plt.close(fig)

print(f"[done] 成果物ルート: {run_dir}")


Seed set to 1029
  _C._set_float32_matmul_precision(precision)
[I 2025-11-12 07:52:04,089] A new study created in memory with name: no-name-eca17002-db25-4948-a0be-f2d3f1773b4b


[info] CSV を読込中: /mnt/e/env/ts/datas/data/data_long/ft_normal/bingo5/by_unique_id/N1.csv
[info] loaded shape: (364, 162)
[info] exog sizes (raw): futr=34 hist=122 stat=2
[info] static_df: 非数値列をコード化 -> stat_ds_quarteryear
[info] static_df: 非数値列をコード化 -> stat_ds_month_lbl
[info] 選抜: hist=32/122 | futr=16/34 | stat=2
[cap] TFT exog -> F=True H=True S=True
[cap] PatchTST exog -> F=False H=False S=False
[info] 学習開始: trials=1, val_size=2, freq=D


  0%|          | 0/1 [00:00<?, ?it/s]

Seed set to 1
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[cap] TFT exog -> F=True H=True S=True


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/az/miniconda3/envs/nc/lib/python3.11/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.

  | Name                    | Type                     | Params | Mode 
-----------------------------------------------------------------------------
0 | loss                    | SMAPE                    | 0      | train
1 | padder_train            | ConstantPad1d            | 0      | train
2 | scaler                  | TemporalNorm             | 0      | train
3 | embedding               | TFTEmbedding             | 26.1 K | train
4 | static_encoder          | StaticCovariateEncoder   | 1.8 M  | train
5 | temporal_encoder        | TemporalCovariateEncoder | 23.6 M | train
6 | temporal_fusion_decoder | TemporalFusionDecoder    | 1.1 M  | train
7 | output_adapter          | Linear               

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Seed set to 1
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/az/miniconda3/envs/nc/lib/python3.11/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.

  | Name                    | Type                     | Params | Mode 
-----------------------------------------------------------------------------
0 | loss                    | SMAPE                    | 0      | train
1 | padder_train            | ConstantPad1d            | 0      | train
2 | scaler                  | TemporalNorm             | 0      | train
3 | embedding               | TFTEmbedding             | 26.1 K | train
4 | static_encoder          | StaticCovariateEncoder   | 1.8 M  | train
5 | temporal_encoder 

[I 2025-11-12 07:52:22,974] Trial 0 finished with value: 1.6655426025390625 and parameters: {'input_size': 4, 'learning_rate': 0.002134570940152967, 'hidden_size': 256, 'n_head': 2, 'dropout': 0.14874923353338593, 'batch_size': 128}. Best is trial 0 with value: 1.6655426025390625.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[I 2025-11-12 07:52:41,127] A new study created in memory with name: no-name-4a596da5-8dec-41cd-81b4-64e3e4b14a90


  0%|          | 0/1 [00:00<?, ?it/s]

Seed set to 1
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/az/miniconda3/envs/nc/lib/python3.11/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | SMAPE             | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 399 K  | train
-----------------------------------------------------------
399 K     Trainable params
3         Non-trainable params
399 K     Total params
1.600     Total estimated model params size (MB)
90        M

[cap] PatchTST exog -> F=False H=False S=False


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Seed set to 1
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/az/miniconda3/envs/nc/lib/python3.11/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | SMAPE             | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 399 K  | train
-----------------------------------------------------------
399 K     Trainable params
3         Non-trainable params
399 K     Total params
1.600     Total estimated model params size (MB)
90        M

[I 2025-11-12 07:52:43,731] Trial 0 finished with value: 1.095440149307251 and parameters: {'input_size': 4, 'd_model': 64, 'n_heads': 2, 'd_ff': 256, 'patch_len': 16, 'stride': 16, 'dropout': 0.0374688791481127, 'learning_rate': 0.002440608669830101, 'batch_size': 64}. Best is trial 0 with value: 1.095440149307251.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Seed set to 1
Seed set to 1
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/az/miniconda3/envs/nc/lib/python3.11/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.

  | Name                    | Type                     | Params | Mode 
-----------------------------------------------------------------------------
0 | loss                    | SMAPE                    | 0      | train
1 | padder_train            | ConstantPad1d            | 0      | train
2 | scaler                  | TemporalNorm             | 0      | train
3 | embedding               | TFTEmbedding             | 26.1 K | train
4 | static_encoder          | StaticCovariateEncoder   | 1.8 M  | train
5 | tem

[info] 学習完了
[info] モデルを保存しました: nf_auto_runs/run_20251112-075246/models
[info] モデルをロードしました。
[info] 交差検証（Rolling Origin）を実行します...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | SMAPE             | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 399 K  | train
-----------------------------------------------------------
399 K     Trainable params
3         Non-trainable params
399 K     Total params
1.600     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

[info] CV 予測テーブルを保存: nf_auto_runs/run_20251112-075246/tables/cv_predictions.csv
[info] 評価指標を保存: nf_auto_runs/run_20251112-075246/tables/metrics.csv
          model      SMAPE       MAE       MAPE      RMSE  n
0       AutoTFT        NaN       NaN        NaN       NaN  3
1  AutoPatchTST  25.273531  1.123584  31.792946  1.388669  3
[info] 予測可視化を保存: nf_auto_runs/run_20251112-075246/plots/last_window.png
[done] 成果物ルート: nf_auto_runs/run_20251112-075246


In [5]:
# === モデルのプロパティ値を安全に一覧表示するユーティリティ ===
import inspect
import numpy as np
import pandas as pd

try:
    import torch
except Exception:
    torch = None  # keep working in environments without torch

SIMPLE_TYPES = (str, int, float, bool, type(None))

def _summarize_value(v, maxlen=200):
    """Return a short, display-safe one-line summary of *v*.

    Simple scalars are stringified (truncated to *maxlen*), arrays / frames /
    tensors are described by shape and dtype, containers by their size, and
    anything else by its class name plus a truncated ``repr``.  Any exception
    raised while summarising is turned into a ``<summarize_error: ...>`` text.
    """
    def _clip(text):
        # Truncate to maxlen characters and mark the cut with an ellipsis.
        if len(text) > maxlen:
            return text[:maxlen] + "…"
        return text

    try:
        # Plain scalars and None.
        if isinstance(v, SIMPLE_TYPES):
            return _clip(str(v))

        # numpy
        if isinstance(v, np.ndarray):
            return f"np.ndarray(shape={v.shape}, dtype={v.dtype})"

        # pandas
        if isinstance(v, pd.DataFrame):
            more = "…" if v.shape[1] > 6 else ""
            return f"DataFrame(shape={v.shape}, columns={list(v.columns)[:6]}{more})"
        if isinstance(v, pd.Series):
            return f"Series(len={len(v)}, name={v.name}, dtype={v.dtype})"

        # torch (only when it could be imported)
        if torch is not None:
            if isinstance(v, torch.Tensor):
                device = v.device if hasattr(v, 'device') else 'cpu'
                return f"torch.Tensor(shape={tuple(v.shape)}, dtype={v.dtype}, device={device})"
            if hasattr(v, "__class__") and v.__class__.__module__.startswith("torch.optim"):
                return f"{v.__class__.__name__}(param_groups={len(getattr(v,'param_groups',[]))})"

        # Containers: report the size only (plus the first keys for dicts).
        if isinstance(v, (list, tuple, set)):
            return f"{type(v).__name__}(len={len(v)})"
        if isinstance(v, dict):
            first_keys = list(v.keys())[:8]
            more = "…" if len(v) > 8 else ""
            return f"dict(len={len(v)}, keys={first_keys}{more})"

        # Anything else: class name plus a single-line, truncated repr.
        flat_repr = repr(v).replace("\n", " ")
        return f"{v.__class__.__name__}: {_clip(flat_repr)}"
    except Exception as e:
        return f"<summarize_error: {e}>"

def collect_properties(obj, include_private=False):
    """Build a name -> value dict of the data attributes of *obj*.

    Sources, in order:

    1. instance attributes from ``obj.__dict__``;
    2. ``@property`` members defined on ``type(obj)``;
    3. any other name from ``dir(obj)`` whose value is not a Python function
       or bound method.

    Each name is evaluated once.  A failing attribute access is recorded as a
    ``<property_error: ...>`` / ``<attr_error: ...>`` string instead of being
    raised.

    Args:
        obj: Object to inspect.
        include_private: Also include names starting with ``_``.

    Returns:
        Dict mapping attribute names to their values (or error strings).
    """
    seen = set()
    out = {}

    # 1) Real instance attributes.
    for k, v in getattr(obj, "__dict__", {}).items():
        if not include_private and k.startswith("_"):
            continue
        seen.add(k)
        out[k] = v

    # 2) Attributes defined through @property on the class.
    try:
        for name, member in inspect.getmembers(type(obj)):
            if not isinstance(member, property):
                continue
            if not include_private and name.startswith("_"):
                continue
            if name in seen:
                continue
            # Mark as handled so step 3 does not evaluate the property again
            # and overwrite this result.
            seen.add(name)
            try:
                out[name] = getattr(obj, name)
            except Exception as e:
                out[name] = f"<property_error: {e}>"
    except Exception:
        # Best effort: if the class cannot be introspected, step 3 still runs.
        pass

    # 3) Remaining names reported by dir(), excluding functions and methods.
    for name in dir(obj):
        if not include_private and name.startswith("_"):
            continue
        if name in seen:
            continue
        try:
            val = getattr(obj, name)
        except Exception as e:
            val = f"<attr_error: {e}>"
        if inspect.ismethod(val) or inspect.isfunction(val):
            continue
        out[name] = val

    return out

def print_properties(obj, title=None, sort=True, include_private=False):
    """Print the attributes of *obj* as a table: name / type / summarized value.

    Args:
        obj: Object to inspect (via ``collect_properties``).
        title: Optional heading, printed between two ``=`` rules.
        sort: Sort rows by attribute name.
        include_private: Forwarded to ``collect_properties``.
    """
    if title:
        rule = "=" * len(title)
        print(rule)
        print(title)
        print(rule)

    entries = list(collect_properties(obj, include_private=include_private).items())
    if sort:
        entries.sort(key=lambda pair: pair[0])

    # Name column: as wide as the longest name, bounded to 8..40 characters.
    longest = max((len(name) for name, _ in entries), default=8)
    name_w = max(8, min(40, longest))
    type_w = 24

    header = f"{'name'.ljust(name_w)}  {'type'.ljust(type_w)}  value"
    print(header)
    print("-" * len(header))
    for name, value in entries:
        type_name = type(value).__name__
        summary = _summarize_value(value)
        print(f"{name.ljust(name_w)}  {type_name.ljust(type_w)}  {summary}")

def print_nf_models_properties(nf):
    """Print the properties of every model held by *nf*.

    For each entry of ``nf.models`` this prints the model itself, then its
    ``.model`` attribute when that exists and is a different object, then its
    ``trainer_kwargs`` when present, and finally whichever of the three
    exogenous lists are set.
    """
    exog_names = ("futr_exog_list", "hist_exog_list", "stat_exog_list")

    for idx, model in enumerate(getattr(nf, "models", []), start=1):
        print_properties(model, title=f"[Model {idx}] {model.__class__.__name__}")

        # Wrapped model stored under .model, if there is one.
        inner = getattr(model, "model", None)
        if inner is not None and inner is not model:
            print_properties(inner, title=f"[Model {idx}] tuned -> {inner.__class__.__name__}")

        # Frequently inspected extras, shown only when present.
        trainer_kwargs = getattr(model, "trainer_kwargs", None)
        if trainer_kwargs is not None:
            print_properties(trainer_kwargs, title=f"[Model {idx}] trainer_kwargs (dict)")

        exog = {name: getattr(model, name, None) for name in exog_names}
        present = {name: value for name, value in exog.items() if value is not None}
        if present:
            print("\n[exog lists]")
            for name, value in present.items():
                print(f"  {name}: {value}")
        print("\n")

# === Usage (after training or after loading) ===
# Example: nf = NeuralForecast(models=[...], ...)
# nf.fit(...)

# Print the property table for every model.
print_nf_models_properties(nf)

# To look at a single model only:
# print_properties(nf.models[0], title="First model")


[Model 1] AutoTFT
name                                      type                      value
-------------------------------------------------------------------------
CHECKPOINT_HYPER_PARAMS_KEY               str                       hyper_parameters
CHECKPOINT_HYPER_PARAMS_NAME              str                       hparams_name
CHECKPOINT_HYPER_PARAMS_TYPE              str                       hparams_type
EXOGENOUS_FUTR                            bool                      True
EXOGENOUS_HIST                            bool                      True
EXOGENOUS_STAT                            bool                      True
MULTIVARIATE                              bool                      False
RECURRENT                                 bool                      False
T_destination                             TypeVar                   TypeVar: ~T_destination
alias                                     NoneType                  None
allow_zero_length_dataloader_with_multiple_devices  boo

In [4]:
# %% [markdown]
# 依存（未インストールなら実行）
# !pip install cloudscraper beautifulsoup4 lxml
# （任意）きれいな進捗バーを使う場合
# !pip install tqdm

# %%
from __future__ import annotations

import re
import json
import time
import datetime as dt
from pathlib import Path
from typing import Iterable, Optional, Any
from urllib.parse import urljoin, urlparse

import requests  # 例外型で使用
from bs4 import BeautifulSoup, Tag
from collections import defaultdict

# ====== 基本設定 ======
# Desktop-Chrome-style User-Agent string.
# NOTE(review): presumably sent as the User-Agent header by the fetcher
# defined later in this cell — confirm against its use.
UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/126.0 Safari/537.36"
)

# -----------------------------------------------------------------------------
# ユーティリティ（ログ・プログレスバー）
# -----------------------------------------------------------------------------
def _now_str() -> str:
    """Return the current local time formatted as ``HH:MM:SS``."""
    return f"{dt.datetime.now():%H:%M:%S}"

def _log(msg: str) -> None:
    """Print *msg* prefixed with the ``_now_str()`` timestamp in brackets, flushing stdout."""
    stamp = _now_str()
    print(f"[{stamp}] {msg}", flush=True)

class _Progress:
    """Progress reporter: uses tqdm when it can be imported, else a text bar.

    The instance is a context manager; leaving the ``with`` block calls
    :meth:`close`. When ``disable`` is true or ``total`` is 0, every method
    is a no-op.

    Attributes set in ``__init__``:
        total:   number of expected steps (negative values are clamped to 0).
        desc:    label printed in front of the bar.
        disable: when true nothing is printed.
        count:   number of steps reported so far through :meth:`update`.
    """

    # Width of the plain-text fallback bar, in characters.
    _BAR_WIDTH = 40

    def __init__(self, total: int, desc: str = "進捗", disable: bool = False):
        self.total = max(0, int(total))
        self.desc = desc
        self.disable = disable
        self.count = 0
        self._use_tqdm = False
        self._tqdm = None
        if disable or self.total == 0:
            return
        try:
            from tqdm import tqdm  # type: ignore
            self._tqdm = tqdm(total=self.total, desc=self.desc, ncols=80)
            self._use_tqdm = True
        except Exception:
            # tqdm missing or unusable: fall back to the plain-text bar and
            # draw its empty initial state.
            self._use_tqdm = False
            self._tqdm = None
            print(
                f"{self.desc} 0/{self.total} [{' ' * self._BAR_WIDTH}] 0%",
                end="\r",
                flush=True,
            )

    def update(self, n: int = 1, postfix: Optional[str] = None) -> None:
        """Advance the bar by ``n`` steps, optionally showing ``postfix`` text."""
        if self.disable or self.total == 0:
            return
        # Track the count in both modes so callers can read it back.
        self.count += n
        if self._use_tqdm:
            if postfix:
                try:
                    self._tqdm.set_postfix_str(postfix)  # type: ignore
                except Exception:
                    # The postfix is cosmetic; never let it break the loop.
                    pass
            self._tqdm.update(n)  # type: ignore
            return
        # Clamp what is drawn so an over-reported count cannot stretch the
        # bar beyond its width or show more than 100%.
        shown = min(max(self.count, 0), self.total)
        filled = int(self._BAR_WIDTH * shown / self.total)
        bar = "#" * filled + "-" * (self._BAR_WIDTH - filled)
        pct = int(100 * shown / self.total)
        tail = f" {postfix}" if postfix else ""
        print(
            f"{self.desc} {self.count}/{self.total} [{bar}] {pct}%{tail}   ",
            end="\r",
            flush=True,
        )

    def close(self) -> None:
        """Finish the bar: close tqdm, or end the ``\\r``-rewritten text line."""
        if self.disable or self.total == 0:
            return
        if self._use_tqdm:
            self._tqdm.close()  # type: ignore
        else:
            print()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

# -----------------------------------------------------------------------------
# フェッチャ（cloudscraper を常用・meta refresh 追従・Jina フォールバック）
# -----------------------------------------------------------------------------
def _build_scraper():
    """Create a cloudscraper scraper preconfigured with browser-like headers.

    Returns:
        The object returned by ``cloudscraper.create_scraper`` after its
        default headers have been updated.

    Raises:
        RuntimeError: if the ``cloudscraper`` package cannot be imported.
    """
    try:
        import cloudscraper  # required dependency
    except ImportError as e:
        raise RuntimeError(
            "cloudscraper が見つかりません。先に `pip install cloudscraper` を実行してください。"
        ) from e

    default_headers = {
        "User-Agent": UA,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "ja,en;q=0.9",
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }
    scraper = cloudscraper.create_scraper(browser={'custom': UA})
    scraper.headers.update(default_headers)
    return scraper

# Shared scraper object, built once when this cell/module runs and reused by
# the fetch helpers below for every HTTP request.
_SCRAPER = _build_scraper()

def _meta_refresh_target(html: str, base_url: str) -> str | None:
    """Find the URL a page asks the client to move on to, if any.

    Two sources are tried in order:
      1. a ``<meta http-equiv="refresh">`` tag whose ``content`` holds ``url=...``;
      2. the first ``<a>`` whose string matches "click here" / "こちら" / "ここ".

    Returns:
        The target resolved against ``base_url``, or ``None`` when neither
        source yields one.
    """
    doc = BeautifulSoup(html, "lxml")

    refresh = doc.find("meta", attrs={"http-equiv": re.compile("^refresh$", re.I)})
    content = refresh.get("content") if refresh else None
    if content:
        found = re.search(r"url\s*=\s*([^;]+)", content, flags=re.I)
        if found:
            return urljoin(base_url, found.group(1).strip('\'" '))

    # NOTE(review): this anchor heuristic is not limited to interstitial
    # pages; an ordinary page containing such a link will also match —
    # confirm this is intended.
    link = doc.find("a", string=re.compile(r"(click here|こちら|ここ)", re.I))
    href = link.get("href") if link else None
    if href:
        return urljoin(base_url, href)
    return None

def _fetch_html(url: str, timeout: int = 20, referer: str | None = None) -> str:
    """GET ``url`` with the shared scraper and follow one refresh hop.

    Args:
        url: page to download.
        timeout: per-request timeout passed to the scraper.
        referer: optional value for the ``Referer`` header of the first request.

    Returns:
        The decoded body of ``url``, or of the page that
        ``_meta_refresh_target`` points at when that differs from ``url``.

    Raises:
        Whatever ``raise_for_status()`` raises for an error status.
    """

    def _get_text(target: str, extra_headers: dict) -> str:
        resp = _SCRAPER.get(target, timeout=timeout, allow_redirects=True, headers=extra_headers)
        # Prefer the detected encoding, then the declared one, then UTF-8.
        resp.encoding = resp.apparent_encoding or resp.encoding or "utf-8"
        resp.raise_for_status()
        return resp.text

    first_headers = {"Referer": referer} if referer else {}
    html = _get_text(url, first_headers)

    follow = _meta_refresh_target(html, url)
    if not follow or follow == url:
        return html

    _log(f"[cloudscraper固定] meta refresh を検出: {url} -> {follow}")
    # Only a single hop is followed; the second page is returned as-is.
    return _get_text(follow, {"Referer": url})

def _jina_reader_url(url: str) -> str:
    """Build the Jina Reader proxy URL for ``url``.

    The ``#fragment`` is dropped and the rest of the URL, including its
    original scheme and any URLs embedded in the query string, is kept
    verbatim. A scheme-less input gets ``http://`` prepended.
    """
    base = url.split("#", 1)[0]
    if not base.lower().startswith(("http://", "https://")):
        base = "http://" + base
    return f"https://r.jina.ai/{base}"


def _fetch_html_with_fallbacks(url: str, timeout: int = 20) -> str:
    """Fetch ``url`` via cloudscraper; on failure fall back to Jina Reader.

    Args:
        url: page to download.
        timeout: per-request timeout for both the direct and fallback request.

    Returns:
        The body from ``_fetch_html``, or the Jina Reader response text when
        the direct fetch raised ``requests.RequestException`` (including
        ``requests.HTTPError``).

    Raises:
        Whatever the fallback request raises if it fails as well.
    """

    def _jina_reader(u: str) -> str:
        jurl = _jina_reader_url(u)
        _log(f"[cloudscraper固定] Jina Reader フォールバック: {jurl}")
        # NOTE(review): callers parse the result as HTML; confirm the format
        # Jina Reader returns by default is acceptable here.
        rj = _SCRAPER.get(jurl, timeout=timeout)
        rj.raise_for_status()
        return rj.text

    try:
        return _fetch_html(url, timeout=timeout)
    except requests.HTTPError as e:
        # HTTPError is a RequestException subclass; it is handled first so
        # the status code can be logged.
        code = getattr(e.response, "status_code", None)
        _log(f"[cloudscraper固定] HTTPエラー {code} を検出: {url}")
        return _jina_reader(url)
    except requests.RequestException as e:
        _log(f"[cloudscraper固定] ネットワーク系エラー: {url} -> {e!r}")
        return _jina_reader(url)

# -----------------------------------------------------------------------------
# クリーニング・整形
# -----------------------------------------------------------------------------
def _escape_html(s: str) -> str:
    """Escape ``&``, ``<``, ``>`` and ``"`` for use in HTML text or attributes.

    Single quotes are left untouched.
    """
    entities = str.maketrans({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
    })
    # One pass over the input: already-produced entities are never re-escaped.
    return s.translate(entities)

def _slugify(text: str, fallback: str = "page") -> str:
    """Turn ``text`` into a slug of word characters and single hyphens.

    Whitespace runs become ``-``, everything except word characters and
    hyphens is removed, hyphen runs collapse to one, and leading/trailing
    hyphens are stripped. ``fallback`` is returned when nothing remains.
    """
    hyphenated = re.sub(r"\s+", "-", text.strip())
    word_chars_only = re.sub(r"[^\w\-]+", "", hyphenated, flags=re.U)
    collapsed = re.sub(r"-{2,}", "-", word_chars_only)
    slug = collapsed.strip("-")
    if slug:
        return slug
    return fallback

def _derive_filename(url: str, title: str, fmt: str) -> str:
    """Derive an output file name from the page title, else from the URL host.

    Args:
        url: source URL; its netloc is used when the title yields no slug.
        title: page title; its slug is truncated to 60 characters.
        fmt: one of html / md / markdown / txt / text / json.

    Returns:
        ``"<slug>.<ext>"``.

    Raises:
        KeyError: if ``fmt`` is not one of the supported keys.
    """
    # Pass an empty fallback so a title with no usable characters produces ""
    # and the netloc branch below is actually reachable.
    base = _slugify(title, "")[:60] or _slugify(urlparse(url).netloc, "page")
    ext = {"html":"html","md":"md","markdown":"md","txt":"txt","text":"txt","json":"json"}[fmt]
    return f"{base}.{ext}"

def _clean_soup(soup: BeautifulSoup) -> None:
    """Remove non-content elements from ``soup`` in place.

    The first pass drops script/style/noscript/iframe/template elements, the
    second drops nav/footer/form/aside elements.
    """
    non_content = ["script", "style", "noscript", "iframe", "template"]
    page_chrome = ["nav", "footer", "form", "aside"]
    for names in (non_content, page_chrome):
        for element in soup.find_all(names):
            element.decompose()

def _absolutize_urls(soup: BeautifulSoup, base_url: str) -> None:
    """Rewrite link/media URLs in ``soup`` to absolute form, in place.

    Values starting with ``data:``, ``mailto:``, ``tel:`` or ``#`` are left
    alone. Afterwards, ``<img>`` elements without a ``src`` get one from the
    first available lazy-loading attribute.
    """
    url_attrs = {
        "a": "href",
        "img": "src",
        "link": "href",
        "script": "src",
        "source": "src",
        "video": "src",
        "audio": "src",
    }
    skip_prefixes = ("data:", "mailto:", "tel:", "#")
    for tag_name, attr in url_attrs.items():
        for element in soup.find_all(tag_name):
            current = element.get(attr)
            if not current or current.startswith(skip_prefixes):
                continue
            element[attr] = urljoin(base_url, current)

    lazy_keys = ("data-src", "data-original", "data-lazy-src")
    for image in soup.find_all("img"):
        for lazy_key in lazy_keys:
            lazy_value = image.get(lazy_key)
            # Only fill in src while it is still missing.
            if lazy_value and not image.get("src"):
                image["src"] = urljoin(base_url, lazy_value)

def _text_len(node: Tag) -> int:
    """Return the length of ``node``'s stripped text, pieces joined by a space."""
    visible_text = node.get_text(separator=" ", strip=True)
    return len(visible_text)

def _pick_main_content(soup: BeautifulSoup) -> Tag:
    """Heuristically choose the element holding the page's main content.

    Tried in order, returning at the first step that finds something:
      1. the ``<article>`` with the most text;
      2. a main/div/section with ``role="main"``, then one whose id matches
         ``main``, then one whose class matches
         main/content/article/entry/bd-article (longest text wins each time);
      3. the main/section/div with the highest ``100 * <p> count + text length``;
      4. ``soup.body``, and finally ``soup`` itself.
    """
    articles = soup.find_all("article")
    if articles:
        return max(articles, key=_text_len)

    attr_filters = [
        {"role": "main"},
        {"id": re.compile(r"\bmain\b", re.I)},
        {"class": re.compile(r"\b(main|content|article|entry|bd-article)\b", re.I)},
    ]
    for attrs in attr_filters:
        matches = soup.find_all(["main","div","section"], attrs=attrs)
        if matches:
            return max(matches, key=_text_len)

    def _score(block: Tag) -> int:
        # Paragraph-rich containers are favoured over merely long ones.
        return len(block.find_all("p")) * 100 + _text_len(block)

    # max() keeps the first of equally-scored candidates.
    best = max(soup.find_all(["main","section","div"]), key=_score, default=None)
    return best or soup.body or soup

def _extract_metadata(soup: BeautifulSoup, url: str) -> dict:
    """Collect title, description and publication date from ``soup``.

    Returns:
        A dict with keys ``title``, ``description``, ``published`` and ``url``.
        ``title`` is the ``og:title`` content when present, else the
        ``<title>`` text, else ``url``. ``description`` and ``published`` are
        ``""`` when no matching meta tag carries content.
    """

    def _content(tag):
        # Content attribute of a meta tag, or None when the tag is missing.
        return tag.get("content") if tag else None

    title_tag = soup.title
    title = title_tag.string.strip() if title_tag and title_tag.string else ""
    title = title or url
    og_title = _content(soup.find("meta", property="og:title"))
    if og_title:
        title = og_title.strip()

    desc = ""
    for key in ("description", "og:description", "twitter:description"):
        # Each key is looked up as a name= attribute first, then as property=.
        tag = soup.find("meta", attrs={"name": key}) or soup.find("meta", property=key)
        value = _content(tag)
        if value:
            desc = value.strip()
            break

    pub = ""
    date_filters = (
        {"property": "article:published_time"},
        {"itemprop": "datePublished"},
        {"name": "pubdate"},
        {"property": "og:updated_time"},
    )
    for attrs in date_filters:
        value = _content(soup.find("meta", attrs=attrs))
        if value:
            pub = value.strip()
            break

    return {"title": title, "description": desc, "published": pub, "url": url}

def _build_html_doc(meta: dict, base_url: str, content_html: str) -> str:
    """Wrap extracted content in a standalone, styled HTML document.

    Args:
        meta: dict with ``title`` and optionally ``description`` / ``published``.
        base_url: source URL; used for ``<base href>`` and the source link.
        content_html: already-serialized HTML, inserted verbatim in ``<article>``.

    Returns:
        The complete HTML document as a string.

    Title, description, publication date and ``base_url`` are HTML-escaped so
    that characters such as ``&`` or ``"`` cannot break the markup.
    """
    now = dt.datetime.now().strftime("%Y-%m-%d %H:%M")
    style = """
    body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica,Arial,'Hiragino Kaku Gothic ProN','Noto Sans JP','Yu Gothic',sans-serif;line-height:1.8;color:#1f2937;background:#fff;margin:0;padding:2rem;}
    .container{max-width:950px;margin:0 auto;}
    header{margin-bottom:2rem;border-bottom:1px solid #e5e7eb;padding-bottom:1rem;}
    h1{font-size:1.8rem;line-height:1.3;margin:0 0 .5rem 0;}
    .meta{color:#6b7280;font-size:.9rem;margin:.5rem 0;}
    article img{max-width:100%;height:auto;}
    article pre{background:#0b1020;color:#e6e6e6;padding:1rem;border-radius:10px;overflow:auto;}
    article code{background:#f2f4f8;padding:.15rem .35rem;border-radius:.35rem;}
    article{font-size:1.05rem;}
    article p{margin:1rem 0;}
    footer{margin-top:2rem;color:#6b7280;font-size:.85rem;border-top:1px solid #e5e7eb;padding-top:1rem;}
    a{color:#2563eb;text-decoration:none} a:hover{text-decoration:underline}
    """
    desc = meta.get("description") or ""
    pub = meta.get("published") or ""
    safe_title = _escape_html(meta.get('title',''))
    # The URL lands in attribute values and text, so it is escaped like the
    # other fields.
    safe_url = _escape_html(base_url)
    pub_html = f"<br>公開/更新: {_escape_html(pub)}" if pub else ""
    desc_html = f"<p>{_escape_html(desc)}</p>" if desc else ""
    html = f"""<!doctype html>
<html lang="ja"><head><meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{safe_title}</title>
<meta name="description" content="{_escape_html(desc)}">
<base href="{safe_url}">
<style>{style}</style></head>
<body><div class="container">
<header>
  <h1>{safe_title}</h1>
  <div class="meta">
    元URL: <a href="{safe_url}">{safe_url}</a><br>
    抽出日時: {now}{pub_html}
  </div>
  {desc_html}
</header>
<article>
  {content_html}
</article>
<footer>このHTMLは元ページの主要部分を自動抽出して生成されています。</footer>
</div></body></html>"""
    return html

# -----------------------------------------------------------------------------
# Markdown / テキスト化
# -----------------------------------------------------------------------------
def _node_to_md(node: Tag) -> str:
    """Recursively convert a parsed HTML node to Markdown text.

    String nodes are returned unchanged. Headings, paragraphs, line breaks,
    bold/italic, links, images, lists, ``pre``/``code`` and block quotes get
    Markdown syntax; any other element is replaced by its converted children.
    """
    if isinstance(node, str):
        return node

    tag = node.name.lower() if hasattr(node, "name") and node.name else ""

    def render_children(parent):
        return "".join(_node_to_md(child) for child in parent.children)

    if tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
        return "#" * int(tag[1]) + " " + render_children(node) + "\n\n"
    if tag == "p":
        return render_children(node).strip() + "\n\n"
    if tag == "br":
        return "  \n"
    if tag in ("strong", "b"):
        return "**" + render_children(node) + "**"
    if tag in ("em", "i"):
        return "*" + render_children(node) + "*"
    if tag == "a":
        href = node.get("href") or ""
        # A link without visible text shows its target instead.
        label = render_children(node).strip() or href
        return f"[{label}]({href})"
    if tag == "img":
        alt = node.get("alt") or ""
        src = node.get("src") or ""
        return f"![{alt}]({src})"
    if tag in ("ul", "ol"):
        numbered = tag == "ol"
        rows = []
        # Only direct <li> children are items of this list.
        for position, item in enumerate(node.find_all("li", recursive=False), start=1):
            bullet = f"{position}. " if numbered else "- "
            rows.append(bullet + render_children(item).strip() + "\n")
        return "".join(rows) + "\n"
    if tag == "pre":
        return "```\n" + node.get_text().rstrip() + "\n```\n\n"
    if tag == "code":
        return "`" + node.get_text() + "`"
    if tag == "blockquote":
        kept = [line for line in node.get_text().splitlines() if line.strip()]
        return "> " + "\n> ".join(kept) + "\n\n"
    return render_children(node)

def _html_to_text(html: str) -> str:
    """Parse ``html`` and return its text, one stripped piece per line."""
    parsed = BeautifulSoup(html, "lxml")
    plain = parsed.get_text(separator="\n", strip=True)
    return plain

# -----------------------------------------------------------------------------
# 単一URL 抽出・保存
# -----------------------------------------------------------------------------
def extract_and_save(
    url: str,
    selectors: list[str] | None = None,
    fmt: str = "html",
    output: str | Path | None = None,
    return_data: bool = False,
) -> Path | tuple[Path, dict]:
    """Fetch one page, extract its main content and write it to a file.

    Args:
        url: page to fetch.
        selectors: CSS selectors tried in order; the first hit is used as the
            content node. Without a hit the content node is chosen
            heuristically.
        fmt: html / md / markdown / txt / text / json (case-insensitive).
        output: destination path; derived from the page title when omitted.
        return_data: when true, also return the metadata and content HTML.

    Returns:
        The output path, or ``(path, {"meta": ..., "content_html": ...})``
        when ``return_data`` is true.

    Raises:
        ValueError: if ``fmt`` is not supported. This is checked before any
            network access, and still applies under ``python -O``.
    """
    fmt_key = fmt.lower()
    if fmt_key not in {"html","md","markdown","txt","text","json"}:
        raise ValueError("fmt は html/md/txt/json のいずれか")

    _log(f"開始: {url}")
    raw = _fetch_html_with_fallbacks(url)
    soup = BeautifulSoup(raw, "lxml")
    _clean_soup(soup)
    _absolutize_urls(soup, url)

    node = None
    if selectors:
        for sel in selectors:
            hit = soup.select_one(sel)
            if hit:
                node = hit
                break
    if node is None:
        node = _pick_main_content(soup)

    # Drop paragraphs that contain neither text nor an image.
    for p in node.find_all("p"):
        if not p.get_text(strip=True) and not p.find("img"):
            p.decompose()

    meta = _extract_metadata(soup, url)
    content_html = str(node)

    if fmt_key == "html":
        doc = _build_html_doc(meta, url, content_html)
        data_bytes = doc.encode("utf-8")
    elif fmt_key in {"md","markdown"}:
        md = _node_to_md(BeautifulSoup(content_html, "lxml"))
        header = (
            f"# {meta['title']}\n\n"
            f"- 元URL: {meta['url']}\n"
            + (f"- 公開/更新: {meta['published']}\n" if meta.get("published") else "")
            + ("\n" if meta.get("description") else "")
        )
        if meta.get("description"):
            header += meta["description"] + "\n\n"
        data_bytes = (header + md).encode("utf-8")
    elif fmt_key in {"txt","text"}:
        txt = _html_to_text(content_html)
        header = f"{meta['title']}\n{meta['url']}\n"
        if meta.get("published"):
            header += f"{meta['published']}\n"
        if meta.get("description"):
            header += f"{meta['description']}\n"
        header += "\n"
        data_bytes = (header + txt).encode("utf-8")
    else:  # json
        payload = {
            "meta": meta,
            "content_html": content_html,
            "content_text": _html_to_text(content_html),
            "extracted_at": dt.datetime.now().isoformat(timespec="seconds"),
        }
        data_bytes = json.dumps(payload, ensure_ascii=False, indent=2).encode("utf-8")

    out_path = Path(output) if output else Path(_derive_filename(url, meta["title"], fmt_key))
    # Make sure the destination directory exists so the write cannot fail
    # after the page has already been fetched.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_bytes(data_bytes)
    _log(f"完了: {url} -> {out_path}")
    if return_data:
        return out_path, {"meta": meta, "content_html": content_html}
    return out_path

# -----------------------------------------------------------------------------
# 複数URL 結合保存（重複除去・1秒スリープ・進捗バー・詳細ログ・堅牢化）
# -----------------------------------------------------------------------------
def _dedupe_and_normalize_urls(lines: Iterable[str]) -> tuple[list[str], list[str]]:
    """Split ``lines`` into first-seen URLs and URLs that were repeated.

    Each entry is converted to ``str``; blank entries are skipped and only the
    first whitespace-separated token of an entry is treated as the URL.

    Returns:
        ``(unique, duplicates)``: both in order of first occurrence, with each
        repeated URL listed once in ``duplicates``.
    """
    # Dicts are used as insertion-ordered sets.
    first_seen: dict[str, None] = {}
    repeated: dict[str, None] = {}
    for raw in lines:
        tokens = str(raw).split()
        if not tokens:
            continue
        url = tokens[0]
        if url in first_seen:
            repeated.setdefault(url)
        else:
            first_seen[url] = None
    return list(first_seen), list(repeated)

def _extract_core(url: str, selectors: list[str] | None = None, attempts: int = 2) -> dict:
    """Fetch ``url`` and extract its main content, retrying on any error.

    Args:
        url: page to fetch.
        selectors: CSS selectors tried in order; the first hit is used as the
            content node, otherwise the node is chosen heuristically.
        attempts: maximum number of tries.

    Returns:
        ``{"meta": ..., "content_html": ...}``. This function does not raise:
        when every attempt fails, the returned item has a title starting with
        ``[取得失敗]`` and carries the last error text as description/content.
    """
    last_err: Optional[Exception] = None
    for k in range(1, attempts + 1):
        try:
            _log(f"[{k}/{attempts}] 取得開始: {url}")
            raw = _fetch_html_with_fallbacks(url)
            soup = BeautifulSoup(raw, "lxml")
            _clean_soup(soup)
            _absolutize_urls(soup, url)

            node = None
            if selectors:
                for sel in selectors:
                    hit = soup.select_one(sel)
                    if hit:
                        node = hit
                        break
            if node is None:
                node = _pick_main_content(soup)

            # Drop paragraphs that contain neither text nor an image.
            for p in node.find_all("p"):
                if not p.get_text(strip=True) and not p.find("img"):
                    p.decompose()

            meta = _extract_metadata(soup, url)
            content_html = str(node)
            _log(f"取得成功: {url}（タイトル: {meta.get('title','')[:60]}）")
            return {"meta": meta, "content_html": content_html}
        except Exception as e:
            # Deliberately broad: one bad page must not abort a batch run.
            last_err = e
            _log(f"取得失敗（試行{k}/{attempts}）: {url} -> {e!r}")
            if k < attempts:
                time.sleep(1.0)  # back off only when another attempt follows
    return {
        "meta": {"title": f"[取得失敗] {url}", "description": str(last_err or "unknown error"), "published": "", "url": url},
        "content_html": f"<p>エラー: {_escape_html(str(last_err or 'unknown error'))}</p>",
    }

def _unique_slugs(titles: list[str]) -> list[str]:
    """Slugify each title, suffixing repeats with -2, -3, ... to keep them unique."""
    taken: set[str] = set()
    slugs: list[str] = []
    for text in titles:
        base = _slugify(text)
        slug, serial = base, 1
        while slug in taken:
            serial += 1
            slug = f"{base}-{serial}"
        taken.add(slug)
        slugs.append(slug)
    return slugs


def extract_many_and_save(
    urls: list[str] | str,
    selectors: list[str] | None = None,
    fmt: str = "md",
    output: str | Path = "combined.md",
    title: str = "まとめ",
    announce_duplicates: bool = True,
    sleep_seconds: float = 1.0,
    show_progress: bool = True,
) -> Path:
    """Fetch several pages and write them into one combined document.

    Args:
        urls: list of URLs, or a single string with one URL per line.
            Duplicates are removed, keeping the first occurrence.
        selectors: CSS selectors forwarded to the per-page extraction.
        fmt: md / html / txt / json (case-insensitive); ``markdown`` and
            ``text`` are accepted as aliases of ``md`` and ``txt``.
        output: destination file; missing parent directories are created.
        title: heading of the combined document.
        announce_duplicates: log the URLs that were dropped as duplicates.
        sleep_seconds: pause between consecutive fetches.
        show_progress: display a progress bar.

    Returns:
        ``Path(output)``.

    Raises:
        ValueError: if ``fmt`` is not supported. This is checked before any
            page is fetched.

    Pages that cannot be fetched are still included as ``[取得失敗]`` entries
    and listed in the log at the end.
    """
    fmt_key = fmt.lower()
    fmt_key = {"markdown": "md", "text": "txt"}.get(fmt_key, fmt_key)
    if fmt_key not in {"md","html","txt","json"}:
        raise ValueError("fmt は md/html/txt/json のいずれか")

    if isinstance(urls, str):
        unique_urls, dup_urls = _dedupe_and_normalize_urls(urls.splitlines())
    else:
        unique_urls, dup_urls = _dedupe_and_normalize_urls(urls)

    if announce_duplicates and dup_urls:
        _log(f"[dedupe] 重複URLを削除: {len(dup_urls)}件 -> " + ", ".join(dup_urls))

    errors = []
    items = []
    _log(f"合計 {len(unique_urls)} 件を処理します -> 出力先: {output}")
    with _Progress(total=len(unique_urls), desc="全体進捗", disable=not show_progress) as prog:
        for idx, u in enumerate(unique_urls, start=1):
            # Pause between requests, but not before the first one.
            if sleep_seconds and idx > 1:
                time.sleep(float(sleep_seconds))
            try:
                it = _extract_core(u, selectors)
                if it["meta"]["title"].startswith("[取得失敗]"):
                    errors.append((u, it["meta"].get("description", "")))
                items.append(it)
            except Exception as e:
                # Defensive: keep the batch going whatever the extractor does.
                errors.append((u, repr(e)))
                items.append({
                    "meta": {"title": f"[取得失敗] {u}", "description": repr(e), "published": "", "url": u},
                    "content_html": f"<p>エラー: {_escape_html(repr(e))}</p>",
                })
            prog.update(1, postfix=f"{idx}/{len(unique_urls)}")

    now = dt.datetime.now().strftime("%Y-%m-%d %H:%M")
    titles = [it["meta"].get("title") or it["meta"]["url"] for it in items]
    # Several URLs can share a page title; unique slugs keep every
    # table-of-contents link pointing at its own section.
    slugs = _unique_slugs(titles)

    _log(f"書き出し開始: {output}（形式: {fmt_key}, 件数: {len(items)}）")
    if fmt_key == "md":
        parts = [f"# {title}\n\n- 抽出日時: {now}\n- 件数: {len(items)}\n\n## 目次\n"]
        for i, (t, slug) in enumerate(zip(titles, slugs), 1):
            parts.append(f"{i}. [{t}](#{slug})\n")
        parts.append("\n")
        for it, t, slug in zip(items, titles, slugs):
            meta = it["meta"]
            parts.append("\n---\n\n")
            parts.append(f"## {t}\n<a id=\"{slug}\"></a>\n\n")
            parts.append(f"- 元URL: {meta['url']}\n")
            if meta.get("published"):
                parts.append(f"- 公開/更新: {meta['published']}\n")
            if meta.get("description"):
                parts.append(f"\n{meta['description']}\n")
            md = _node_to_md(BeautifulSoup(it["content_html"], "lxml"))
            parts.append("\n" + md.strip() + "\n")
        document = "".join(parts)

    elif fmt_key == "html":
        style = """
        body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica,Arial,'Hiragino Kaku Gothic ProN','Noto Sans JP','Yu Gothic',sans-serif;line-height:1.8;color:#1f2937;background:#fff;margin:0;padding:2rem;}
        .container{max-width:1000px;margin:0 auto;}
        header{margin-bottom:2rem;border-bottom:1px solid #e5e7eb;padding-bottom:1rem;}
        h1{font-size:1.9rem;margin:0 0 .25rem;}
        nav ul{padding-left:1rem;}
        article img{max-width:100%;height:auto;}
        article pre{background:#0b1020;color:#e6e6e6;padding:1rem;border-radius:10px;overflow:auto;}
        article code{background:#f2f4f8;padding:.15rem .35rem;border-radius:.35rem;}
        section{margin:2.5rem 0;}
        """
        toc, body = [], []
        for it, raw_title, slug in zip(items, titles, slugs):
            meta = it["meta"]
            t = _escape_html(raw_title)
            # The URL goes into an attribute and into text, so escape it too.
            safe_url = _escape_html(meta["url"])
            toc.append(f'<li><a href="#{slug}">{t}</a></li>')
            desc_html = f'<p class="desc">{_escape_html(meta["description"])}</p>' if meta.get("description") else ""
            pub_html  = f'<div class="meta">公開/更新: {_escape_html(meta["published"])}</div>' if meta.get("published") else ""
            body.append(
                f'<section id="{slug}">'
                f"<h2>{t}</h2>"
                f'<div class="meta">元URL: <a href="{safe_url}">{safe_url}</a></div>'
                f"{pub_html}{desc_html}"
                f'<article>{it["content_html"]}</article>'
                f"</section>"
            )
        toc_html = "".join(toc)
        body_html = "".join(body)
        document = f"""<!doctype html>
<html lang="ja"><head><meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{_escape_html(title)}</title>
<style>{style}</style></head>
<body><div class="container">
<header>
  <h1>{_escape_html(title)}</h1>
  <div class="meta">抽出日時: {now}・件数: {len(items)}</div>
  <nav><h3>目次</h3><ul>{toc_html}</ul></nav>
</header>
{body_html}
</div></body></html>"""

    elif fmt_key == "txt":
        lines = [f"{title}\n抽出日時: {now}\n件数: {len(items)}\n"]
        for it in items:
            meta = it["meta"]
            lines.append("\n" + "="*80)
            lines.append(f"\n{meta.get('title') or meta['url']}\n{meta['url']}")
            if meta.get("published"):
                lines.append(meta["published"])
            if meta.get("description"):
                lines.append(meta["description"])
            text = _html_to_text(it["content_html"])
            lines.append("\n" + text)
        document = "\n".join(lines)

    else:  # json
        payload = {
            "title": title,
            "extracted_at": now,
            "count": len(items),
            "items": [
                {"meta": it["meta"], "content_html": it["content_html"], "content_text": _html_to_text(it["content_html"])}
                for it in items
            ],
        }
        document = json.dumps(payload, ensure_ascii=False, indent=2)

    out_path = Path(output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_bytes(document.encode("utf-8"))

    _log(f"書き出し完了: {output}")
    if errors:
        _log(f"⚠ 取得に失敗したURL: {len(errors)}件")
        for u, e in errors:
            _log(f"  - {u} -> {e}")
    else:
        _log("すべてのURLを正常に取得しました。")
    return Path(output)

# -----------------------------------------------------------------------------
# グループ化（ドメイン別など）→ 別ファイル保存（進捗バー・詳細ログ付）
# -----------------------------------------------------------------------------
def group_by_domain(
    urls: list[str],
    domain_alias: dict[str, str] | None = None,
    drop_fragments: bool = False,
) -> dict[str, list[str]]:
    """Group URLs by their network location (host part).

    Args:
        urls: URLs to group; order is preserved within each group.
        domain_alias: optional mapping from netloc to the group name to use.
        drop_fragments: when true, ``#fragment`` is removed from each URL
            before grouping and in the returned lists.

    Returns:
        A plain dict mapping group name to its URLs.
    """
    aliases = domain_alias or {}
    grouped: dict[str, list[str]] = {}
    for raw in urls:
        target = raw.split("#", 1)[0] if drop_fragments else raw
        host = urlparse(target).netloc
        grouped.setdefault(aliases.get(host, host), []).append(target)
    return grouped

def extract_grouped_and_save(
    group_map: dict[str, list[str]] | list[str],
    selectors: list[str] | None = None,
    fmt: str = "md",
    output_dir: str | Path = "bundles",
    filename_template: str = "{index:02d}_{name}.{ext}",
    title_template: str = "{name} まとめ",
    announce_duplicates: bool = True,
    drop_fragments: bool = False,
    sleep_seconds: float = 1.0,
    show_progress: bool = True,
) -> dict[str, Path]:
    """Extract URLs group by group, writing one combined file per group.

    Args:
        group_map: ``{group name: [URL, ...]}``, or a plain list of URLs that
            is grouped by domain automatically.
        selectors: CSS selectors forwarded to the per-page extraction.
        fmt: md / html / txt / json (case-insensitive).
        output_dir: directory for the generated files (created if missing).
        filename_template: format string with ``index``, ``name`` (slugified)
            and ``ext`` fields.
        title_template: format string with ``name`` and ``index`` fields.
        announce_duplicates: log duplicate URLs, overall and per group.
        drop_fragments: strip ``#fragment`` from URLs before processing.
        sleep_seconds: pause between consecutive fetches inside a group.
        show_progress: display the overall progress bar.

    Returns:
        Mapping from group name to the path written for that group.

    Raises:
        KeyError: if ``fmt`` is not one of the supported formats.
    """
    # Resolve the extension first so a bad format fails before any work.
    ext = {"md":"md","html":"html","txt":"txt","json":"json"}[fmt.lower()]

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    if isinstance(group_map, list):
        group_map = group_by_domain(group_map, drop_fragments=drop_fragments)

    # Normalize and de-duplicate every group once, up front.
    prepared: list[tuple[str, list[str], list[str]]] = []
    all_urls = []
    for name, us in group_map.items():
        if drop_fragments:
            us = [u.split("#",1)[0] for u in us]
        all_urls.extend(us)
        group_urls, group_dups = _dedupe_and_normalize_urls(us)
        prepared.append((name, group_urls, group_dups))

    _, dup_all = _dedupe_and_normalize_urls(all_urls)
    if announce_duplicates and dup_all:
        _log(f"[dedupe] （全体）重複URLを削除: {len(dup_all)}件 -> " + ", ".join(dup_all))

    # Each group is fetched independently, so the bar's total is the sum of
    # the per-group unique counts; this keeps "processed" from exceeding
    # "total" when a URL appears in more than one group.
    total = sum(len(group_urls) for _, group_urls, _ in prepared)

    results: dict[str, Path] = {}
    processed = 0
    _log(f"グループ処理を開始（合計URL: {total}, 出力先: {out_dir}）")

    with _Progress(total=total, desc="全体進捗（グループ）", disable=not show_progress) as gprog:
        for i, (name, urls, dups) in enumerate(prepared, start=1):
            if announce_duplicates and dups:
                _log(f"[{name}] 重複URLを削除: {len(dups)}件 -> " + ", ".join(dups))

            fname = filename_template.format(index=i, name=_slugify(name), ext=ext)
            title = title_template.format(name=name, index=i)
            _log(f"--- グループ開始: {name}（{len(urls)}件） -> {fname}")

            # The inner call keeps its own bar and duplicate log switched off;
            # both are handled at this level.
            p = extract_many_and_save(
                urls,
                selectors=selectors,
                fmt=fmt,
                output=str(out_dir / fname),
                title=title,
                announce_duplicates=False,
                sleep_seconds=sleep_seconds,
                show_progress=False,
            )
            results[name] = p
            processed += len(urls)
            gprog.update(len(urls), postfix=f"{processed}/{total}")
            _log(f"--- グループ完了: {name}（{len(urls)}件処理）")

    _log("すべてのグループ処理が完了しました。")
    return results


In [5]:
URLS = [
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#1-automatic-forecasting',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#a-rnn-based',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autornn',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autolstm',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autogru',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autotcn',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autodeepar',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autodilatedrnn',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autobitcn',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#b-mlp-based',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#automlp',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autonbeats',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autonbeatsx',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autonhits',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autodlinear',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autonlinear',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autotide',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autodeepnpts',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#c-kan-based',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autokan',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#d-transformer-based',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autotft',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autovanillatransformer',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autoinformer',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autoautoformer',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autofedformer',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autopatchtst',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autoitransformer',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autotimexer',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#e-cnn-based',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autotimesnet',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#f-multivariate',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autostemgnn',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autohint',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autotsmixer',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autotsmixerx',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#automlpmultivariate',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autosofts',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autotimemixer',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#autormok',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html#tests',
'https://nixtla.github.io/neuralforecast/losses.pytorch.html',
'https://docs.ray.io/en/latest/tune/api_docs/suggestion.html',
'https://optuna.readthedocs.io/en/stable/reference/samplers/index.html',
'https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html',
'https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/007_optuna_callback.html',
'https://nixtlaverse.nixtla.io/neuralforecast/core.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.rmok.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.rnn.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.softs.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.stemgnn.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.tcn.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.tft.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.tide.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.timellm.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.timemixer.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.timesnet.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.timexer.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.tsmixer.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.tsmixerx.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.vanillatransformer.html',
'https://nixtlaverse.nixtla.io/neuralforecast/models.xlstm',
'https://nixtlaverse.nixtla.io/neuralforecast/models.html',
'https://nixtlaverse.nixtla.io/neuralforecast/losses.pytorch.html',
'https://nixtlaverse.nixtla.io/neuralforecast/losses.numpy.html',
'https://nixtlaverse.nixtla.io/neuralforecast/common.base_auto.html',
'https://nixtlaverse.nixtla.io/neuralforecast/common.scalers.html',
'https://nixtlaverse.nixtla.io/neuralforecast/common.modules.html',
'https://nixtlaverse.nixtla.io/neuralforecast/tsdataset.html',
'https://nixtlaverse.nixtla.io/neuralforecast/utils.html',
'https://discuss.ray.io/t/custom-path-for-ray-results-without-using-tune/13223',
'https://discuss.ray.io/',
'https://mlflow.org/docs/latest/ml/tracking/',
'https://mlflow.org/docs/latest/ml/tracking/tracking-api/',
'https://mlflow.org/docs/latest/ml/tracking/#mlflow-tracking-apis',
'https://mlflow.org/docs/latest/self-hosting/architecture/backend-store/',
'https://mlflow.org/docs/latest/ml/tracking/#backend-store',
'https://mlflow.org/docs/latest/self-hosting/architecture/artifact-store/',
'https://mlflow.org/docs/latest/ml/tracking/#artifact-stores',
'https://mlflow.org/docs/latest/self-hosting/architecture/tracking-server/',
'https://mlflow.org/docs/latest/ml/tracking/#tracking_server',
'https://mlflow.org/docs/latest/ml/tracking/quickstart/',
'https://mlflow.org/docs/latest/ml/tracking/tutorials/local-database/',
'https://mlflow.org/docs/latest/ml/tracking/tutorials/remote-server/',
'https://mlflow.org/docs/latest/ml/tracking/#how-can-i-organize-many-mlflow-runs-neatly',
'https://mlflow.org/docs/latest/ml/tracking/#can-i-directly-access-remote-storage-without-running-the-tracking-server',
'https://mlflow.org/docs/latest/ml/model-registry/',
'https://mlflow.org/docs/latest/ml/',
'https://docs.databricks.com/aws/en/mlflow/',
'https://mlflow.org/docs/latest/genai/',
'https://docs.databricks.com/aws/en/mlflow3/genai/',
'https://mlflow.org/docs/latest/api_reference/index.html',
'https://mlflow.org/docs/latest/self-hosting/',
'https://mlflow.org/docs/latest/community/',
'https://github.com/mlflow/mlflow',
'https://lightning.ai/docs/pytorch/stable/extensions/logging.html',
'https://github.com/Lightning-AI/pytorch-lightning#looking-for-gpus',
'https://lightning.ai/?utm_source=ptl_readme&utm_medium=referral&utm_campaign=ptl_readme',
'https://lightning.ai/pricing?utm_source=ptl_readme&utm_medium=referral&utm_campaign=ptl_readme',
'https://lightning.ai/clusters?utm_source=ptl_readme&utm_medium=referral&utm_campaign=ptl_readme',
'https://lightning.ai/studios?utm_source=ptl_readme&utm_medium=referral&utm_campaign=ptl_readme',
'https://lightning.ai/notebooks?utm_source=ptl_readme&utm_medium=referral&utm_campaign=ptl_readme',
'https://lightning.ai/deploy?utm_source=ptl_readme&utm_medium=referral&utm_campaign=ptl_readme',
'https://github.com/Lightning-AI/pytorch-lightning#why-pytorch-lightning',
'https://github.com/Lightning-AI/pytorch-lightning#lightning-fabric-expert-contro',
"https://docs.ray.io/en/latest/tune/api_docs/suggestion.html",
"https://optuna.readthedocs.io/en/stable/reference/samplers/index.html",
"https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html",
"https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/007_optuna_callback.html",
"https://docs.ray.io/en/latest/tune/index.html",
"https://docs.ray.io/en/latest/tune/key-concepts.html",
"https://docs.ray.io/en/latest/tune/getting-started.html",
"https://docs.ray.io/en/latest/tune/tutorials/tune-lifecycle.html",
"https://docs.ray.io/en/latest/tune/api/search_space.html",
"https://docs.ray.io/en/latest/tune/tutorials/tune-search-spaces.html",
"https://docs.ray.io/en/latest/tune/api/suggestion.html",
"https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.html",
"https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.html",
"https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.html",
"https://docs.ray.io/en/latest/tune/api/schedulers.html",
"https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html",
"https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html",
"https://docs.ray.io/en/latest/tune/api/callbacks.html",
"https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html",
"https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html",
"https://docs.ray.io/en/latest/tune/tutorials/tune-storage.html",
"https://docs.ray.io/en/latest/tune/api/api.html",
"https://docs.ray.io/en/latest/tune/examples/optuna_example.html",
"https://docs.pytorch.org/tutorials/beginner/hyperparameter_tuning_tutorial.html",
"https://docs.wandb.ai/models/integrations/ray-tune",
"https://docs.ultralytics.com/integrations/ray-tune/",
"https://docs.ultralytics.com/reference/utils/callbacks/raytune/",
"https://www.kaggle.com/code/moritzm00/xgboost-hyperparameter-tuning-with-ray-tune",
"https://arxiv.org/abs/1807.05118",
"https://optuna.org/",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.create_study.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.load_study.html",
"https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.BaseSampler.html",
"https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html",
"https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.CmaEsSampler.html",
"https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.QMCSampler.html",
"https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.RandomSampler.html",
"https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.GPSampler.html",
"https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html",
"https://optuna.readthedocs.io/en/stable/reference/pruners.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.MedianPruner.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.SuccessiveHalvingPruner.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.ThresholdPruner.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.PatientPruner.html",
"https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/006_user_defined_pruner.html",
"https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/009_ask_and_tell.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.TrialPruned.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.storages.RetryFailedTrialCallback.html",
"https://optuna.readthedocs.io/en/stable/reference/generated/optuna.copy_study.html"
]
len(URLS)

157

In [None]:

# CSS selectors handed to the extractor as candidates for the page's
# main-content container.
SELECTORS = [
    ".bd-article",
    "article",
    "main[role='main']",
    "main",
    "#main",
    "#content",
]

# --- Option 1 (disabled): write every URL into one combined file. ---
# _log("=== ① 全URLを1ファイルにまとめて出力 ===")
# out_all = extract_many_and_save(
#     URLS,
#     selectors=SELECTORS,
#     fmt="md",                         # "html" / "txt" / "json" are also accepted
#     output="ALL_bundle.md",
#     title="",
#     announce_duplicates=True,
#     sleep_seconds=1.0,
#     show_progress=True,
# )
# _log(f"出力ファイル: {out_all}")

# --- Option 2 (active): write one file per domain group. ---
_log("=== ② グループごとに別ファイルへ ===")

# Host name -> group label, expanded from (label, hosts) pairs so that hosts
# sharing a label sit next to each other. Key order matches the hand-written
# mapping this replaces. Hosts not listed here receive no alias.
alias = {
    host: label
    for label, hosts in (
        ("Ray", ("docs.ray.io", "discuss.ray.io")),
        ("NeuralForecast", ("nixtlaverse.nixtla.io", "nixtla.github.io")),
        ("MLflow", ("mlflow.org",)),
        ("MLflow-Databricks", ("docs.databricks.com",)),
        ("Lightning", ("lightning.ai",)),
        ("GitHub", ("github.com",)),
        ("optuna.readthedocs.io", ("optuna.readthedocs.io",)),
    )
    for host in hosts
}

# Bucket the URLs by (aliased) domain, then export each bucket to its own
# markdown file under "bundles".
groups = group_by_domain(URLS, domain_alias=alias, drop_fragments=True)
results = extract_grouped_and_save(
    groups,
    # where and how to write
    output_dir="bundles",
    fmt="md",
    filename_template="{index:02d}_{name}.md",
    title_template="{name}",
    # what to extract
    selectors=SELECTORS,
    drop_fragments=True,
    # run behaviour
    announce_duplicates=True,
    sleep_seconds=5.0,
    show_progress=True,
)
_log(f"出力ファイル群: {results}")


[11:12:05] === ② グループごとに別ファイルへ ===
[11:12:05] [dedupe] （全体）重複URLを削除: 7件 -> https://nixtlaverse.nixtla.io/neuralforecast/models.html, https://docs.ray.io/en/latest/tune/api_docs/suggestion.html, https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html, https://optuna.readthedocs.io/en/stable/reference/samplers/index.html, https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/007_optuna_callback.html, https://mlflow.org/docs/latest/ml/tracking/, https://github.com/Lightning-AI/pytorch-lightning
[11:12:05] グループ処理を開始（合計URL: 104, 出力先: bundles）


全体進捗（グループ）:   0%|                             | 0/104 [00:00<?, ?it/s]

[11:12:05] [NeuralForecast] 重複URLを削除: 1件 -> https://nixtlaverse.nixtla.io/neuralforecast/models.html
[11:12:05] --- グループ開始: NeuralForecast（25件） -> 01_NeuralForecast.md
[11:12:05] 合計 25 件を処理します -> 出力先: bundles\01_NeuralForecast.md
[11:12:05] [1/2] 取得開始: https://nixtlaverse.nixtla.io/neuralforecast/models.html
[11:12:06] 取得成功: https://nixtlaverse.nixtla.io/neuralforecast/models.html（タイトル: AutoModels - Nixtla）
[11:12:11] [1/2] 取得開始: https://nixtla.github.io/neuralforecast/losses.pytorch.html
[11:12:11] [cloudscraper固定] meta refresh を検出: https://nixtla.github.io/neuralforecast/losses.pytorch.html -> https://nixtlaverse.nixtla.io/neuralforecast/losses.pytorch.html
[11:12:12] 取得成功: https://nixtla.github.io/neuralforecast/losses.pytorch.html（タイトル: PyTorch Losses - Nixtla）
[11:12:17] [1/2] 取得開始: https://nixtlaverse.nixtla.io/neuralforecast/core.html
[11:12:18] 取得成功: https://nixtlaverse.nixtla.io/neuralforecast/core.html（タイトル: Core - Nixtla）
[11:12:23] [1/2] 取得開始: https://nixtlaverse.nixtla.io/

全体進捗（グループ）:  24%|██▉         | 25/104 [02:12<06:59,  5.31s/it, 25/104]

[11:14:18] --- グループ完了: NeuralForecast（25件処理）
[11:14:18] [Ray] 重複URLを削除: 2件 -> https://docs.ray.io/en/latest/tune/api_docs/suggestion.html, https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
[11:14:18] --- グループ開始: Ray（23件） -> 02_Ray.md
[11:14:18] 合計 23 件を処理します -> 出力先: bundles\02_Ray.md
[11:14:18] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
[11:14:18] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
[11:14:18] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api_docs/suggestion.html
[11:14:19] 取得成功: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html（タイトル: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html）
[11:14:24] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
[11:14:24] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
[11:14:24] [cloudscraper固定] Jina Reader フォールバック: h

In [6]:
# === Ray / Optuna だけ実行して別ファイルに書き出す ===
from urllib.parse import urlparse
from pathlib import Path

# Host names that select each documentation set out of the master URL list.
RAY_DOMAINS = {
    "docs.ray.io",
    "discuss.ray.io",
}
OPTUNA_DOMAINS = {
    "optuna.readthedocs.io",
    "optuna.org",
}

def _filter_by_domain(urls, domains):
    """指定ドメインだけ抽出し、順序を保ったまま一意化"""
    seen, out = set(), []
    for u in urls:
        if urlparse(u).netloc in domains and u not in seen:
            out.append(u)
            seen.add(u)
    return out

# Split the master URL list into the Ray-only and Optuna-only subsets.
ray_urls = _filter_by_domain(URLS, RAY_DOMAINS)
optuna_urls = _filter_by_domain(URLS, OPTUNA_DOMAINS)

# Log the subset sizes up front: per the original note, a subset with zero
# URLs would produce an empty output file, so make that visible.
_log(f"Ray 対象: {len(ray_urls)} 件")
_log(f"Optuna 対象: {len(optuna_urls)} 件")

# Candidate selectors for the main-content container (same list as the
# earlier cell).
SELECTORS = [
    ".bd-article",
    "article",
    "main[role='main']",
    "main",
    "#main",
    "#content",
]

# Make sure the output directory exists before writing into it.
Path("bundles").mkdir(exist_ok=True)

# --- Ray only ---
out_ray = extract_many_and_save(
    ray_urls,
    output="bundles/02_Ray.md",  # original note: an existing file is overwritten
    title="Ray ドキュメントまとめ（Rayのみ実行）",
    fmt="md",
    selectors=SELECTORS,
    announce_duplicates=True,
    show_progress=True,
    sleep_seconds=1.5,  # original note: a short wait tends to be more stable
)
print("Ray 出力:", out_ray)

# --- Optuna only ---
out_optuna = extract_many_and_save(
    optuna_urls,
    output="bundles/03_Optuna.md",  # file name can be changed freely
    title="Optuna ドキュメントまとめ（Optunaのみ実行）",
    fmt="md",
    selectors=SELECTORS,
    announce_duplicates=True,
    show_progress=True,
    sleep_seconds=1.5,
)
print("Optuna 出力:", out_optuna)


[11:37:28] Ray 対象: 23 件
[11:37:28] Optuna 対象: 25 件
[11:37:28] 合計 23 件を処理します -> 出力先: bundles/02_Ray.md


全体進捗:   0%|                                          | 0/23 [00:00<?, ?it/s]

[11:37:28] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
[11:37:28] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
[11:37:28] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api_docs/suggestion.html
[11:37:28] 取得成功: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html（タイトル: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html）


全体進捗:   4%|█▏                          | 1/23 [00:00<00:18,  1.17it/s, 1/23]

[11:37:30] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
[11:37:30] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
[11:37:30] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
[11:37:30] 取得成功: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html（タイトル: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.ht）


全体進捗:   9%|██▍                         | 2/23 [00:02<00:32,  1.55s/it, 2/23]

[11:37:32] [1/2] 取得開始: https://discuss.ray.io/t/custom-path-for-ray-results-without-using-tune/13223
[11:37:33] 取得成功: https://discuss.ray.io/t/custom-path-for-ray-results-without-using-tune/13223（タイトル: Custom path for ~/ray_results without using Tune）


全体進捗:  13%|███▋                        | 3/23 [00:05<00:38,  1.93s/it, 3/23]

[11:37:34] [1/2] 取得開始: https://discuss.ray.io/
[11:37:35] 取得成功: https://discuss.ray.io/（タイトル: Ray）


全体進捗:  17%|████▊                       | 4/23 [00:07<00:36,  1.92s/it, 4/23]

[11:37:36] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/index.html
[11:37:36] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/index.html
[11:37:36] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/index.html
[11:37:37] 取得成功: https://docs.ray.io/en/latest/tune/index.html（タイトル: https://docs.ray.io/en/latest/tune/index.html）


全体進捗:  22%|██████                      | 5/23 [00:09<00:37,  2.07s/it, 5/23]

[11:37:39] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/key-concepts.html
[11:37:39] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/key-concepts.html
[11:37:39] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/key-concepts.html
[11:37:39] 取得成功: https://docs.ray.io/en/latest/tune/key-concepts.html（タイトル: https://docs.ray.io/en/latest/tune/key-concepts.html）


全体進捗:  26%|███████▎                    | 6/23 [00:11<00:35,  2.11s/it, 6/23]

[11:37:41] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/getting-started.html
[11:37:41] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/getting-started.html
[11:37:41] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/getting-started.html
[11:37:41] 取得成功: https://docs.ray.io/en/latest/tune/getting-started.html（タイトル: https://docs.ray.io/en/latest/tune/getting-started.html）


全体進捗:  30%|████████▌                   | 7/23 [00:13<00:33,  2.11s/it, 7/23]

[11:37:43] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-lifecycle.html
[11:37:43] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-lifecycle.html
[11:37:43] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/tutorials/tune-lifecycle.html
[11:37:44] 取得成功: https://docs.ray.io/en/latest/tune/tutorials/tune-lifecycle.html（タイトル: https://docs.ray.io/en/latest/tune/tutorials/tune-lifecycle.）


全体進捗:  35%|█████████▋                  | 8/23 [00:16<00:32,  2.17s/it, 8/23]

[11:37:45] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/search_space.html
[11:37:45] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/search_space.html
[11:37:45] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/search_space.html
[11:37:46] 取得成功: https://docs.ray.io/en/latest/tune/api/search_space.html（タイトル: https://docs.ray.io/en/latest/tune/api/search_space.html）


全体進捗:  39%|██████████▉                 | 9/23 [00:18<00:30,  2.17s/it, 9/23]

[11:37:47] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-search-spaces.html
[11:37:47] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-search-spaces.html
[11:37:47] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/tutorials/tune-search-spaces.html
[11:37:48] 取得成功: https://docs.ray.io/en/latest/tune/tutorials/tune-search-spaces.html（タイトル: https://docs.ray.io/en/latest/tune/tutorials/tune-search-spa）


全体進捗:  43%|███████████▎              | 10/23 [00:20<00:27,  2.14s/it, 10/23]

[11:37:49] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/suggestion.html
[11:37:49] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/suggestion.html
[11:37:49] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/suggestion.html
[11:37:59] 取得成功: https://docs.ray.io/en/latest/tune/api/suggestion.html（タイトル: https://docs.ray.io/en/latest/tune/api/suggestion.html）


全体進捗:  48%|████████████▍             | 11/23 [00:31<00:58,  4.85s/it, 11/23]

[11:38:00] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.html
[11:38:00] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.html
[11:38:00] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.html
[11:38:01] 取得成功: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.html（タイトル: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.o）


全体進捗:  52%|█████████████▌            | 12/23 [00:33<00:44,  4.03s/it, 12/23]

[11:38:03] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.html
[11:38:03] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.html
[11:38:03] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.html
[11:38:03] 取得成功: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.html（タイトル: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.o）


全体進捗:  57%|██████████████▋           | 13/23 [00:35<00:34,  3.48s/it, 13/23]

[11:38:05] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.html
[11:38:05] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.html
[11:38:05] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.html
[11:38:05] 取得成功: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.html（タイトル: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.o）


全体進捗:  61%|███████████████▊          | 14/23 [00:37<00:27,  3.05s/it, 14/23]

[11:38:07] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/schedulers.html
[11:38:07] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/schedulers.html
[11:38:07] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/schedulers.html
[11:38:11] 取得成功: https://docs.ray.io/en/latest/tune/api/schedulers.html（タイトル: https://docs.ray.io/en/latest/tune/api/schedulers.html）


全体進捗:  65%|████████████████▉         | 15/23 [00:43<00:30,  3.85s/it, 15/23]

[11:38:13] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html
[11:38:13] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html
[11:38:13] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html
[11:38:33] 取得失敗（試行1/2）: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)"))
[11:38:34] [2/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html
[11:38:34] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html
[11:38:34] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html
[11:38:54] 取得失敗

全体進捗:  70%|██████████████████        | 16/23 [01:27<01:50, 15.83s/it, 16/23]

[11:38:56] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html
[11:38:56] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html
[11:38:56] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html
[11:39:16] 取得失敗（試行1/2）: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)"))
[11:39:17] [2/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html
[11:39:17] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html
[11:39:17] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/la

全体進捗:  74%|███████████████████▏      | 17/23 [02:10<02:25, 24.21s/it, 17/23]

[11:39:40] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/callbacks.html
[11:39:40] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/callbacks.html
[11:39:40] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/callbacks.html
[11:40:00] 取得失敗（試行1/2）: https://docs.ray.io/en/latest/tune/api/callbacks.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)"))
[11:40:01] [2/2] 取得開始: https://docs.ray.io/en/latest/tune/api/callbacks.html
[11:40:01] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/callbacks.html
[11:40:01] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/callbacks.html
[11:40:21] 取得失敗（試行2/2）: https://docs.ray.io/en/latest/tune/api/callbacks.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)"))


全体進捗:  78%|████████████████████▎     | 18/23 [02:54<02:30, 30.09s/it, 18/23]

[11:40:24] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html
[11:40:24] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html
[11:40:24] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html
[11:40:44] 取得失敗（試行1/2）: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)"))
[11:40:45] [2/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html
[11:40:45] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html
[11:40:45] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html
[11:41:05] 取得失敗（試行2/2）: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html -> ReadTimeout(ReadTimeoutError("HTTP

全体進捗:  83%|█████████████████████▍    | 19/23 [03:38<02:16, 34.19s/it, 19/23]

[11:41:07] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html
[11:41:07] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html
[11:41:07] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/tutorials/tune-resources.html
[11:41:28] 取得失敗（試行1/2）: https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)"))
[11:41:29] [2/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html
[11:41:29] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html
[11:41:29] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/tutorials/tune-resources.html
[11:41:49] 取得失敗（試行2/2）: https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html -> ReadTimeout(ReadTimeoutError("HTTPSConnect

全体進捗:  87%|██████████████████████▌   | 20/23 [04:22<01:51, 37.06s/it, 20/23]

[11:41:51] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-storage.html
[11:41:51] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-storage.html
[11:41:51] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/tutorials/tune-storage.html
[11:41:55] 取得成功: https://docs.ray.io/en/latest/tune/tutorials/tune-storage.html（タイトル: https://docs.ray.io/en/latest/tune/tutorials/tune-storage.ht）


全体進捗:  91%|███████████████████████▋  | 21/23 [04:27<00:55, 27.64s/it, 21/23]

[11:41:57] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/api.html
[11:41:57] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/api.html
[11:41:57] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/api.html
[11:42:17] 取得失敗（試行1/2）: https://docs.ray.io/en/latest/tune/api/api.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)"))
[11:42:18] [2/2] 取得開始: https://docs.ray.io/en/latest/tune/api/api.html
[11:42:18] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/api.html
[11:42:18] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/api/api.html
[11:42:38] 取得失敗（試行2/2）: https://docs.ray.io/en/latest/tune/api/api.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)"))


全体進捗:  96%|████████████████████████▊ | 22/23 [05:11<00:32, 32.45s/it, 22/23]

[11:42:41] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/examples/optuna_example.html
[11:42:41] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/examples/optuna_example.html
[11:42:41] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/examples/optuna_example.html
[11:43:01] 取得失敗（試行1/2）: https://docs.ray.io/en/latest/tune/examples/optuna_example.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)"))
[11:43:02] [2/2] 取得開始: https://docs.ray.io/en/latest/tune/examples/optuna_example.html
[11:43:02] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/examples/optuna_example.html
[11:43:02] [cloudscraper固定] Jina Reader フォールバック: https://r.jina.ai/http://docs.ray.io/en/latest/tune/examples/optuna_example.html
[11:43:22] 取得失敗（試行2/2）: https://docs.ray.io/en/latest/tune/examples/optuna_example.html -> ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(

全体進捗: 100%|██████████████████████████| 23/23 [05:55<00:00, 15.44s/it, 23/23]

[11:43:23] 書き出し開始: bundles/02_Ray.md（形式: md, 件数: 23）
[11:43:23] 書き出し完了: bundles/02_Ray.md
[11:43:23] ⚠ 取得に失敗したURL: 7件
[11:43:23]   - https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html -> HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)
[11:43:23]   - https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html -> HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)
[11:43:23]   - https://docs.ray.io/en/latest/tune/api/callbacks.html -> HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)
[11:43:23]   - https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html -> HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)
[11:43:23]   - https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html -> HTTPSConnectionPool(host='r.jina.ai', port=443): Read timed out. (read timeout=20)
[11:43:23]   - h


全体進捗:   0%|                                          | 0/25 [00:00<?, ?it/s]

[11:43:23] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/samplers/index.html
[11:43:24] 取得成功: https://optuna.readthedocs.io/en/stable/reference/samplers/index.html（タイトル: optuna.samplers — Optuna 4.6.0 documentation）


全体進捗:   4%|█                           | 1/25 [00:00<00:18,  1.30it/s, 1/25]

[11:43:25] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/007_optuna_callback.html
[11:43:25] 取得成功: https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/007_optuna_callback.html（タイトル: Callback for Study.optimize — Optuna 4.6.0 documentation）


全体進捗:   8%|██▏                         | 2/25 [00:02<00:31,  1.38s/it, 2/25]

[11:43:27] [1/2] 取得開始: https://optuna.org/
[11:43:27] 取得成功: https://optuna.org/（タイトル: Optuna - A hyperparameter optimization framework）


全体進捗:  12%|███▎                        | 3/25 [00:04<00:36,  1.64s/it, 3/25]

[11:43:29] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html
[11:43:29] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html（タイトル: optuna.study.Study — Optuna 4.6.0 documentation）


全体進捗:  16%|████▍                       | 4/25 [00:06<00:37,  1.78s/it, 4/25]

[11:43:31] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html
[11:43:31] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html（タイトル: optuna.trial.Trial — Optuna 4.6.0 documentation）


全体進捗:  20%|█████▌                      | 5/25 [00:08<00:36,  1.82s/it, 5/25]

[11:43:33] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.create_study.html
[11:43:33] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.create_study.html（タイトル: optuna.study.create_study — Optuna 4.6.0 documentation）


全体進捗:  24%|██████▋                     | 6/25 [00:10<00:33,  1.79s/it, 6/25]

[11:43:34] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.load_study.html
[11:43:35] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.load_study.html（タイトル: optuna.study.load_study — Optuna 4.6.0 documentation）


全体進捗:  28%|███████▊                    | 7/25 [00:11<00:31,  1.73s/it, 7/25]

[11:43:36] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.BaseSampler.html
[11:43:36] 取得成功: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.BaseSampler.html（タイトル: optuna.samplers.BaseSampler — Optuna 4.6.0 documentation）


全体進捗:  32%|████████▉                   | 8/25 [00:13<00:28,  1.71s/it, 8/25]

[11:43:38] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html
[11:43:38] 取得成功: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html（タイトル: optuna.samplers.TPESampler — Optuna 4.6.0 documentation）


全体進捗:  36%|██████████                  | 9/25 [00:15<00:29,  1.86s/it, 9/25]

[11:43:40] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.CmaEsSampler.html
[11:43:40] 取得成功: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.CmaEsSampler.html（タイトル: optuna.samplers.CmaEsSampler — Optuna 4.6.0 documentation）


全体進捗:  40%|██████████▍               | 10/25 [00:17<00:27,  1.82s/it, 10/25]

[11:43:42] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.QMCSampler.html
[11:43:42] 取得成功: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.QMCSampler.html（タイトル: optuna.samplers.QMCSampler — Optuna 4.6.0 documentation）


全体進捗:  44%|███████████▍              | 11/25 [00:19<00:24,  1.78s/it, 11/25]

[11:43:43] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.RandomSampler.html
[11:43:43] 取得成功: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.RandomSampler.html（タイトル: optuna.samplers.RandomSampler — Optuna 4.6.0 documentation）


全体進捗:  48%|████████████▍             | 12/25 [00:20<00:22,  1.74s/it, 12/25]

[11:43:45] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.GPSampler.html
[11:43:45] 取得成功: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.GPSampler.html（タイトル: optuna.samplers.GPSampler — Optuna 4.6.0 documentation）


全体進捗:  52%|█████████████▌            | 13/25 [00:22<00:20,  1.72s/it, 13/25]

[11:43:47] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html
[11:43:47] 取得成功: https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html（タイトル: optuna.samplers.NSGAIISampler — Optuna 4.6.0 documentation）


全体進捗:  56%|██████████████▌           | 14/25 [00:24<00:18,  1.72s/it, 14/25]

[11:43:48] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/pruners.html
[11:43:48] 取得成功: https://optuna.readthedocs.io/en/stable/reference/pruners.html（タイトル: optuna.pruners — Optuna 4.6.0 documentation）


全体進捗:  60%|███████████████▌          | 15/25 [00:25<00:16,  1.69s/it, 15/25]

[11:43:50] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.MedianPruner.html
[11:43:50] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.MedianPruner.html（タイトル: optuna.pruners.MedianPruner — Optuna 4.6.0 documentation）


全体進捗:  64%|████████████████▋         | 16/25 [00:27<00:15,  1.67s/it, 16/25]

[11:43:52] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.SuccessiveHalvingPruner.html
[11:43:52] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.SuccessiveHalvingPruner.html（タイトル: optuna.pruners.SuccessiveHalvingPruner — Optuna 4.6.0 docume）


全体進捗:  68%|█████████████████▋        | 17/25 [00:29<00:13,  1.68s/it, 17/25]

[11:43:53] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html
[11:43:53] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html（タイトル: optuna.pruners.HyperbandPruner — Optuna 4.6.0 documentation）


全体進捗:  72%|██████████████████▋       | 18/25 [00:30<00:11,  1.67s/it, 18/25]

[11:43:55] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.ThresholdPruner.html
[11:43:55] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.ThresholdPruner.html（タイトル: optuna.pruners.ThresholdPruner — Optuna 4.6.0 documentation）


全体進捗:  76%|███████████████████▊      | 19/25 [00:32<00:10,  1.67s/it, 19/25]

[11:43:57] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.PatientPruner.html
[11:43:57] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.PatientPruner.html（タイトル: optuna.pruners.PatientPruner — Optuna 4.6.0 documentation）


全体進捗:  80%|████████████████████▊     | 20/25 [00:33<00:08,  1.66s/it, 20/25]

[11:43:58] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/006_user_defined_pruner.html
[11:43:58] 取得成功: https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/006_user_defined_pruner.html（タイトル: User-Defined Pruner — Optuna 4.6.0 documentation）


全体進捗:  84%|█████████████████████▊    | 21/25 [00:35<00:06,  1.66s/it, 21/25]

[11:44:00] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/009_ask_and_tell.html
[11:44:00] 取得成功: https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/009_ask_and_tell.html（タイトル: Ask-and-Tell Interface — Optuna 4.6.0 documentation）


全体進捗:  88%|██████████████████████▉   | 22/25 [00:37<00:05,  1.69s/it, 22/25]

[11:44:02] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.TrialPruned.html
[11:44:02] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.TrialPruned.html（タイトル: optuna.TrialPruned — Optuna 4.6.0 documentation）


全体進捗:  92%|███████████████████████▉  | 23/25 [00:39<00:03,  1.68s/it, 23/25]

[11:44:03] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.storages.RetryFailedTrialCallback.html
[11:44:03] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.storages.RetryFailedTrialCallback.html（タイトル: optuna.storages.RetryFailedTrialCallback — Optuna 4.6.0 docu）


全体進捗:  96%|████████████████████████▉ | 24/25 [00:40<00:01,  1.67s/it, 24/25]

[11:44:05] [1/2] 取得開始: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.copy_study.html
[11:44:05] 取得成功: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.copy_study.html（タイトル: optuna.copy_study — Optuna 4.6.0 documentation）


全体進捗: 100%|██████████████████████████| 25/25 [00:42<00:00,  1.69s/it, 25/25]

[11:44:05] 書き出し開始: bundles/03_Optuna.md（形式: md, 件数: 25）





[11:44:06] 書き出し完了: bundles/03_Optuna.md
[11:44:06] すべてのURLを正常に取得しました。
Optuna 出力: bundles\03_Optuna.md


In [7]:
# --- 追加: Ray専用 代替候補URLの生成（_sources / stable / GitHub raw） ---
from urllib.parse import urlparse

def _swap_latest_stable(url: str) -> str:
    return url.replace("/en/latest/", "/en/stable/")

def _ray_sources_candidate(url: str) -> str | None:
    """docs.ray.io の .html を _sources の .rst.txt に変換（latest）"""
    p = urlparse(url)
    if p.netloc != "docs.ray.io" or not p.path.endswith(".html"):
        return None
    # /en/latest/xxx.html -> /en/latest/_sources/xxx.rst.txt
    path_no_html = p.path[:-5]  # drop .html
    return f"https://{p.netloc}/en/latest/_sources{path_no_html}.rst.txt"

def _ray_sources_candidate_stable(url: str) -> str | None:
    p = urlparse(_swap_latest_stable(url))
    if p.netloc != "docs.ray.io" or not p.path.endswith(".html"):
        return None
    path_no_html = p.path[:-5]
    return f"https://{p.netloc}/en/stable/_sources{path_no_html}.rst.txt"

def _ray_github_candidates(url: str) -> list[str]:
    """
    Ray公式リポのRst原稿を推測（最も当たりやすいパターンのみ）
    例: https://docs.ray.io/en/latest/tune/api/schedulers.html
        -> https://raw.githubusercontent.com/ray-project/ray/master/doc/source/tune/api/schedulers.rst
    例: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html
        -> .../doc/source/tune/api/doc/ray.tune.schedulers.ASHAScheduler.rst
    """
    cands = []
    p = urlparse(url)
    if p.netloc != "docs.ray.io" or not p.path.endswith(".html"):
        return cands
    path_no_root = p.path.split("/en/latest/", 1)[-1].rstrip("/")
    base = "https://raw.githubusercontent.com/ray-project/ray/master/doc/source/"
    # rst / md の両方を一応試す
    if path_no_root.endswith(".html"):
        stem = path_no_root[:-5]  # drop .html
        cands.append(base + stem + ".rst")
        cands.append(base + stem + ".md")
    return cands

def _wrap_as_pre(text: str) -> str:
    """Return *text*, passed through ``_escape_html``, inside ``<article><pre>``.

    NOTE(review): the ``<article>`` wrapper presumably lets the "article"
    entry in SELECTORS match plain-text fallbacks -- confirm against the
    extraction code.
    """
    escaped = _escape_html(text)
    return "".join(("<article><pre>", escaped, "</pre></article>"))


In [8]:
# --- 置換: フェッチ関数（フォールバック強化 & どの経路かログ表示） ---
def _fetch_html_with_fallbacks(url: str, timeout: int = 20) -> str:
    """
    Fetch *url* and return its text, falling back through alternative routes.

    Order:
      1) cloudscraper (URL as given)
      2) Sphinx _sources (latest -> stable)
      3) cloudscraper (stable)
      4) Jina Reader (latest -> stable) [timeout 60s]
      5) GitHub raw (doc/source/... .rst / .md)

    Each attempt is logged through ``_log`` so the route that succeeded (or
    the reason each one failed) is visible in the output.

    Args:
        url: Page to fetch.
        timeout: Timeout in seconds for the direct fetches in steps 1 and 3.
            Steps 2 and 5 use 30 seconds and step 4 uses 60 seconds.

    Returns:
        The response text of the first route that succeeds. Text obtained in
        steps 2 and 5 is passed through ``_wrap_as_pre``; the other routes
        return the response body as received.

    Raises:
        requests.RequestException: If every route failed.
    """
    def _get(u: str, t: int = timeout, referer: str | None = None) -> requests.Response:
        # Send a Referer header only when one was supplied.
        hdrs = {"Referer": referer} if referer else {}
        r = _SCRAPER.get(u, timeout=t, allow_redirects=True, headers=hdrs)
        r.raise_for_status()
        return r

    def _jina_reader(u: str, t: int = 60) -> str:
        # Drop the #fragment, then hand the scheme-less URL to r.jina.ai.
        base = u.split("#", 1)[0]
        jurl = f"https://r.jina.ai/http://{base.replace('https://','').replace('http://','')}"
        _log(f"[cloudscraper固定] Jina Reader フォールバック: {jurl}")
        rj = _SCRAPER.get(jurl, timeout=t)
        rj.raise_for_status()
        return rj.text

    # 1) Direct fetch of the URL as given.
    # NOTE(review): only requests exceptions are handled here, so any other
    # exception type raised by _SCRAPER skips the fallbacks -- confirm whether
    # that is intended (later steps catch Exception).
    try:
        r = _get(url)
        _log(f"[fetch] success[cloudscraper/latest]: {url}")
        return r.text
    except requests.HTTPError as e:
        code = getattr(e.response, "status_code", None)
        _log(f"[cloudscraper固定] HTTPエラー {code} を検出: {url}")
    except requests.RequestException as e:
        _log(f"[cloudscraper固定] ネットワーク系: {url} -> {e!r}")

    # 2) Sphinx _sources (latest, then stable). The converters return None
    #    for URLs they do not apply to, which skips the attempt.
    for conv in (_ray_sources_candidate, _ray_sources_candidate_stable):
        s = conv(url)
        if s:
            try:
                _log(f"[cloudscraper固定] Sphinx _sources を試行: {s}")
                r = _get(s, t=30, referer=url)
                _log(f"[fetch] success[_sources]: {s}")
                return _wrap_as_pre(r.text)
            except Exception as e:
                _log(f"[cloudscraper固定] _sources 失敗: {s} -> {e!r}")

    # 3) Retry against the stable docs, only if that is a different URL.
    url_stable = _swap_latest_stable(url)
    if url_stable != url:
        try:
            r = _get(url_stable)
            _log(f"[fetch] success[cloudscraper/stable]: {url_stable}")
            return r.text
        except Exception as e:
            _log(f"[cloudscraper固定] stable 失敗: {url_stable} -> {e!r}")

    # 4) Jina Reader (latest, then stable), 60 seconds each.
    #    dict.fromkeys de-duplicates while keeping order: when the URL has no
    #    /en/latest/ segment both entries are identical, and the same failing
    #    request would otherwise be sent twice.
    for u in dict.fromkeys((url, url_stable)):
        try:
            html = _jina_reader(u, t=60)
            _log(f"[fetch] success[jina]: {u}")
            return html
        except Exception as e:
            _log(f"[cloudscraper固定] Jina 失敗: {u} -> {e!r}")

    # 5) Last resort: the manuscript on GitHub raw.
    for cand in _ray_github_candidates(url):
        try:
            _log(f"[cloudscraper固定] GitHub raw を試行: {cand}")
            r = _get(cand, t=30)
            _log(f"[fetch] success[github_raw]: {cand}")
            return _wrap_as_pre(r.text)
        except Exception as e:
            _log(f"[cloudscraper固定] GitHub raw 失敗: {cand} -> {e!r}")

    raise requests.RequestException(f"all fallbacks failed for {url}")


In [10]:
# Selector strings handed to extract_many_and_save through `selectors=` below.
# NOTE(review): presumably CSS selectors tried in list order to locate the
# main content of a page -- confirm against extract_many_and_save.
SELECTORS = [".bd-article", "article", "main[role='main']", "main", "#main", "#content"]
from urllib.parse import urlparse
def _filter_by_domain(urls, domains):
    seen, out = set(), []
    for u in urls:
        if urlparse(u).netloc in domains and u not in seen:
            out.append(u); seen.add(u)
    return out

# Hosts kept for the Ray-only re-run; every other URL in URLS is skipped.
RAY_DOMAINS = {"docs.ray.io", "discuss.ray.io"}
# URLS is defined in an earlier cell; keep only the entries on the hosts above.
ray_urls = _filter_by_domain(URLS, RAY_DOMAINS)

# Re-run extraction for the Ray URLs only and write them to a single bundle.
# NOTE(review): sleep_seconds is presumably the pause between requests --
# confirm against extract_many_and_save.
out_ray = extract_many_and_save(
    ray_urls,
    selectors=SELECTORS,
    fmt="md",
    output="bundles/02_Ray.md",
    title="Ray ドキュメントまとめ（Rayのみ再実行 with 強化フォールバック）",
    announce_duplicates=True,
    sleep_seconds=1.5,
    show_progress=True,
)
# Show the value returned by extract_many_and_save for the Ray bundle.
print("Ray 出力:", out_ray)


[11:49:15] 合計 23 件を処理します -> 出力先: bundles/02_Ray.md


全体進捗:   0%|                                          | 0/23 [00:00<?, ?it/s]

[11:49:15] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
[11:49:15] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
[11:49:15] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api_docs/suggestion.rst.txt
[11:49:15] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api_docs/suggestion.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api_docs/suggestion.rst.txt')
[11:49:15] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api_docs/suggestion.rst.txt
[11:49:15] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/api_docs/suggestion.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/api_docs/suggestion.rst.txt')
[11:49:16] [cloudscraper固定] stable

全体進捗:   4%|█▏                          | 1/23 [00:00<00:16,  1.34it/s, 1/23]

[11:49:18] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
[11:49:18] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
[11:49:18] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-metrics.rst.txt
[11:49:18] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-metrics.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-metrics.rst.txt')
[11:49:18] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-metrics.rst.txt
[11:49:18] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-metrics.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-metrics.rst.txt')
[11:49:18]

全体進捗:   9%|██▍                         | 2/23 [00:02<00:33,  1.59s/it, 2/23]

[11:49:20] [1/2] 取得開始: https://discuss.ray.io/t/custom-path-for-ray-results-without-using-tune/13223
[11:49:20] [fetch] success[cloudscraper/latest]: https://discuss.ray.io/t/custom-path-for-ray-results-without-using-tune/13223
[11:49:20] 取得成功: https://discuss.ray.io/t/custom-path-for-ray-results-without-using-tune/13223（タイトル: Custom path for ~/ray_results without using Tune）


全体進捗:  13%|███▋                        | 3/23 [00:05<00:37,  1.86s/it, 3/23]

[11:49:22] [1/2] 取得開始: https://discuss.ray.io/
[11:49:22] [fetch] success[cloudscraper/latest]: https://discuss.ray.io/
[11:49:22] 取得成功: https://discuss.ray.io/（タイトル: Ray）


全体進捗:  17%|████▊                       | 4/23 [00:06<00:35,  1.87s/it, 4/23]

[11:49:24] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/index.html
[11:49:24] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/index.html
[11:49:24] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/index.rst.txt
[11:49:24] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/index.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/index.rst.txt')
[11:49:24] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/index.rst.txt
[11:49:24] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/index.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/index.rst.txt')
[11:49:24] [cloudscraper固定] stable 失敗: https://docs.ray.io/en/stable/tune/index.html -> HTTPError('403 Client Error: Forbidden for url: https://do

全体進捗:  22%|██████                      | 5/23 [00:09<00:35,  1.99s/it, 5/23]

[11:49:26] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/key-concepts.html
[11:49:26] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/key-concepts.html
[11:49:26] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/key-concepts.rst.txt
[11:49:26] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/key-concepts.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/key-concepts.rst.txt')
[11:49:26] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/key-concepts.rst.txt
[11:49:26] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/key-concepts.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/key-concepts.rst.txt')
[11:49:26] [cloudscraper固定] stable 失敗: https://docs.ray.io/en/stable/tune/key-concepts.htm

全体進捗:  26%|███████▎                    | 6/23 [00:11<00:36,  2.16s/it, 6/23]

[11:49:29] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/getting-started.html
[11:49:29] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/getting-started.html
[11:49:29] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/getting-started.rst.txt
[11:49:29] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/getting-started.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/getting-started.rst.txt')
[11:49:29] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/getting-started.rst.txt
[11:49:29] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/getting-started.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/getting-started.rst.txt')
[11:49:29] [cloudscraper固定] stable 失敗: https://docs.ray.io/en/stab

全体進捗:  30%|████████▌                   | 7/23 [00:13<00:34,  2.16s/it, 7/23]

[11:49:31] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-lifecycle.html
[11:49:31] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-lifecycle.html
[11:49:31] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-lifecycle.rst.txt
[11:49:31] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-lifecycle.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-lifecycle.rst.txt')
[11:49:31] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-lifecycle.rst.txt
[11:49:31] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-lifecycle.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-lifecycle.rst.

全体進捗:  35%|█████████▋                  | 8/23 [00:16<00:33,  2.26s/it, 8/23]

[11:49:33] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/search_space.html
[11:49:33] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/search_space.html
[11:49:33] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/search_space.rst.txt
[11:49:33] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/search_space.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/search_space.rst.txt')
[11:49:33] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/search_space.rst.txt
[11:49:33] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/search_space.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/search_space.rst.txt')
[11:49:33] [cloudscraper固定] stable 失敗: https://docs.ray.io

全体進捗:  39%|██████████▉                 | 9/23 [00:18<00:30,  2.21s/it, 9/23]

[11:49:35] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-search-spaces.html
[11:49:35] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-search-spaces.html
[11:49:35] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-search-spaces.rst.txt
[11:49:35] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-search-spaces.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-search-spaces.rst.txt')
[11:49:35] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-search-spaces.rst.txt
[11:49:35] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-search-spaces.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/t

全体進捗:  43%|███████████▎              | 10/23 [00:20<00:28,  2.19s/it, 10/23]

[11:49:37] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/suggestion.html
[11:49:37] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/suggestion.html
[11:49:37] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/suggestion.rst.txt
[11:49:37] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/suggestion.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/suggestion.rst.txt')
[11:49:37] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/suggestion.rst.txt
[11:49:38] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/suggestion.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/suggestion.rst.txt')
[11:49:38] [cloudscraper固定] stable 失敗: https://docs.ray.io/en/stable/tune/

全体進捗:  48%|████████████▍             | 11/23 [00:22<00:26,  2.21s/it, 11/23]

[11:49:40] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.html
[11:49:40] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.html
[11:49:40] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.rst.txt
[11:49:40] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.rst.txt')
[11:49:40] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.search.optuna.OptunaSearch.rst.txt
[11:49:40] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.search.optuna.OptunaSearch.rst.txt -

全体進捗:  52%|█████████████▌            | 12/23 [00:25<00:24,  2.23s/it, 12/23]

[11:49:42] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.html
[11:49:42] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.html
[11:49:42] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.rst.txt
[11:49:42] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.rst.txt')
[11:49:42] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.search.optuna.OptunaSearch.save_to_dir.rst.txt
[11:49:42] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sour

全体進捗:  57%|██████████████▋           | 13/23 [00:27<00:22,  2.21s/it, 13/23]

[11:49:44] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.html
[11:49:44] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.html
[11:49:44] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.rst.txt
[11:49:44] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.rst.txt')
[11:49:44] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.search.optuna.OptunaSearch.restore_from_dir.rst.txt
[11:49:44] [cloudscraper固定] _sources 失敗: https

全体進捗:  61%|███████████████▊          | 14/23 [00:29<00:19,  2.20s/it, 14/23]

[11:49:46] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/schedulers.html
[11:49:46] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/schedulers.html
[11:49:46] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/schedulers.rst.txt
[11:49:46] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/schedulers.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/schedulers.rst.txt')
[11:49:46] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/schedulers.rst.txt
[11:49:46] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/schedulers.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/schedulers.rst.txt')
[11:49:46] [cloudscraper固定] stable 失敗: https://docs.ray.io/en/stable/tune/

全体進捗:  65%|████████████████▉         | 15/23 [00:33<00:21,  2.65s/it, 15/23]

[11:49:50] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html
[11:49:50] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.html
[11:49:50] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.rst.txt
[11:49:50] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.schedulers.ASHAScheduler.rst.txt')
[11:49:50] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.schedulers.ASHAScheduler.rst.txt
[11:49:50] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.schedulers.ASHAScheduler.rst.txt -> HTTPError('4

全体進捗:  70%|██████████████████        | 16/23 [00:35<00:17,  2.52s/it, 16/23]

[11:49:52] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html
[11:49:52] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.html
[11:49:52] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.rst.txt
[11:49:52] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.rst.txt')
[11:49:52] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.schedulers.ResourceChangingScheduler.rst.txt
[11:49:52] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stabl

全体進捗:  74%|███████████████████▏      | 17/23 [00:37<00:14,  2.47s/it, 17/23]

[11:49:55] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/callbacks.html
[11:49:55] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/callbacks.html
[11:49:55] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/callbacks.rst.txt
[11:49:55] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/callbacks.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/callbacks.rst.txt')
[11:49:55] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/callbacks.rst.txt
[11:49:55] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/callbacks.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/callbacks.rst.txt')
[11:49:55] [cloudscraper固定] stable 失敗: https://docs.ray.io/en/stable/tune/api/call

全体進捗:  78%|████████████████████▎     | 18/23 [00:39<00:11,  2.36s/it, 18/23]

[11:49:57] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html
[11:49:57] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.Callback.html
[11:49:57] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.Callback.rst.txt
[11:49:57] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.Callback.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/doc/ray.tune.Callback.rst.txt')
[11:49:57] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.Callback.rst.txt
[11:49:57] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.Callback.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/doc/ray.tune.Callb

全体進捗:  83%|█████████████████████▍    | 19/23 [00:41<00:09,  2.30s/it, 19/23]

[11:49:59] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html
[11:49:59] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-resources.html
[11:49:59] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-resources.rst.txt
[11:49:59] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-resources.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-resources.rst.txt')
[11:49:59] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-resources.rst.txt
[11:49:59] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-resources.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-resources.rst.

全体進捗:  87%|██████████████████████▌   | 20/23 [00:44<00:06,  2.28s/it, 20/23]

[11:50:01] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/tutorials/tune-storage.html
[11:50:01] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/tutorials/tune-storage.html
[11:50:01] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-storage.rst.txt
[11:50:01] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-storage.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/tutorials/tune-storage.rst.txt')
[11:50:01] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-storage.rst.txt
[11:50:01] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-storage.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/tutorials/tune-storage.rst.txt')
[11:50:01]

全体進捗:  91%|███████████████████████▋  | 21/23 [00:46<00:04,  2.24s/it, 21/23]

[11:50:03] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/api/api.html
[11:50:03] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/api/api.html
[11:50:03] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/api.rst.txt
[11:50:03] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/api.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/api/api.rst.txt')
[11:50:03] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/api.rst.txt
[11:50:03] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/api.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/api/api.rst.txt')
[11:50:03] [cloudscraper固定] stable 失敗: https://docs.ray.io/en/stable/tune/api/api.html -> HTTPError('403 Client Error: Forbidden f

全体進捗:  96%|████████████████████████▊ | 22/23 [00:48<00:02,  2.20s/it, 22/23]

[11:50:05] [1/2] 取得開始: https://docs.ray.io/en/latest/tune/examples/optuna_example.html
[11:50:05] [cloudscraper固定] HTTPエラー 403 を検出: https://docs.ray.io/en/latest/tune/examples/optuna_example.html
[11:50:05] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/latest/_sources/en/latest/tune/examples/optuna_example.rst.txt
[11:50:05] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/latest/_sources/en/latest/tune/examples/optuna_example.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/latest/_sources/en/latest/tune/examples/optuna_example.rst.txt')
[11:50:05] [cloudscraper固定] Sphinx _sources を試行: https://docs.ray.io/en/stable/_sources/en/stable/tune/examples/optuna_example.rst.txt
[11:50:05] [cloudscraper固定] _sources 失敗: https://docs.ray.io/en/stable/_sources/en/stable/tune/examples/optuna_example.rst.txt -> HTTPError('403 Client Error: Forbidden for url: https://docs.ray.io/en/stable/_sources/en/stable/tune/examples/optuna_example.rst.txt')
[1

全体進捗: 100%|██████████████████████████| 23/23 [00:53<00:00,  2.33s/it, 23/23]

[11:50:09] 書き出し開始: bundles/02_Ray.md（形式: md, 件数: 23）
[11:50:09] 書き出し完了: bundles/02_Ray.md
[11:50:09] ⚠ 取得に失敗したURL: 1件
[11:50:09]   - https://docs.ray.io/en/latest/tune/examples/optuna_example.html -> all fallbacks failed for https://docs.ray.io/en/latest/tune/examples/optuna_example.html
Ray 出力: bundles\02_Ray.md



