In [None]:
# --- Find the Dynasty repo root (must contain BOTH src/models and data/Bakery) ---

from pathlib import Path
import sys
import pandas as pd


import warnings
from sklearn.exceptions import ConvergenceWarning

# Globally ignore these warning categories so the tuning output stays readable.
# Registration order matches the original four calls.
for _ignored_category in (
    UserWarning,
    RuntimeWarning,
    FutureWarning,
    ConvergenceWarning,
):
    warnings.filterwarnings("ignore", category=_ignored_category)

def find_repo_root(start: Path) -> Path:
    """Return the closest directory at or above ``start`` that is the repo root.

    A directory qualifies when it contains both a ``src/models`` directory
    and a ``data/Bakery`` directory.

    Args:
        start: Directory to begin the search from. A relative path is first
            anchored to the current working directory; an absolute path is
            used as given.

    Returns:
        The first qualifying directory, checking ``start`` itself and then
        each of its parents in order.

    Raises:
        FileNotFoundError: If neither ``start`` nor any parent qualifies.
    """
    # A relative path such as Path(".") has no parents, so without this the
    # upward walk would silently stop at the starting directory.
    start = start.absolute()
    for candidate in (start, *start.parents):
        # is_dir() rather than exists(): a plain file with a matching name
        # must not be mistaken for the expected folder.
        has_models = (candidate / "src" / "models").is_dir()
        has_bakery = (candidate / "data" / "Bakery").is_dir()
        if has_models and has_bakery:
            return candidate
    raise FileNotFoundError(
        "Could not locate the Dynasty repo root (needs both 'src/models' and 'data/Bakery')."
    )

# Search upward from the current working directory for the repo root.
REPO_ROOT = find_repo_root(Path.cwd())
print("✅ REPO_ROOT:", REPO_ROOT)

# Put the repo root at the front of sys.path (only once) so that the
# `src` package can be imported.
_repo_root_entry = str(REPO_ROOT)
if _repo_root_entry not in sys.path:
    sys.path.insert(0, _repo_root_entry)

from src.models.multi_tune_by_position import (
    run_seed_for_subsets,
    save_pareto_chart,
    default_out_dir,
    default_csv_for_position,
)

# ---- Config ----
# The settings in this section are forwarded to run_seed_for_subsets below.
position = "WR"            # RB / WR / TE / QB
seeds = [123, 456, 789]    # passed unchanged as `seeds=`
subset_grid = [10, 20, 30]  # one tuning call per value, passed as `n_subsets=`

# Optional constraints (leave empty if none)
must_feats  = ["DOM+"]           # e.g. ["DOM+", "YPC"]
ban_feats   = ["Conference Rank", "Draft Age"]           # e.g. ["aDOT"]
must_inters = ["SpeedxBMI", "Wide%xSlot%"]           # e.g. ["SpeedxBMI"]
ban_inters  = []           # e.g. ["Wide%xSlot%"]
hierarchy   = "none"       # "strong" | "weak" | "none"

# Confirm CSV location (expected under REPO_ROOT/data/Bakery/...; the exact
# path is decided by default_csv_for_position).
csv_path = default_csv_for_position(REPO_ROOT, position)
print("CSV path:", csv_path)
# Raise explicitly instead of using `assert`: assert statements are removed
# when Python runs with -O, which would defer the failure to pd.read_csv.
if not csv_path.exists():
    raise FileNotFoundError(f"CSV not found at {csv_path}")

# Preview only: `df` is printed as a sanity check. The tuning call below is
# given `position` and `project_root`, not this DataFrame.
df = pd.read_csv(csv_path)
print(f"Loaded CSV from: {csv_path}")
print(f"Shape: {df.shape}")
print("Columns:", df.columns.tolist())
print("First 5 rows:")
print(df.head())

# Run: one tuning call per entry in subset_grid. Only `n_subsets` varies
# between calls, so every other keyword argument is collected once here.
shared_settings = dict(
    position=position,
    project_root=REPO_ROOT,     # IMPORTANT: must be the Dynasty repo root
    seeds=seeds,
    max_base_feats=13,
    max_interactions=3,
    n_iter_per_model=15,
    cv_folds=5,
    test_size=0.20,
    must_feats=must_feats,
    ban_feats=ban_feats,
    must_inters=must_inters,
    ban_inters=ban_inters,
    interaction_hierarchy=hierarchy,
    # Tuning guidance from the author: try 0.40 first, then lower to
    # 0.30 / 0.20 if still dominant (currently 0.30).
    draft_cap_cap=0.30,
    draft_cap_lower_q=0.05,
    draft_cap_upper_q=0.95,
    draft_cap_importance_cap=0.1,
    breakout_age_importance_cap=0.1,
    draft_age_importance_cap=None,
)

all_runs = []
for n in subset_grid:
    res = run_seed_for_subsets(n_subsets=n, **shared_settings)
    all_runs.append(res)

# Stack the per-subset results into a single table with a fresh index.
summary = pd.concat(all_runs, ignore_index=True)

# The output folder is chosen by default_out_dir (expected:
# REPO_ROOT/data/Bakery/_derived/<POS>/ — confirm in that helper).
out_dir = default_out_dir(REPO_ROOT, position)
out_dir.mkdir(parents=True, exist_ok=True)

# Write the summary CSV, then the Pareto chart, into that folder.
summary_path = out_dir.joinpath(f"{position.lower()}_runtime_accuracy_summary.csv")
summary.to_csv(summary_path, index=False)
png_path = save_pareto_chart(summary, position, out_dir)

print("\n✅ Run complete!")
print("Summary CSV:", summary_path)
print("Pareto PNG :", png_path)

# Last expression of the cell so the notebook displays the first rows.
summary.head()


In [None]:
# --- Find the Dynasty repo root (must contain BOTH src/models and data/Bakery) ---

from pathlib import Path
import sys
import pandas as pd


import warnings
from sklearn.exceptions import ConvergenceWarning

# Globally ignore these warning categories so the tuning output stays readable.
# Registration order matches the original four calls.
for _ignored_category in (
    UserWarning,
    RuntimeWarning,
    FutureWarning,
    ConvergenceWarning,
):
    warnings.filterwarnings("ignore", category=_ignored_category)

def find_repo_root(start: Path) -> Path:
    """Return the closest directory at or above ``start`` that is the repo root.

    A directory qualifies when it contains both a ``src/models`` directory
    and a ``data/Bakery`` directory.

    Args:
        start: Directory to begin the search from. A relative path is first
            anchored to the current working directory; an absolute path is
            used as given.

    Returns:
        The first qualifying directory, checking ``start`` itself and then
        each of its parents in order.

    Raises:
        FileNotFoundError: If neither ``start`` nor any parent qualifies.
    """
    # A relative path such as Path(".") has no parents, so without this the
    # upward walk would silently stop at the starting directory.
    start = start.absolute()
    for candidate in (start, *start.parents):
        # is_dir() rather than exists(): a plain file with a matching name
        # must not be mistaken for the expected folder.
        has_models = (candidate / "src" / "models").is_dir()
        has_bakery = (candidate / "data" / "Bakery").is_dir()
        if has_models and has_bakery:
            return candidate
    raise FileNotFoundError(
        "Could not locate the Dynasty repo root (needs both 'src/models' and 'data/Bakery')."
    )

# Search upward from the current working directory for the repo root.
REPO_ROOT = find_repo_root(Path.cwd())
print("✅ REPO_ROOT:", REPO_ROOT)

# Put the repo root at the front of sys.path (only once) so that the
# `src` package can be imported.
_repo_root_entry = str(REPO_ROOT)
if _repo_root_entry not in sys.path:
    sys.path.insert(0, _repo_root_entry)

from src.models.multi_tune_by_position import (
    run_seed_for_subsets,
    save_pareto_chart,
    default_out_dir,
    default_csv_for_position,
)

# ---- Config ----
# The settings in this section are forwarded to run_seed_for_subsets below.
position = "RB"            # RB / WR / TE / QB
seeds = [123, 456, 789]    # passed unchanged as `seeds=`
subset_grid = [10, 20, 30]  # one tuning call per value, passed as `n_subsets=`

# Optional constraints (leave empty if none)
must_feats  = ["DOM+"]           # e.g. ["DOM+", "YPC"]
ban_feats   = ["Conference Rank", "Draft Age"]           # e.g. ["aDOT"]
must_inters = ["SpeedxBMI"]           # e.g. ["SpeedxBMI"]
ban_inters  = []           # e.g. ["Wide%xSlot%"]
hierarchy   = "none"       # "strong" | "weak" | "none"

# Confirm CSV location (expected under REPO_ROOT/data/Bakery/...; the exact
# path is decided by default_csv_for_position).
csv_path = default_csv_for_position(REPO_ROOT, position)
print("CSV path:", csv_path)
# Raise explicitly instead of using `assert`: assert statements are removed
# when Python runs with -O, which would defer the failure to pd.read_csv.
if not csv_path.exists():
    raise FileNotFoundError(f"CSV not found at {csv_path}")

# Preview only: `df` is printed as a sanity check. The tuning call below is
# given `position` and `project_root`, not this DataFrame.
df = pd.read_csv(csv_path)
print(f"Loaded CSV from: {csv_path}")
print(f"Shape: {df.shape}")
print("Columns:", df.columns.tolist())
print("First 5 rows:")
print(df.head())

# Run: one tuning call per entry in subset_grid. Only `n_subsets` varies
# between calls, so every other keyword argument is collected once here.
shared_settings = dict(
    position=position,
    project_root=REPO_ROOT,     # IMPORTANT: must be the Dynasty repo root
    seeds=seeds,
    max_base_feats=13,
    max_interactions=3,
    n_iter_per_model=15,
    cv_folds=5,
    test_size=0.20,
    must_feats=must_feats,
    ban_feats=ban_feats,
    must_inters=must_inters,
    ban_inters=ban_inters,
    interaction_hierarchy=hierarchy,
    # Tuning guidance from the author: try 0.40 first, then lower to
    # 0.30 / 0.20 if still dominant (currently 0.30).
    draft_cap_cap=0.30,
    draft_cap_lower_q=0.05,
    draft_cap_upper_q=0.95,
    draft_cap_importance_cap=0.1,
    breakout_age_importance_cap=0.1,
    draft_age_importance_cap=None,
)

all_runs = []
for n in subset_grid:
    res = run_seed_for_subsets(n_subsets=n, **shared_settings)
    all_runs.append(res)

# Stack the per-subset results into a single table with a fresh index.
summary = pd.concat(all_runs, ignore_index=True)

# The output folder is chosen by default_out_dir (expected:
# REPO_ROOT/data/Bakery/_derived/<POS>/ — confirm in that helper).
out_dir = default_out_dir(REPO_ROOT, position)
out_dir.mkdir(parents=True, exist_ok=True)

# Write the summary CSV, then the Pareto chart, into that folder.
summary_path = out_dir.joinpath(f"{position.lower()}_runtime_accuracy_summary.csv")
summary.to_csv(summary_path, index=False)
png_path = save_pareto_chart(summary, position, out_dir)

print("\n✅ Run complete!")
print("Summary CSV:", summary_path)
print("Pareto PNG :", png_path)

# Last expression of the cell so the notebook displays the first rows.
summary.head()


In [None]:
# --- Find the Dynasty repo root (must contain BOTH src/models and data/Bakery) ---

from pathlib import Path
import sys
import pandas as pd


import warnings
from sklearn.exceptions import ConvergenceWarning

# Globally ignore these warning categories so the tuning output stays readable.
# Registration order matches the original four calls.
for _ignored_category in (
    UserWarning,
    RuntimeWarning,
    FutureWarning,
    ConvergenceWarning,
):
    warnings.filterwarnings("ignore", category=_ignored_category)

def find_repo_root(start: Path) -> Path:
    """Return the closest directory at or above ``start`` that is the repo root.

    A directory qualifies when it contains both a ``src/models`` directory
    and a ``data/Bakery`` directory.

    Args:
        start: Directory to begin the search from. A relative path is first
            anchored to the current working directory; an absolute path is
            used as given.

    Returns:
        The first qualifying directory, checking ``start`` itself and then
        each of its parents in order.

    Raises:
        FileNotFoundError: If neither ``start`` nor any parent qualifies.
    """
    # A relative path such as Path(".") has no parents, so without this the
    # upward walk would silently stop at the starting directory.
    start = start.absolute()
    for candidate in (start, *start.parents):
        # is_dir() rather than exists(): a plain file with a matching name
        # must not be mistaken for the expected folder.
        has_models = (candidate / "src" / "models").is_dir()
        has_bakery = (candidate / "data" / "Bakery").is_dir()
        if has_models and has_bakery:
            return candidate
    raise FileNotFoundError(
        "Could not locate the Dynasty repo root (needs both 'src/models' and 'data/Bakery')."
    )

# Search upward from the current working directory for the repo root.
REPO_ROOT = find_repo_root(Path.cwd())
print("✅ REPO_ROOT:", REPO_ROOT)

# Put the repo root at the front of sys.path (only once) so that the
# `src` package can be imported.
_repo_root_entry = str(REPO_ROOT)
if _repo_root_entry not in sys.path:
    sys.path.insert(0, _repo_root_entry)

from src.models.multi_tune_by_position import (
    run_seed_for_subsets,
    save_pareto_chart,
    default_out_dir,
    default_csv_for_position,
)

# ---- Config ----
# The settings in this section are forwarded to run_seed_for_subsets below.
position = "TE"            # RB / WR / TE / QB
seeds = [123, 456, 789]    # passed unchanged as `seeds=`
subset_grid = [10, 20, 30]  # one tuning call per value, passed as `n_subsets=`

# Optional constraints (leave empty if none)
must_feats  = ["DOM+"]           # e.g. ["DOM+", "YPC"]
ban_feats   = ["Conference Rank", "Draft Age"]           # e.g. ["aDOT"]
must_inters = ["SpeedxBMI"]           # e.g. ["SpeedxBMI"]
ban_inters  = []           # e.g. ["Wide%xSlot%"]
hierarchy   = "none"       # "strong" | "weak" | "none"

# Confirm CSV location (expected under REPO_ROOT/data/Bakery/...; the exact
# path is decided by default_csv_for_position).
csv_path = default_csv_for_position(REPO_ROOT, position)
print("CSV path:", csv_path)
# Raise explicitly instead of using `assert`: assert statements are removed
# when Python runs with -O, which would defer the failure to pd.read_csv.
if not csv_path.exists():
    raise FileNotFoundError(f"CSV not found at {csv_path}")

# Preview only: `df` is printed as a sanity check. The tuning call below is
# given `position` and `project_root`, not this DataFrame.
df = pd.read_csv(csv_path)
print(f"Loaded CSV from: {csv_path}")
print(f"Shape: {df.shape}")
print("Columns:", df.columns.tolist())
print("First 5 rows:")
print(df.head())

# Run: one tuning call per entry in subset_grid. Only `n_subsets` varies
# between calls, so every other keyword argument is collected once here.
shared_settings = dict(
    position=position,
    project_root=REPO_ROOT,     # IMPORTANT: must be the Dynasty repo root
    seeds=seeds,
    max_base_feats=13,
    max_interactions=3,
    n_iter_per_model=15,
    cv_folds=5,
    test_size=0.20,
    must_feats=must_feats,
    ban_feats=ban_feats,
    must_inters=must_inters,
    ban_inters=ban_inters,
    interaction_hierarchy=hierarchy,
    # Tuning guidance from the author: try 0.40 first, then lower to
    # 0.30 / 0.20 if still dominant (currently 0.30).
    draft_cap_cap=0.30,
    draft_cap_lower_q=0.05,
    draft_cap_upper_q=0.95,
    draft_cap_importance_cap=0.1,
    breakout_age_importance_cap=0.1,
    draft_age_importance_cap=None,
)

all_runs = []
for n in subset_grid:
    res = run_seed_for_subsets(n_subsets=n, **shared_settings)
    all_runs.append(res)

# Stack the per-subset results into a single table with a fresh index.
summary = pd.concat(all_runs, ignore_index=True)

# The output folder is chosen by default_out_dir (expected:
# REPO_ROOT/data/Bakery/_derived/<POS>/ — confirm in that helper).
out_dir = default_out_dir(REPO_ROOT, position)
out_dir.mkdir(parents=True, exist_ok=True)

# Write the summary CSV, then the Pareto chart, into that folder.
summary_path = out_dir.joinpath(f"{position.lower()}_runtime_accuracy_summary.csv")
summary.to_csv(summary_path, index=False)
png_path = save_pareto_chart(summary, position, out_dir)

print("\n✅ Run complete!")
print("Summary CSV:", summary_path)
print("Pareto PNG :", png_path)

# Last expression of the cell so the notebook displays the first rows.
summary.head()


In [None]:
# --- Find the Dynasty repo root (must contain BOTH src/models and data/Bakery) ---

from pathlib import Path
import sys
import pandas as pd


import warnings
from sklearn.exceptions import ConvergenceWarning

# Globally ignore these warning categories so the tuning output stays readable.
# Registration order matches the original four calls.
for _ignored_category in (
    UserWarning,
    RuntimeWarning,
    FutureWarning,
    ConvergenceWarning,
):
    warnings.filterwarnings("ignore", category=_ignored_category)

def find_repo_root(start: Path) -> Path:
    """Return the closest directory at or above ``start`` that is the repo root.

    A directory qualifies when it contains both a ``src/models`` directory
    and a ``data/Bakery`` directory.

    Args:
        start: Directory to begin the search from. A relative path is first
            anchored to the current working directory; an absolute path is
            used as given.

    Returns:
        The first qualifying directory, checking ``start`` itself and then
        each of its parents in order.

    Raises:
        FileNotFoundError: If neither ``start`` nor any parent qualifies.
    """
    # A relative path such as Path(".") has no parents, so without this the
    # upward walk would silently stop at the starting directory.
    start = start.absolute()
    for candidate in (start, *start.parents):
        # is_dir() rather than exists(): a plain file with a matching name
        # must not be mistaken for the expected folder.
        has_models = (candidate / "src" / "models").is_dir()
        has_bakery = (candidate / "data" / "Bakery").is_dir()
        if has_models and has_bakery:
            return candidate
    raise FileNotFoundError(
        "Could not locate the Dynasty repo root (needs both 'src/models' and 'data/Bakery')."
    )

# Search upward from the current working directory for the repo root.
REPO_ROOT = find_repo_root(Path.cwd())
print("✅ REPO_ROOT:", REPO_ROOT)

# Put the repo root at the front of sys.path (only once) so that the
# `src` package can be imported.
_repo_root_entry = str(REPO_ROOT)
if _repo_root_entry not in sys.path:
    sys.path.insert(0, _repo_root_entry)

from src.models.multi_tune_by_position import (
    run_seed_for_subsets,
    save_pareto_chart,
    default_out_dir,
    default_csv_for_position,
)

# ---- Config ----
# The settings in this section are forwarded to run_seed_for_subsets below.
position = "QB"            # RB / WR / TE / QB
seeds = [123, 456, 789]    # passed unchanged as `seeds=`
subset_grid = [10, 20, 30]  # one tuning call per value, passed as `n_subsets=`

# Optional constraints (leave empty if none)
# NOTE(review): these constraints are identical to the RB and TE cells —
# confirm that "DOM+" and "SpeedxBMI" are intended for the QB data as well.
must_feats  = ["DOM+"]           # e.g. ["DOM+", "YPC"]
ban_feats   = ["Conference Rank", "Draft Age"]           # e.g. ["aDOT"]
must_inters = ["SpeedxBMI"]           # e.g. ["SpeedxBMI"]
ban_inters  = []           # e.g. ["Wide%xSlot%"]
hierarchy   = "none"       # "strong" | "weak" | "none"

# Confirm CSV location (expected under REPO_ROOT/data/Bakery/...; the exact
# path is decided by default_csv_for_position).
csv_path = default_csv_for_position(REPO_ROOT, position)
print("CSV path:", csv_path)
# Raise explicitly instead of using `assert`: assert statements are removed
# when Python runs with -O, which would defer the failure to pd.read_csv.
if not csv_path.exists():
    raise FileNotFoundError(f"CSV not found at {csv_path}")

# Preview only: `df` is printed as a sanity check. The tuning call below is
# given `position` and `project_root`, not this DataFrame.
df = pd.read_csv(csv_path)
print(f"Loaded CSV from: {csv_path}")
print(f"Shape: {df.shape}")
print("Columns:", df.columns.tolist())
print("First 5 rows:")
print(df.head())

# Run: one tuning call per entry in subset_grid. Only `n_subsets` varies
# between calls, so every other keyword argument is collected once here.
shared_settings = dict(
    position=position,
    project_root=REPO_ROOT,     # IMPORTANT: must be the Dynasty repo root
    seeds=seeds,
    max_base_feats=13,
    max_interactions=3,
    n_iter_per_model=15,
    cv_folds=5,
    test_size=0.20,
    must_feats=must_feats,
    ban_feats=ban_feats,
    must_inters=must_inters,
    ban_inters=ban_inters,
    interaction_hierarchy=hierarchy,
    # Tuning guidance from the author: try 0.40 first, then lower to
    # 0.30 / 0.20 if still dominant (currently 0.30).
    draft_cap_cap=0.30,
    draft_cap_lower_q=0.05,
    draft_cap_upper_q=0.95,
    draft_cap_importance_cap=0.1,
    breakout_age_importance_cap=0.1,
    draft_age_importance_cap=None,
)

all_runs = []
for n in subset_grid:
    res = run_seed_for_subsets(n_subsets=n, **shared_settings)
    all_runs.append(res)

# Stack the per-subset results into a single table with a fresh index.
summary = pd.concat(all_runs, ignore_index=True)

# The output folder is chosen by default_out_dir (expected:
# REPO_ROOT/data/Bakery/_derived/<POS>/ — confirm in that helper).
out_dir = default_out_dir(REPO_ROOT, position)
out_dir.mkdir(parents=True, exist_ok=True)

# Write the summary CSV, then the Pareto chart, into that folder.
summary_path = out_dir.joinpath(f"{position.lower()}_runtime_accuracy_summary.csv")
summary.to_csv(summary_path, index=False)
png_path = save_pareto_chart(summary, position, out_dir)

print("\n✅ Run complete!")
print("Summary CSV:", summary_path)
print("Pareto PNG :", png_path)

# Last expression of the cell so the notebook displays the first rows.
summary.head()
