In [1]:
# --- Find the Dynasty repo root (must contain BOTH src/models and data/Bakery) ---

from pathlib import Path
import sys
import pandas as pd


import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence the noisy warning categories for the rest of the session.
# Filters are registered in the same order as before.
for _category in (UserWarning, RuntimeWarning, FutureWarning, ConvergenceWarning):
    warnings.filterwarnings("ignore", category=_category)
del _category  # leave the notebook namespace exactly as it was

def find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` that is the repo root.

    A directory qualifies when it contains both a ``src/models`` directory
    and a ``data/Bakery`` directory.

    Args:
        start: Directory to begin the upward search from. Relative paths are
            resolved first so that every real ancestor is visited.

    Returns:
        The first qualifying directory, checking ``start`` itself and then
        each parent in turn.

    Raises:
        FileNotFoundError: If neither ``start`` nor any ancestor qualifies.
    """
    # Path(".").parents is empty, so an unresolved relative path would only
    # ever test the starting directory itself.
    start = start.resolve()
    for candidate in (start, *start.parents):
        has_models = (candidate / "src" / "models").is_dir()
        has_bakery = (candidate / "data" / "Bakery").is_dir()
        if has_models and has_bakery:
            return candidate
    raise FileNotFoundError(
        "Could not locate the Dynasty repo root (needs both 'src/models' and 'data/Bakery')."
    )

# Locate the repo root by walking upward from the current working directory.
REPO_ROOT = find_repo_root(Path.cwd())
print("✅ REPO_ROOT:", REPO_ROOT)

# Put the repo root at the front of sys.path (only once) so that the
# `src` package can be imported from this notebook.
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.insert(0, str(REPO_ROOT))

from src.models.multi_tune_by_position import (
    run_seed_for_subsets,
    save_pareto_chart,
    default_out_dir,
    default_csv_for_position,
)

# ---- Config ----
# This cell sweeps run_seed_for_subsets over every value in subset_grid for one
# position, then writes a combined summary CSV and a Pareto chart.
position = "WR"            # RB / WR / TE / QB
seeds = [123, 456, 789]
subset_grid = [10, 20, 30]  # each value is passed as n_subsets

# Optional constraints (leave empty if none)
must_feats  = ["DOM+"]           # e.g. ["DOM+", "YPC"]
ban_feats   = ["Conference Rank", "Draft Age"]           # e.g. ["aDOT"]
must_inters = ["SpeedxBMI", "Wide%xSlot%"]           # e.g. ["SpeedxBMI"]
ban_inters  = []           # e.g. ["Wide%xSlot%"]
hierarchy   = "none"       # "strong" | "weak" | "none"

# Confirm CSV location (under REPO_ROOT/data/Bakery/...)
csv_path = default_csv_for_position(REPO_ROOT, position)
print("CSV path:", csv_path)
# Raise explicitly instead of using `assert`: assertions are stripped when
# Python runs with -O, which would let a missing file slip through.
if not csv_path.exists():
    raise FileNotFoundError(f"CSV not found at {csv_path}")

# Quick preview of the raw input; `df` is only used for these prints.
df = pd.read_csv(csv_path)
print(f"Loaded CSV from: {csv_path}")
print(f"Shape: {df.shape}")
print("Columns:", df.columns.tolist())
print("First 5 rows:")
print(df.head())

# Run: one call per n_subsets value; each result is kept for concatenation.
all_runs = []
for n in subset_grid:
    res = run_seed_for_subsets(
        position=position,
        project_root=REPO_ROOT,     # ← IMPORTANT: use the Dynasty repo root
        n_subsets=n,
        seeds=seeds,
        max_base_feats=13,
        max_interactions=3,
        n_iter_per_model=15,
        cv_folds=5,
        test_size=0.20,
        must_feats=must_feats,
        ban_feats=ban_feats,
        must_inters=must_inters,
        ban_inters=ban_inters,
        interaction_hierarchy=hierarchy,
        draft_cap_cap=0.30,          # now at 0.30 (guidance: try 0.40 first, then 0.30 / 0.20 if still dominant)
        draft_cap_lower_q=0.05,
        draft_cap_upper_q=0.95,
        draft_cap_importance_cap=0.1,
        breakout_age_importance_cap=0.1,
        draft_age_importance_cap=None,
    )
    all_runs.append(res)

# pd.concat would fail with a generic message on an empty list; say why.
if not all_runs:
    raise ValueError("subset_grid is empty; no runs to summarize")
summary = pd.concat(all_runs, ignore_index=True)

# Save summary + Pareto chart under REPO_ROOT/data/Bakery/_derived/<POS>/
out_dir = default_out_dir(REPO_ROOT, position)
out_dir.mkdir(parents=True, exist_ok=True)

summary_path = out_dir / f"{position.lower()}_runtime_accuracy_summary.csv"
summary.to_csv(summary_path, index=False)

png_path = save_pareto_chart(summary, position, out_dir)

print("\n✅ Run complete!")
print("Summary CSV:", summary_path)
print("Pareto PNG :", png_path)

# Last expression of the cell, so the notebook renders the first rows.
summary.head()


✅ REPO_ROOT: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty
CSV path: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/WR/Bakery_WR_Overall.csv
Loaded CSV from: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/WR/Bakery_WR_Overall.csv
Shape: (618, 126)
Columns: ['Rank', 'Class Rk', 'Year', 'Player Name', 'Tier', 'WR Grade', 'Overall', 'Z-Score', 'School', 'Conf Rk', 'DOB', 'Age', 'Year.1', 'Draft Age', 'Type', 'Build', 'Separator', 'Height', 'Weight', 'BMI', '40 Time', 'Draft Cap', 'DOM1', 'DOM2', 'DOM3', 'DOM4', 'DOM5', 'TDOM', 'DOM+', 'Breakout', 'Y/RR', 'YAC/R', 'aDOT', 'EPA/P', 'aYPTPA', 'CTPRR', 'UCTPRR', 'Drop%', 'CC%', 'Wide %', 'Slot %', 'Speed', 'HaSS', 'DOM++', 'BOUT+', 'Y/RR+', 'YAC/R+', '40+', 'HaSS+', 'DC+', 'BAMA', 'Rec Yds', 'Team Yds', 'Tgts', 'Recs', 'TDs', 'Team TDs', 'EPA/P.1', 'aYPTPA.1', 'Y/RR.1', 'YAC/R.1', 'aDOT.1', 'CC %', 'Drop %', 'Wide %.1', 'Slot %.1', 'Rec Yds.1', 'Team Yds.1', 'Tgts.1', 'Recs.1', 'TDs.1', 'Team TDs.1',

  from .autonotebook import tqdm as notebook_tqdm



[WR] Seed 123 | n_subsets=10 → R²=0.8405 | MAE=0.666 | RMSE=0.852 | time=121.1s

[WR] Seed 456 | n_subsets=10 → R²=0.8846 | MAE=0.688 | RMSE=0.836 | time=120.7s

[WR] Seed 789 | n_subsets=10 → R²=0.8868 | MAE=0.574 | RMSE=0.705 | time=123.0s

Position          : WR
CSV path          : /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/WR/Bakery_WR_Overall.csv
Output directory  : /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/_derived/WR
Hierarchy         : none
Must feats        : ['DOM+']
Ban feats         : ['Conference Rank', 'Draft Age']
Must inters       : ['SpeedxBMI', 'Wide%xSlot%']
Ban inters        : []
DraftCap limiter  : cap=0.3, lower_q=0.05, upper_q=0.95

[WR] Rows after filtering: 353 | Feature cols: 18

[WR] Seed 123 | n_subsets=20 → R²=0.8405 | MAE=0.666 | RMSE=0.852 | time=223.8s

[WR] Seed 456 | n_subsets=20 → R²=0.8886 | MAE=0.668 | RMSE=0.821 | time=222.6s

[WR] Seed 789 | n_subsets=20 → R²=0.8868 | MAE=0.574 | RMSE=0.705 | time=224.

Unnamed: 0,position,seed,n_subsets,best_test_R2,best_test_MAE,best_test_RMSE,runtime_sec,leaderboard_csv,predictions_csv,best_model_tag,best_bases,best_interactions
0,WR,123,10,0.840467,0.666449,0.851732,121.138796,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|YAC/R|Breakout Age|Y/RR|Draft Capital|CTP...,SpeedxBMI|Wide%xSlot%
1,WR,456,10,0.884575,0.68847,0.835973,120.714309,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|Breakout Age|EPA/P|aYPTPA|CC%|YAC/R|aDOT|...,SpeedxBMI|Wide%xSlot%
2,WR,789,10,0.886796,0.573757,0.705379,123.043335,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|CTPRR|Breakout Age|Y/RR|UCTPRR|CC%|Draft ...,SpeedxBMI|Wide%xSlot%
3,WR,123,20,0.840467,0.666449,0.851732,223.764539,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|YAC/R|Breakout Age|Y/RR|Draft Capital|CTP...,SpeedxBMI|Wide%xSlot%
4,WR,456,20,0.888602,0.667627,0.821259,222.626593,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|Breakout Age|aDOT|Draft Capital|Drop Rate...,SpeedxBMI|Wide%xSlot%


In [2]:
# --- Find the Dynasty repo root (must contain BOTH src/models and data/Bakery) ---

from pathlib import Path
import sys
import pandas as pd


import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence the noisy warning categories for the rest of the session.
# Filters are registered in the same order as before.
for _category in (UserWarning, RuntimeWarning, FutureWarning, ConvergenceWarning):
    warnings.filterwarnings("ignore", category=_category)
del _category  # leave the notebook namespace exactly as it was

def find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` that is the repo root.

    A directory qualifies when it contains both a ``src/models`` directory
    and a ``data/Bakery`` directory.

    Args:
        start: Directory to begin the upward search from. Relative paths are
            resolved first so that every real ancestor is visited.

    Returns:
        The first qualifying directory, checking ``start`` itself and then
        each parent in turn.

    Raises:
        FileNotFoundError: If neither ``start`` nor any ancestor qualifies.
    """
    # Path(".").parents is empty, so an unresolved relative path would only
    # ever test the starting directory itself.
    start = start.resolve()
    for candidate in (start, *start.parents):
        has_models = (candidate / "src" / "models").is_dir()
        has_bakery = (candidate / "data" / "Bakery").is_dir()
        if has_models and has_bakery:
            return candidate
    raise FileNotFoundError(
        "Could not locate the Dynasty repo root (needs both 'src/models' and 'data/Bakery')."
    )

# Locate the repo root by walking upward from the current working directory.
REPO_ROOT = find_repo_root(Path.cwd())
print("✅ REPO_ROOT:", REPO_ROOT)

# Put the repo root at the front of sys.path (only once) so that the
# `src` package can be imported from this notebook.
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.insert(0, str(REPO_ROOT))

from src.models.multi_tune_by_position import (
    run_seed_for_subsets,
    save_pareto_chart,
    default_out_dir,
    default_csv_for_position,
)

# ---- Config ----
# This cell sweeps run_seed_for_subsets over every value in subset_grid for one
# position, then writes a combined summary CSV and a Pareto chart.
position = "RB"            # RB / WR / TE / QB
seeds = [123, 456, 789]
subset_grid = [10, 20, 30]  # each value is passed as n_subsets

# Optional constraints (leave empty if none)
must_feats  = ["DOM+"]           # e.g. ["DOM+", "YPC"]
ban_feats   = ["Conference Rank", "Draft Age"]           # e.g. ["aDOT"]
must_inters = ["SpeedxBMI"]           # e.g. ["SpeedxBMI"]
ban_inters  = []           # e.g. ["Wide%xSlot%"]
hierarchy   = "none"       # "strong" | "weak" | "none"

# Confirm CSV location (under REPO_ROOT/data/Bakery/...)
csv_path = default_csv_for_position(REPO_ROOT, position)
print("CSV path:", csv_path)
# Raise explicitly instead of using `assert`: assertions are stripped when
# Python runs with -O, which would let a missing file slip through.
if not csv_path.exists():
    raise FileNotFoundError(f"CSV not found at {csv_path}")

# Quick preview of the raw input; `df` is only used for these prints.
df = pd.read_csv(csv_path)
print(f"Loaded CSV from: {csv_path}")
print(f"Shape: {df.shape}")
print("Columns:", df.columns.tolist())
print("First 5 rows:")
print(df.head())

# Run: one call per n_subsets value; each result is kept for concatenation.
all_runs = []
for n in subset_grid:
    res = run_seed_for_subsets(
        position=position,
        project_root=REPO_ROOT,     # ← IMPORTANT: use the Dynasty repo root
        n_subsets=n,
        seeds=seeds,
        max_base_feats=13,
        max_interactions=3,
        n_iter_per_model=15,
        cv_folds=5,
        test_size=0.20,
        must_feats=must_feats,
        ban_feats=ban_feats,
        must_inters=must_inters,
        ban_inters=ban_inters,
        interaction_hierarchy=hierarchy,
        draft_cap_cap=0.30,          # now at 0.30 (guidance: try 0.40 first, then 0.30 / 0.20 if still dominant)
        draft_cap_lower_q=0.05,
        draft_cap_upper_q=0.95,
        draft_cap_importance_cap=0.1,
        breakout_age_importance_cap=0.1,
        draft_age_importance_cap=None,
    )
    all_runs.append(res)

# pd.concat would fail with a generic message on an empty list; say why.
if not all_runs:
    raise ValueError("subset_grid is empty; no runs to summarize")
summary = pd.concat(all_runs, ignore_index=True)

# Save summary + Pareto chart under REPO_ROOT/data/Bakery/_derived/<POS>/
out_dir = default_out_dir(REPO_ROOT, position)
out_dir.mkdir(parents=True, exist_ok=True)

summary_path = out_dir / f"{position.lower()}_runtime_accuracy_summary.csv"
summary.to_csv(summary_path, index=False)

png_path = save_pareto_chart(summary, position, out_dir)

print("\n✅ Run complete!")
print("Summary CSV:", summary_path)
print("Pareto PNG :", png_path)

# Last expression of the cell, so the notebook renders the first rows.
summary.head()


✅ REPO_ROOT: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty
CSV path: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/RB/Bakery_RB_Overall.csv
Loaded CSV from: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/RB/Bakery_RB_Overall.csv
Shape: (340, 127)
Columns: ['Rank', 'Class Rk', 'Year', 'Player Name', 'Tier', 'RB Grade', 'Overall', 'Z-Score', 'School', 'Conf Rk', 'DOB', 'Age', 'Class', 'Draft Age', 'Build', 'Type', 'Height', 'Weight', 'BMI', '40 Time', 'Draft Cap', 'DOM1', 'DOM2', 'DOM3', 'DOM4', 'DOM5', 'TDOM', 'DOM+', 'RDOM1', 'RDOM2', 'ADOM', 'RDOM+', 'BOUT', 'Speed', 'BMI.1', 'MTF/A', 'YPC', 'YPR', 'RPT', 'ELU', 'YCO/A', 'Break %', 'DOM++', 'RDOM++', 'BOUT+', 'Speed+', 'MTF/A.1', 'YPC+', 'ELU+', 'YCO/A+', 'DC+', 'BAMA', 'Rush Yds', 'Team Yds', 'Rush TDs', 'Team TD', 'Att', 'MTF', 'YPC.1', 'ELU.1', 'YCO/A.1', 'Break %.1', 'Rec Yds', 'Team Yds.1', 'Recs', 'Rec TDs', 'Team TDs', 'Rush Yds.1', 'Team Yds.2', 'Rush TDs.1', 'Team TD.1', 'Att.1', 'M

Unnamed: 0,position,seed,n_subsets,best_test_R2,best_test_MAE,best_test_RMSE,runtime_sec,leaderboard_csv,predictions_csv,best_model_tag,best_bases,best_interactions
0,RB,123,10,0.835027,0.895611,1.032485,94.572886,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|MTF/A|Draft Capital|Breakout Age|RDOM+|YP...,SpeedxBMI
1,RB,456,10,0.897948,0.710555,0.905423,91.773828,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|Breakout Age|YCO/A|MTF/A|YPC|ELU|Draft Ca...,SpeedxBMI
2,RB,789,10,0.91798,0.621143,0.757108,100.957577,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|YPC|YCO/A|Breakout Age|RDOM+|ELU|YPR|MTF/...,SpeedxBMI
3,RB,123,20,0.841218,0.827356,1.012925,190.577302,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|Draft Capital|ELU|YPC|RDOM+|YCO/A,SpeedxBMI
4,RB,456,20,0.904989,0.693067,0.873631,216.999278,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|RDOM+|Draft Capital|Breakout Age|MTF/A|YP...,SpeedxBMI


In [3]:
# --- Find the Dynasty repo root (must contain BOTH src/models and data/Bakery) ---

from pathlib import Path
import sys
import pandas as pd


import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence the noisy warning categories for the rest of the session.
# Filters are registered in the same order as before.
for _category in (UserWarning, RuntimeWarning, FutureWarning, ConvergenceWarning):
    warnings.filterwarnings("ignore", category=_category)
del _category  # leave the notebook namespace exactly as it was

def find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` that is the repo root.

    A directory qualifies when it contains both a ``src/models`` directory
    and a ``data/Bakery`` directory.

    Args:
        start: Directory to begin the upward search from. Relative paths are
            resolved first so that every real ancestor is visited.

    Returns:
        The first qualifying directory, checking ``start`` itself and then
        each parent in turn.

    Raises:
        FileNotFoundError: If neither ``start`` nor any ancestor qualifies.
    """
    # Path(".").parents is empty, so an unresolved relative path would only
    # ever test the starting directory itself.
    start = start.resolve()
    for candidate in (start, *start.parents):
        has_models = (candidate / "src" / "models").is_dir()
        has_bakery = (candidate / "data" / "Bakery").is_dir()
        if has_models and has_bakery:
            return candidate
    raise FileNotFoundError(
        "Could not locate the Dynasty repo root (needs both 'src/models' and 'data/Bakery')."
    )

# Locate the repo root by walking upward from the current working directory.
REPO_ROOT = find_repo_root(Path.cwd())
print("✅ REPO_ROOT:", REPO_ROOT)

# Put the repo root at the front of sys.path (only once) so that the
# `src` package can be imported from this notebook.
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.insert(0, str(REPO_ROOT))

from src.models.multi_tune_by_position import (
    run_seed_for_subsets,
    save_pareto_chart,
    default_out_dir,
    default_csv_for_position,
)

# ---- Config ----
# This cell sweeps run_seed_for_subsets over every value in subset_grid for one
# position, then writes a combined summary CSV and a Pareto chart.
position = "TE"            # RB / WR / TE / QB
seeds = [123, 456, 789]
subset_grid = [10, 20, 30]  # each value is passed as n_subsets

# Optional constraints (leave empty if none)
must_feats  = ["DOM+"]           # e.g. ["DOM+", "YPC"]
ban_feats   = ["Conference Rank", "Draft Age"]           # e.g. ["aDOT"]
must_inters = ["SpeedxBMI"]           # e.g. ["SpeedxBMI"]
ban_inters  = []           # e.g. ["Wide%xSlot%"]
hierarchy   = "none"       # "strong" | "weak" | "none"

# Confirm CSV location (under REPO_ROOT/data/Bakery/...)
csv_path = default_csv_for_position(REPO_ROOT, position)
print("CSV path:", csv_path)
# Raise explicitly instead of using `assert`: assertions are stripped when
# Python runs with -O, which would let a missing file slip through.
if not csv_path.exists():
    raise FileNotFoundError(f"CSV not found at {csv_path}")

# Quick preview of the raw input; `df` is only used for these prints.
df = pd.read_csv(csv_path)
print(f"Loaded CSV from: {csv_path}")
print(f"Shape: {df.shape}")
print("Columns:", df.columns.tolist())
print("First 5 rows:")
print(df.head())

# Run: one call per n_subsets value; each result is kept for concatenation.
all_runs = []
for n in subset_grid:
    res = run_seed_for_subsets(
        position=position,
        project_root=REPO_ROOT,     # ← IMPORTANT: use the Dynasty repo root
        n_subsets=n,
        seeds=seeds,
        max_base_feats=13,
        max_interactions=3,
        n_iter_per_model=15,
        cv_folds=5,
        test_size=0.20,
        must_feats=must_feats,
        ban_feats=ban_feats,
        must_inters=must_inters,
        ban_inters=ban_inters,
        interaction_hierarchy=hierarchy,
        draft_cap_cap=0.30,          # now at 0.30 (guidance: try 0.40 first, then 0.30 / 0.20 if still dominant)
        draft_cap_lower_q=0.05,
        draft_cap_upper_q=0.95,
        draft_cap_importance_cap=0.1,
        breakout_age_importance_cap=0.1,
        draft_age_importance_cap=None,
    )
    all_runs.append(res)

# pd.concat would fail with a generic message on an empty list; say why.
if not all_runs:
    raise ValueError("subset_grid is empty; no runs to summarize")
summary = pd.concat(all_runs, ignore_index=True)

# Save summary + Pareto chart under REPO_ROOT/data/Bakery/_derived/<POS>/
out_dir = default_out_dir(REPO_ROOT, position)
out_dir.mkdir(parents=True, exist_ok=True)

summary_path = out_dir / f"{position.lower()}_runtime_accuracy_summary.csv"
summary.to_csv(summary_path, index=False)

png_path = save_pareto_chart(summary, position, out_dir)

print("\n✅ Run complete!")
print("Summary CSV:", summary_path)
print("Pareto PNG :", png_path)

# Last expression of the cell, so the notebook renders the first rows.
summary.head()


✅ REPO_ROOT: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty
CSV path: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/TE/Bakery_TE_Overall.csv
Loaded CSV from: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/TE/Bakery_TE_Overall.csv
Shape: (213, 110)
Columns: ['Rank', 'Class Rk', 'Year', 'Player Name', 'Tier', 'TE Grade', 'Overall', 'Z-Score', 'School', 'Conf Rk', 'DOB', 'Age', 'Year.1', 'Draft Age', 'Height', 'Weight', 'BMI', '40 Time', 'Draft', 'DOM1', 'DOM2', 'DOM3', 'DOM4', 'DOM5', 'TDOM', 'DOM+', 'BOUT', 'Y/RR', 'Y/REC', 'YAC/R', 'aDOT', 'EPA/P', 'aYPTPA', 'CC%', 'Drop %', 'Speed', 'HaSS', 'DOM++', 'BOUT+', 'Y/RR+', 'Y/REC+', 'YAC/R+', 'HaSS+', 'DC+', 'BAMA', 'Rec Yds', 'Team Yds', 'Tgts', 'Recs', 'TDs', 'Team TDs', 'EPA/P.1', 'aYPTPA.1', 'Y/RR.1', 'YAC/R.1', 'aDOT.1', 'CC %', 'Drop %.1', 'Rec Yds.1', 'Team Yds.1', 'Tgts.1', 'Recs.1', 'TDs.1', 'Team TDs.1', 'EPA/P.2', 'aYPTPA.2', 'Y/RR.2', 'YAC/R.2', 'aDOT.2', 'CC %.1', 'Drop %.2', 'Rec Yds.

Unnamed: 0,position,seed,n_subsets,best_test_R2,best_test_MAE,best_test_RMSE,runtime_sec,leaderboard_csv,predictions_csv,best_model_tag,best_bases,best_interactions
0,TE,123,10,0.725985,1.451568,1.710438,97.564203,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,RF,DOM+|CC%|Breakout Age|YAC/R|aDOT|aYPTPA|Drop R...,SpeedxBMI
1,TE,456,10,0.616048,1.538232,1.87515,89.158478,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,RF,DOM+|Y/RR|EPA/P,SpeedxBMI
2,TE,789,10,0.657806,1.652104,1.883132,96.011407,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,RF,DOM+|Drop Rate|Breakout Age|aYPTPA|CC%|YAC/R|a...,SpeedxBMI
3,TE,123,20,0.725985,1.451568,1.710438,184.340877,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,RF,DOM+|CC%|Breakout Age|YAC/R|aDOT|aYPTPA|Drop R...,SpeedxBMI
4,TE,456,20,0.617128,1.527363,1.872512,176.51342,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,RF,DOM+|aDOT|Y/RR,SpeedxBMI


In [4]:
# --- Find the Dynasty repo root (must contain BOTH src/models and data/Bakery) ---

from pathlib import Path
import sys
import pandas as pd


import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence the noisy warning categories for the rest of the session.
# Filters are registered in the same order as before.
for _category in (UserWarning, RuntimeWarning, FutureWarning, ConvergenceWarning):
    warnings.filterwarnings("ignore", category=_category)
del _category  # leave the notebook namespace exactly as it was

def find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` that is the repo root.

    A directory qualifies when it contains both a ``src/models`` directory
    and a ``data/Bakery`` directory.

    Args:
        start: Directory to begin the upward search from. Relative paths are
            resolved first so that every real ancestor is visited.

    Returns:
        The first qualifying directory, checking ``start`` itself and then
        each parent in turn.

    Raises:
        FileNotFoundError: If neither ``start`` nor any ancestor qualifies.
    """
    # Path(".").parents is empty, so an unresolved relative path would only
    # ever test the starting directory itself.
    start = start.resolve()
    for candidate in (start, *start.parents):
        has_models = (candidate / "src" / "models").is_dir()
        has_bakery = (candidate / "data" / "Bakery").is_dir()
        if has_models and has_bakery:
            return candidate
    raise FileNotFoundError(
        "Could not locate the Dynasty repo root (needs both 'src/models' and 'data/Bakery')."
    )

# Locate the repo root by walking upward from the current working directory.
REPO_ROOT = find_repo_root(Path.cwd())
print("✅ REPO_ROOT:", REPO_ROOT)

# Put the repo root at the front of sys.path (only once) so that the
# `src` package can be imported from this notebook.
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.insert(0, str(REPO_ROOT))

from src.models.multi_tune_by_position import (
    run_seed_for_subsets,
    save_pareto_chart,
    default_out_dir,
    default_csv_for_position,
)

# ---- Config ----
# This cell sweeps run_seed_for_subsets over every value in subset_grid for one
# position, then writes a combined summary CSV and a Pareto chart.
position = "QB"            # RB / WR / TE / QB
seeds = [123, 456, 789]
subset_grid = [10, 20, 30]  # each value is passed as n_subsets

# Optional constraints (leave empty if none)
must_feats  = ["DOM+"]           # e.g. ["DOM+", "YPC"]
ban_feats   = ["Conference Rank", "Draft Age"]           # e.g. ["aDOT"]
must_inters = ["SpeedxBMI"]           # e.g. ["SpeedxBMI"]
ban_inters  = []           # e.g. ["Wide%xSlot%"]
hierarchy   = "none"       # "strong" | "weak" | "none"

# Confirm CSV location (under REPO_ROOT/data/Bakery/...)
csv_path = default_csv_for_position(REPO_ROOT, position)
print("CSV path:", csv_path)
# Raise explicitly instead of using `assert`: assertions are stripped when
# Python runs with -O, which would let a missing file slip through.
if not csv_path.exists():
    raise FileNotFoundError(f"CSV not found at {csv_path}")

# Quick preview of the raw input; `df` is only used for these prints.
df = pd.read_csv(csv_path)
print(f"Loaded CSV from: {csv_path}")
print(f"Shape: {df.shape}")
print("Columns:", df.columns.tolist())
print("First 5 rows:")
print(df.head())

# Run: one call per n_subsets value; each result is kept for concatenation.
all_runs = []
for n in subset_grid:
    res = run_seed_for_subsets(
        position=position,
        project_root=REPO_ROOT,     # ← IMPORTANT: use the Dynasty repo root
        n_subsets=n,
        seeds=seeds,
        max_base_feats=13,
        max_interactions=3,
        n_iter_per_model=15,
        cv_folds=5,
        test_size=0.20,
        must_feats=must_feats,
        ban_feats=ban_feats,
        must_inters=must_inters,
        ban_inters=ban_inters,
        interaction_hierarchy=hierarchy,
        draft_cap_cap=0.30,          # now at 0.30 (guidance: try 0.40 first, then 0.30 / 0.20 if still dominant)
        draft_cap_lower_q=0.05,
        draft_cap_upper_q=0.95,
        draft_cap_importance_cap=0.1,
        breakout_age_importance_cap=0.1,
        draft_age_importance_cap=None,
    )
    all_runs.append(res)

# pd.concat would fail with a generic message on an empty list; say why.
if not all_runs:
    raise ValueError("subset_grid is empty; no runs to summarize")
summary = pd.concat(all_runs, ignore_index=True)

# Save summary + Pareto chart under REPO_ROOT/data/Bakery/_derived/<POS>/
out_dir = default_out_dir(REPO_ROOT, position)
out_dir.mkdir(parents=True, exist_ok=True)

summary_path = out_dir / f"{position.lower()}_runtime_accuracy_summary.csv"
summary.to_csv(summary_path, index=False)

png_path = save_pareto_chart(summary, position, out_dir)

print("\n✅ Run complete!")
print("Summary CSV:", summary_path)
print("Pareto PNG :", png_path)

# Last expression of the cell, so the notebook renders the first rows.
summary.head()


✅ REPO_ROOT: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty
CSV path: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/QB/Bakery_QB_Overall.csv
Loaded CSV from: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Bakery/QB/Bakery_QB_Overall.csv
Shape: (764, 149)
Columns: ['Overall RK', 'Class Rk', 'Year', 'Player Name', 'Unnamed: 4', 'QB Grade', 'Overall', 'Z-Score', 'School', 'Conf Rk', 'DOB', 'Age', 'Year.1', 'Draft Age', 'Archetype', 'Height', 'Weight', 'BMI', '40 Time', 'Draft Cap', 'DOM1', 'DOM2', 'DOM3', 'DOM4', 'DOM5', 'TDOM', 'DOM+', 'RDOM1', 'RDOM2', 'ADOM', 'RDOM+', 'BOUT', 'Speed', 'Comp', 'YPC', 'ADJ%', 'BTT%', 'TWP%', 'DAA', 'YPA', 'MTF/A', 'YCO/A', 'BOUT.1', 'YPC+', 'ADJ%+', 'BTT%+', 'TWP%+', 'DAA+', 'RDOM++', 'YPA+', 'MTF/A+', 'YCO/A+', 'DC+', 'BAMA', 'Raw Passing', 'Pass Grade', 'Raw Run', 'Run Grade', 'Pass Yards', 'Team Yds', 'Pass TDs', 'INT', 'ATT', 'COMP', 'ADJ%.1', 'BTT', 'TWP', 'DAA.1', 'Rush Yds', 'Team Yds.1', 'ATT.1', 'YPC.1', 'TDs'

Unnamed: 0,position,seed,n_subsets,best_test_R2,best_test_MAE,best_test_RMSE,runtime_sec,leaderboard_csv,predictions_csv,best_model_tag,best_bases,best_interactions
0,QB,123,10,0.906395,0.855429,1.091009,85.143912,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,GB,DOM+|ADJ%|MTF/A|RDOM+|BTT%|YCO/A|YPA|DAA|Draft...,SpeedxBMI
1,QB,456,10,0.934719,0.764533,1.004917,84.95654,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,HGB,DOM+|ADJ%|BTT%|MTF/A|DAA|Comp%|YCO/A|YPC|RDOM+...,SpeedxBMI
2,QB,789,10,0.946258,0.635898,0.762418,82.992176,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,HGB,DOM+|DAA|TWP%|MTF/A|YPC|YCO/A|RDOM+|ADJ%|BTT%|...,SpeedxBMI
3,QB,123,20,0.926093,0.757911,0.969441,170.777462,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,HGB,DOM+|Draft Capital|Breakout Age|YPA|ADJ%|DAA|C...,SpeedxBMI
4,QB,456,20,0.934719,0.764533,1.004917,164.043561,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,/Users/chasesiegel/Desktop/Comp_Sci/Capstone/D...,HGB,DOM+|ADJ%|BTT%|MTF/A|DAA|Comp%|YCO/A|YPC|RDOM+...,SpeedxBMI


OVERVIEW
I developed a Python function called run_seed_for_subsets that automates model testing and evaluation.
This function:
-Randomly tests different sets of features and interaction terms (for example, Speed × BMI).
-Runs multiple machine learning algorithms automatically, including:
    -Gradient Boosting (GB)
    -Histogram Gradient Boosting (HGB)
    -Random Forest (RF)
    -Ridge Regression
    -Bayesian Ridge Regression
-Performs cross-validation to ensure models generalize well and don’t overfit.
-Uses randomized hyperparameter search to optimize each model’s performance.
-Calculates multiple evaluation metrics (R², MAE, RMSE) for performance comparison.
-Uses SHAP values to analyze which features have the most influence on predictions.
-This automated approach allows for large-scale model testing without manual intervention 
and ensures consistent comparison across all positional datasets.
-RMSE was the primary performance metric used.

MODEL RESULTS
-Best fit model by position
    -WR--GB
    -RB--GB
    -TE--RF
    -QB--HGB

KEY FINDINGS
-Gradient Boosting models were the most consistent overall, particularly for WR and RB.
-Random Forest worked best for TE, where the dataset was the smallest (213 rows) and test scores varied the most across seeds.
-Cross-validation and multiple random seeds helped confirm model stability and guard against overfitting to a single train/test split.
-SHAP feature importance analysis showed that metrics like Draft Capital 
and Dominator Rating (DOM+) were the most influential predictors.
-Creating ensemble models (averaging multiple algorithms) provided 
only a small performance gain, showing that the top individual models already generalized effectively.

CONCLUSION
-Each position benefits from a slightly different modeling approach
    -Wide Receivers and Running Backs perform best with nonlinear Gradient Boosting models, 
    which capture subtle relationships between physical and performance metrics.
    -Tight Ends are better modeled with Random Forests, which are more tolerant of missing values 
    and data variability.
    -Quarterbacks perform best with Histogram Gradient Boosting, which efficiently handles larger
    datasets and continuous numerical features like completion percentage and adjusted accuracy.



