In [1]:
import pandas as pd
from pathlib import Path
import re

# ========= CONFIG =========
# Project root; every input and output path below is derived from it.
ROOT = Path("/Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty")

# Inputs: the processed master list and the folder holding the ranking files.
MASTER_PATH = ROOT.joinpath("data", "processed", "master_list.csv")
RANKINGS_ROOT = ROOT.joinpath("data", "Rankings")

# Outputs: both are written into the rankings folder.
OUT_MASTER = RANKINGS_ROOT / "master_list_with_ranks.csv"
UNMATCHED_CSV = RANKINGS_ROOT / "master_list_unranked_players.csv"

# Positions processed, in this order.
POSITIONS = ["WR", "RB", "TE", "QB"]


# ========= HELPERS =========

def normalize_name(name: str) -> str:
    """Return a canonical key used to match player names across files.

    The key is lowercase ASCII with single spaces between words:

    * missing values (``None``/``NaN``) become ``""``;
    * accents are folded to their base letter (``José`` -> ``jose``);
    * periods and apostrophes are deleted rather than turned into spaces, so
      ``A.J. Brown`` and ``AJ Brown`` (or ``Ja'Marr`` and ``JaMarr``) produce
      the same key;
    * any other non-alphanumeric character (e.g. a hyphen) becomes a space;
    * runs of whitespace collapse to one space and the ends are trimmed.

    Suffixes such as ``Jr``/``III`` are kept on purpose so that two distinct
    players who differ only by suffix do not share a key.

    Args:
        name: Raw player name; any value accepted by ``pd.isna`` / ``str``.

    Returns:
        The normalized key, or ``""`` when ``name`` is missing.
    """
    import unicodedata  # local import keeps this helper self-contained

    if pd.isna(name):
        return ""

    # Decompose accented characters, then drop the combining marks.
    s = unicodedata.normalize("NFKD", str(name))
    s = "".join(ch for ch in s if not unicodedata.combining(ch))
    s = s.strip().lower()

    # Delete initials' periods and straight/curly apostrophes without
    # inserting a space, so "a.j." and "aj" normalize identically.
    s = re.sub(r"[.'`\u2018\u2019]", "", s)

    # Everything else that is not a letter, digit, or whitespace separates words.
    s = re.sub(r"[^a-z0-9\s]+", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

def find_pos_col(cols):
    """Return the first column named "pos" or "position" (case-insensitive).

    Args:
        cols: Iterable of column-name strings, scanned in order.

    Returns:
        The matching column name exactly as it appears in ``cols``, or
        ``None`` when no column matches.
    """
    accepted = ("pos", "position")
    return next((col for col in cols if col.lower() in accepted), None)

def find_name_col(cols):
    """Return the first column that holds player names (case-insensitive).

    Recognized names are "player_name", "player", "name" and "player name".

    Args:
        cols: Iterable of column-name strings, scanned in order.

    Returns:
        The matching column name exactly as it appears in ``cols``, or
        ``None`` when no column matches.
    """
    accepted = ("player_name", "player", "name", "player name")
    return next((col for col in cols if col.lower() in accepted), None)

def find_rank_col(cols):
    """Return the first column that holds a ranking (case-insensitive).

    Recognized names are "rank", "ranking" and "overall_rank".

    Args:
        cols: Iterable of column-name strings, scanned in order.

    Returns:
        The matching column name exactly as it appears in ``cols``, or
        ``None`` when no column matches.
    """
    accepted = ("rank", "ranking", "overall_rank")
    return next((col for col in cols if col.lower() in accepted), None)


# ========= LOAD MASTER LIST =========
# Read the master list and locate the two columns the matching step needs.
print("Loading master list:", MASTER_PATH)
master = pd.read_csv(MASTER_PATH)

pos_col, player_col = find_pos_col(master.columns), find_name_col(master.columns)

# Fail fast, position column first, when a required column is absent.
for found_col, error_text in (
    (pos_col, "No pos column found on master list."),
    (player_col, "No player-name column found on master list."),
):
    if found_col is None:
        raise ValueError(error_text)

# Start every player as unranked; ranks are filled in per position later.
master["rank"] = pd.NA


# ========= PROCESS EACH POSITION =========
# For each position: load its ranking CSV(s), build a unique
# normalized-name -> rank lookup, and write the matching rank onto that
# position's rows of `master`.
for pos in POSITIONS:
    pos_dir = RANKINGS_ROOT / pos
    pos_file = RANKINGS_ROOT / f"{pos}.csv"
    print(f"\n===== Processing position: {pos} =====")

    # Rankings live either in a per-position folder (<root>/<POS>/*.csv) or,
    # failing that, in a single file (<root>/<POS>.csv).
    if pos_dir.is_dir():
        ranking_paths = sorted(pos_dir.glob("*.csv"))
    elif pos_file.is_file():
        ranking_paths = [pos_file]
    else:
        ranking_paths = []

    rank_dfs = []
    for p in ranking_paths:
        print(f"  Loaded rankings file: {p.name}")
        rank_dfs.append(pd.read_csv(p))

    if not rank_dfs:
        print(f"[WARN] No ranking files found for {pos}")
        continue

    rankings = pd.concat(rank_dfs, ignore_index=True)

    r_name_col = find_name_col(rankings.columns)
    r_rank_col = find_rank_col(rankings.columns)

    if r_name_col is None or r_rank_col is None:
        print(f"[WARN] Ranking file missing required columns for {pos}")
        continue

    rankings["player_key"] = rankings[r_name_col].map(normalize_name)

    # Keep only rows that can actually match: a non-blank key and a rank.
    # Blank keys are excluded so nameless rows never match each other.
    usable = rankings[(rankings["player_key"] != "") & rankings[r_rank_col].notna()]

    # The lookup must have one rank per key. A left merge would emit one row
    # per duplicate ranking entry and no longer line up with the master rows,
    # so duplicates are collapsed to their first occurrence (files are read
    # in sorted filename order).
    n_dupes = int(usable["player_key"].duplicated().sum())
    if n_dupes:
        print(f"[WARN] {n_dupes} duplicate player entries in {pos} rankings; keeping first occurrence")
    rank_lookup = (
        usable.drop_duplicates("player_key", keep="first")
        .set_index("player_key")[r_rank_col]
    )

    # Filter master by position and look each player up by normalized name.
    mask = master[pos_col].astype(str).str.upper() == pos
    pos_keys = master.loc[mask, player_col].map(normalize_name)
    pos_ranks = pos_keys.map(rank_lookup)

    # Index-aligned assignment: each rank lands on the row it was looked up for.
    master.loc[mask, "rank"] = pos_ranks

    print(f"[INFO] Assigned {pos_ranks.notna().sum()} ranks for {pos}")
    print(f"[INFO] {pos_ranks.isna().sum()} players missing ranks for {pos}")


# ========= REMOVE TEMP KEYS =========
# Drop the helper matching column if an earlier step left it on the frame.
master = master.drop(columns=["player_key"], errors="ignore")


# ========= ENSURE OUTPUT FOLDERS EXIST =========
# Create the destination folders before the first write so neither
# `to_csv` call below can fail on a missing directory.
for out_path in (UNMATCHED_CSV, OUT_MASTER):
    out_path.parent.mkdir(parents=True, exist_ok=True)


# ========= IDENTIFY UNRANKED PLAYERS =========
has_rank = master["rank"].notna()
unmatched = master.loc[~has_rank, [player_col, pos_col]]
unmatched.to_csv(UNMATCHED_CSV, index=False)

print("\n===== SUMMARY =====")
print("Players total           :", len(master))
print("Players with ranks      :", len(master) - len(unmatched))
print("Players WITHOUT ranks   :", len(unmatched))
print("Unranked list saved to  :", UNMATCHED_CSV)


# ========= DROP ALL UNRANKED PLAYERS =========
master_ranked = master.loc[has_rank].copy()

# Put rank right after pos
cols = [c for c in master_ranked.columns if c != "rank"]
cols.insert(cols.index(pos_col) + 1, "rank")
master_ranked = master_ranked[cols]


# ========= SAVE RANKED-ONLY MASTER LIST =========
master_ranked.to_csv(OUT_MASTER, index=False)

print("\n[INFO] Final ranked-only master list saved →", OUT_MASTER)
print("Final row count:", len(master_ranked))


Loading master list: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/processed/master_list.csv

===== Processing position: WR =====
  Loaded rankings file: wr_ranks.csv
[INFO] Assigned 332 ranks for WR
[INFO] 374 players missing ranks for WR

===== Processing position: RB =====
  Loaded rankings file: rb_ranks.csv
[INFO] Assigned 212 ranks for RB
[INFO] 281 players missing ranks for RB

===== Processing position: TE =====
  Loaded rankings file: te_ranks.csv
[INFO] Assigned 144 ranks for TE
[INFO] 175 players missing ranks for TE

===== Processing position: QB =====
  Loaded rankings file: qb_ranks.csv
[INFO] Assigned 118 ranks for QB
[INFO] 149 players missing ranks for QB

===== SUMMARY =====
Players total           : 1785
Players with ranks      : 806
Players WITHOUT ranks   : 979
Unranked list saved to  : /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Rankings/master_list_unranked_players.csv

[INFO] Final ranked-only master list saved → /Users/chasesiegel/Desk

In [3]:
import pandas as pd
from pathlib import Path

# ========= CONFIG =========
# Project root; the input file and output folder below are derived from it.
ROOT = Path("/Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty")

# Input: the ranked-only master list. Output: one CSV per position in OUT_DIR.
MASTER_WITH_RANKS = ROOT.joinpath("data", "Rankings", "master_list_with_ranks.csv")
OUT_DIR = ROOT.joinpath("data", "Rankings", "ranked_by_position")

# Positions written out, in this order.
POSITIONS = ["WR", "RB", "TE", "QB"]

# Read the ranked master list produced by the ranking step.
print("Loading ranked master list:", MASTER_WITH_RANKS)
df = pd.read_csv(MASTER_WITH_RANKS)

# Ensure output directory exists
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Detect the pos column: first header equal to "pos" or "position",
# compared case-insensitively.
pos_col = next(
    (col for col in df.columns if col.lower() in ("pos", "position")),
    None,
)

if pos_col is None:
    raise ValueError("Could not find 'pos' column in master_list_with_ranks.csv")

print("Detected position column:", pos_col)

# ========= SPLIT AND SAVE =========
# Upper-cased position labels, computed once and compared for each position.
pos_upper = df[pos_col].astype(str).str.upper()

for pos in POSITIONS:
    df_pos = df.loc[pos_upper == pos].copy()

    if not df_pos.empty:
        # One CSV per position, named after the position code.
        out_path = OUT_DIR / f"master_list_with_ranks_{pos}.csv"
        df_pos.to_csv(out_path, index=False)
        print(f"[INFO] Saved {len(df_pos)} rows → {out_path}")
    else:
        print(f"[WARN] No players for position {pos}")

print("\n[DONE] All position-specific ranked lists created.")


Loading ranked master list: /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Rankings/master_list_with_ranks.csv
Detected position column: pos
[INFO] Saved 332 rows → /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Rankings/ranked_by_position/master_list_with_ranks_WR.csv
[INFO] Saved 212 rows → /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Rankings/ranked_by_position/master_list_with_ranks_RB.csv
[INFO] Saved 144 rows → /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Rankings/ranked_by_position/master_list_with_ranks_TE.csv
[INFO] Saved 118 rows → /Users/chasesiegel/Desktop/Comp_Sci/Capstone/Dynasty/data/Rankings/ranked_by_position/master_list_with_ranks_QB.csv

[DONE] All position-specific ranked lists created.
