In [10]:
%%writefile ../pyproject.toml
# Build configuration: the package is built with setuptools through its
# PEP 517 backend.
[build-system]
requires = ["setuptools>=70", "wheel"]
build-backend = "setuptools.build_meta"

# Distribution metadata.
# NOTE(review): src/salary_nba_data_pull/__init__.py declares
# __version__ = "0.1.0" while this file says 0.0.1 — confirm which one is
# authoritative.
[project]
name = "data_science"
version = "0.0.1"
description = "General data science/ML environment"
authors = [{ name = "Geoffrey Hadfield" }]
requires-python = ">=3.10,<3.12"   # stay on 3.10/3.11; 3.12 still shaky for some wheels

# Runtime dependencies installed with the base package.
dependencies = [
  "numpy>=1.26",              # keep modern NumPy
  "pandas>=2.2",
  "scikit-learn>=1.5",
  "joblib",
  "matplotlib",
  "seaborn",
  "jupyterlab<5.0",
  "ipykernel<6.30",
  "dash",
  "dash-bootstrap-components",
  "plotly",
  "opencv-python-headless",
  "pillow",
  "tqdm",
  "statsmodels",
  "streamlit",
  "xgboost",
  "lightgbm",
  "requests",
  "tenacity",                 # imported directly by salary_nba_data_pull.fetch_utils (retry/back-off)
  "IPython",
  "tabulate",
  "pyarrow",
  "requests-cache",
  "diskcache",
  "unidecode",
  "cpi>=2.0.0",
  "lxml",
  # ---- Explainability stack ----
  "shap>=0.46.0",             # supports NumPy 2, so fine with 1.26+
  "numba>=0.58.1,<0.61",      # 0.58.1 adds NumPy 1.26 support; 0.60 adds NumPy2
  # llvmlite will be pulled transitively with the correct version
  # ---- NBA tooling ----
  "nba_api<=1.4.1",
  "beautifulsoup4",
]

# Optional extras, installed with e.g. `pip install .[spark]` or `.[dev]`.
[project.optional-dependencies]
spark = [
  "pyspark",
  "install-jdk>=1.1.0",
]
dev = [
  "pytest",
  "black",
  "flake8",
  "mypy",
]

# Formatter settings.
[tool.black]
line-length = 88
target-version = ["py310"]

# Linter settings, kept in step with black's 88-column limit.
# NOTE(review): stock flake8 reads only setup.cfg / tox.ini / .flake8; this
# table presumably takes effect only when a plugin such as Flake8-pyproject is
# installed — confirm, or move these options to one of the supported files.
[tool.flake8]
max-line-length = 88
extend-ignore = ["E203"]

# Static type-checker settings.
[tool.mypy]
python_version = "3.10"
ignore_missing_imports = true
strict_optional = true

# Package discovery: setuptools looks for packages under src/.
[tool.setuptools.packages.find]
where = ["src"]



Overwriting ../pyproject.toml


In [11]:
%%writefile ../src/salary_nba_data_pull/__init__.py
"""
Top-level package for the NBA data pull.

Groups the helpers used to fetch, process and analyse NBA player data:
salaries, statistics and advanced metrics.
"""

__version__ = "0.1.0"

# Names exposed by ``from salary_nba_data_pull import *``.
__all__ = [
    "main",
    "fetch_utils",
    "process_utils",
    "scrape_utils",
    "data_utils",
    "settings",
    "notebook_helper",
]

Overwriting ../src/salary_nba_data_pull/__init__.py


In [12]:
%%writefile ../src/salary_nba_data_pull/settings.py
# src/salary_nba_data_pull/settings.py
from pathlib import Path
import os
import typing as _t

# 🗂️  Default processed-data folder: <project root>/data/new_processed, where
# the project root is taken to be three directory levels above this file.
DATA_PROCESSED_DIR = Path(__file__).resolve().parent.parent.parent.joinpath(
    "data", "new_processed"
)

# A non-empty DATA_PROCESSED_DIR environment variable replaces the default,
# e.g. `DATA_PROCESSED_DIR=/tmp/demo python main.py …`
ENV_OVERRIDE: _t.Optional[str] = os.environ.get("DATA_PROCESSED_DIR")
if ENV_OVERRIDE:
    DATA_PROCESSED_DIR = Path(ENV_OVERRIDE).expanduser().resolve()

# Older processed-data folder, still exported for backward compatibility.
# It is not affected by the environment override above.
LEGACY_DATA_PROCESSED_DIR = Path(__file__).resolve().parent.parent.parent.joinpath(
    "data", "processed"
)

Overwriting ../src/salary_nba_data_pull/settings.py


In [13]:
%%writefile ../src/salary_nba_data_pull/fetch_utils.py
import threading
import time
import random
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache, wraps
from http import HTTPStatus
from typing import Callable
import requests
from nba_api.stats.endpoints import commonallplayers, commonplayerinfo, playercareerstats, leaguestandings
from requests.exceptions import RequestException
from json.decoder import JSONDecodeError
from joblib import Memory
from unidecode import unidecode
from tenacity import (
    retry, retry_if_exception, wait_random_exponential,
    stop_after_attempt, before_log
)

# Request budget for the NBA Stats API, in calls per minute.
REQUESTS_PER_MIN = 8   # ↓ a bit safer for long pulls (NBA suggests ≤10)
# Bounded semaphore with one permit per allowed request.
_SEM = threading.BoundedSemaphore(REQUESTS_PER_MIN)

# Set up joblib memory for caching API responses
# The cache lives on disk under <this file's dir>/../../data/cache/nba_api.
cache_dir = os.path.join(os.path.dirname(__file__), '../../data/cache/nba_api')
memory = Memory(cache_dir, verbose=0)

# Pacing state shared by every thread: `_next_slot` is the monotonic-clock
# time at which the next caller may proceed, `_THROTTLE_LOCK` guards it.
_THROTTLE_LOCK = threading.Lock()
_next_slot = 0.0


def _throttle():
    """
    Block until this caller's request slot so that calls stay under
    REQUESTS_PER_MIN across all threads.

    Each caller reserves the next free slot while holding the lock and then
    sleeps (outside the lock) until that slot arrives. Slots are spaced
    ``60 / REQUESTS_PER_MIN`` seconds apart, so N concurrent callers are
    released one interval apart rather than all at once.

    The previous semaphore-plus-sleep version let up to REQUESTS_PER_MIN
    threads finish their sleep simultaneously, which allowed bursts well above
    the intended per-minute budget, and it leaked a permit if the sleep was
    interrupted.
    """
    global _next_slot
    interval = 60 / REQUESTS_PER_MIN
    with _THROTTLE_LOCK:
        now = time.monotonic()
        slot = max(now, _next_slot)      # never schedule in the past
        _next_slot = slot + interval     # reserve the following slot for the next caller
    delay = slot - now
    if delay > 0:
        time.sleep(delay)

def _needs_retry(exc: Exception) -> bool:
    """
    Decide whether a failed call is worth retrying.

    True for an HTTP error whose response status is 429 (Too Many Requests)
    or 503 (Service Unavailable), and for connection errors and timeouts.
    False for everything else.
    """
    transient_statuses = (HTTPStatus.TOO_MANY_REQUESTS, HTTPStatus.SERVICE_UNAVAILABLE)
    has_response = isinstance(exc, requests.HTTPError) and exc.response is not None
    if has_response and exc.response.status_code in transient_statuses:
        return True
    network_failures = (requests.ConnectionError, requests.Timeout)
    return isinstance(exc, network_failures)

def _respect_retry_after(resp: requests.Response):
    """
    Honour a ``Retry-After`` response header by sleeping for that long.

    Does nothing when ``resp`` is None, when the header is absent, or when
    its value cannot be used as a whole number of seconds.
    """
    if resp is None or 'Retry-After' not in resp.headers:
        return
    try:
        delay_s = int(resp.headers['Retry-After'])
        logging.warning("↺ server asked to wait %ss", delay_s)
        time.sleep(delay_s)
    except ValueError:
        # header unparsable, ignore
        return

def _make_retry(fn: Callable) -> Callable:
    """
    Decorator to add tenacity retry with jitter + respect Retry-After.

    The wrapped function is retried while `_needs_retry` accepts the raised
    exception, for at most 5 attempts, waiting a random exponential back-off
    (multiplier 2, capped at 60 s) between attempts. When attempts run out the
    original exception is re-raised (``reraise=True``).

    Before tenacity's own wait, an HTTP error carrying a ``Retry-After``
    header is honoured via `_respect_retry_after`.
    """
    # `before_sleep_log` is the tenacity helper written for the `before_sleep`
    # hook: it logs the exception and the upcoming wait. `before_log` is meant
    # for the `before` hook and would log a misleading "Starting call…" line.
    # Imported locally so this function does not depend on the import block.
    from tenacity import before_sleep_log

    @retry(
        retry=retry_if_exception(_needs_retry),
        wait=wait_random_exponential(multiplier=2, max=60),
        stop=stop_after_attempt(5),
        before_sleep=before_sleep_log(logging.getLogger(__name__), logging.WARNING),
        reraise=True,
    )
    @wraps(fn)
    def _wrapper(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except requests.HTTPError as exc:
            # Sleep for the server-suggested time first, then let tenacity
            # decide whether (and after how long) to retry.
            _respect_retry_after(exc.response)
            raise
    return _wrapper

@memory.cache
@_make_retry
def fetch_with_retry(endpoint, *, timeout=90, debug=False, **kwargs):
    """
    Call an nba_api endpoint class and return its first result DataFrame.

    The call is paced by `_throttle`, retried by `_make_retry` and memoised on
    disk by joblib. ``endpoint`` is invoked as ``endpoint(timeout=timeout,
    **kwargs)``; with ``debug`` set, the elapsed time is logged at DEBUG level.
    """
    _throttle()
    started_at = time.perf_counter()
    frames = endpoint(timeout=timeout, **kwargs).get_data_frames()
    first_frame = frames[0]
    if debug:
        elapsed = time.perf_counter() - started_at
        logging.debug("✓ %s in %.1fs %s", endpoint.__name__, elapsed, kwargs)
    return first_frame

@memory.cache
def fetch_all_players(season: str, debug: bool = False) -> dict[str, dict]:
    """
    Map each player on the *active* roster to their ids.

    Returns ``{clean_name: {'player_id': …, 'team_id': …}}`` where
    ``clean_name`` is the display name ASCII-folded, stripped and lower-cased.
    When two rows share a clean name the later row wins.
    """
    roster_df = fetch_with_retry(
        commonallplayers.CommonAllPlayers,
        season=season,
        is_only_current_season=1,        # restrict to the current-season roster
        league_id="00",
        debug=debug,
    )
    players: dict[str, dict] = {}
    if roster_df is not None:
        players = {
            unidecode(rec["DISPLAY_FIRST_LAST"]).strip().lower(): {
                "player_id": int(rec["PERSON_ID"]),
                "team_id": int(rec["TEAM_ID"]),
            }
            for _, rec in roster_df.iterrows()
        }
    if debug:
        print(f"[fetch_all_players] {len(players)} active players for {season}")
    return players

@lru_cache(maxsize=None)
def fetch_season_players(season: str, debug: bool = False) -> dict[str, dict]:
    """
    Map every player whose career window covers ``season`` to their ids.

    Returns ``{clean_name: {'player_id': …, 'team_id': …}}``. The full player
    list is requested (not "current-season only") and then narrowed to rows
    with ``FROM_YEAR <= start year <= TO_YEAR``, where the start year is the
    first four characters of ``season``.

    Results are memoised in-process, so repeated calls with the same arguments
    return the same dict object.
    """
    everyone_df = fetch_with_retry(
        commonallplayers.CommonAllPlayers,
        season=season,
        is_only_current_season=0,         # whole database, filtered below
        league_id="00",
        debug=debug,
    )
    players: dict[str, dict] = {}
    if everyone_df is not None:
        start_year = int(season[:4])
        first_year = everyone_df["FROM_YEAR"].astype(int)
        last_year = everyone_df["TO_YEAR"].astype(int)
        # keep rows whose career window encloses this season
        in_window = (first_year <= start_year) & (last_year >= start_year)
        players = {
            unidecode(rec["DISPLAY_FIRST_LAST"]).strip().lower(): {
                "player_id": int(rec["PERSON_ID"]),
                "team_id": int(rec["TEAM_ID"]),
            }
            for _, rec in everyone_df[in_window].iterrows()
        }

    if debug:
        print(f"[fetch_season_players] {len(players)} players for {season}")
    return players

@memory.cache
def fetch_player_info(player_id, debug=False):
    """Return the first CommonPlayerInfo result frame for ``player_id`` (disk-cached)."""
    endpoint = commonplayerinfo.CommonPlayerInfo
    return fetch_with_retry(endpoint, player_id=player_id, debug=debug)

@memory.cache
def fetch_career_stats(player_id, debug=False):
    """Return the first PlayerCareerStats result frame for ``player_id`` (disk-cached)."""
    endpoint = playercareerstats.PlayerCareerStats
    return fetch_with_retry(endpoint, player_id=player_id, debug=debug)

@memory.cache
def fetch_league_standings(season, debug=False):
    """Return the first LeagueStandings result frame for ``season`` (disk-cached)."""
    endpoint = leaguestandings.LeagueStandings
    return fetch_with_retry(endpoint, season=season, debug=debug)

def clear_cache():
    """
    Clear every cache this module maintains.

    Empties the on-disk joblib cache and also the in-process ``lru_cache`` of
    `fetch_season_players`; without the latter, a call made after clearing
    would still return the previously memoised player dict.
    """
    memory.clear()
    fetch_season_players.cache_clear()

if __name__ == "__main__":
    # Manual smoke test: roster, one player's info and career stats, standings.
    debug = True
    season = "2022-23"
    sample_player_name = "LeBron James"

    # Roster lookup table for the season
    all_players = fetch_all_players(season, debug=debug)
    print(f"Total players fetched: {len(all_players)}")

    # Roster keys are lower-cased, so look the sample player up the same way
    lookup_key = sample_player_name.lower()
    if lookup_key not in all_players:
        print(f"Player {sample_player_name} not found in the {season} season data.")
    else:
        sample_player_id = all_players[lookup_key]['player_id']

        # Biographical info for the sample player
        player_info = fetch_player_info(sample_player_id, debug=debug)
        print(f"Sample player info for {sample_player_name}:")
        print(player_info)

        # Career stats for the sample player
        career_stats = fetch_career_stats(sample_player_id, debug=debug)
        print(f"Sample player career stats for {sample_player_name}:")
        print(career_stats)

    # League standings for the season
    standings = fetch_league_standings(season, debug=debug)
    print("League standings:")
    print(standings)


Overwriting ../src/salary_nba_data_pull/fetch_utils.py


In [14]:
%%writefile ../src/salary_nba_data_pull/scrape_utils.py
import pandas as pd
import requests
import time
import random
import re
from bs4 import BeautifulSoup
from io import StringIO
from typing import Optional
import os
import requests_cache
from unidecode import unidecode
from pathlib import Path
from datetime import datetime
from salary_nba_data_pull.settings import DATA_PROCESSED_DIR
from functools import lru_cache
import threading
# Lock and in-process cache used by _season_advanced_df so each season's
# advanced-stats table is downloaded once per run.
_ADV_LOCK   = threading.Lock()
_ADV_CACHE: dict[str, pd.DataFrame] = {}   # season -> DataFrame

# Install cache for all requests
# NOTE(review): install_cache patches `requests` process-wide, so the plain
# requests.get calls in this module — and presumably any other library using
# requests in the same process (e.g. nba_api) — are cached for 24 h as well;
# confirm that is intended.
requests_cache.install_cache('nba_scraping', expire_after=86400)  # 24 hours

# Create cached session with stale-if-error capability
# It uses the same cache name ('nba_scraping') as the global cache above.
session = requests_cache.CachedSession(
    'nba_scraping',
    expire_after=86400,
    stale_if_error=True       # <-- NEW: serve expired cache if remote 429s
)

def scrape_salary_cap_history(*, debug: bool = False) -> pd.DataFrame | None:
    """
    Robust pull of historical cap / tax / apron lines.

    Strategy:
    1. Try RealGM (live HTML): a ``<pre>`` fixed-width block if present,
       otherwise the first ``<table>``.
    2. If that fails for any reason, load ``salary_cap_history_inflated.csv``
       from DATA_PROCESSED_DIR when it exists.
    3. As a last resort, return a one-row frame holding the hard-coded
       2024-25 salary cap (so we still merge *something*).

    Returns a DataFrame with a ``Season`` column plus numeric money columns.
    The ``None`` in the return annotation is kept for backward compatibility;
    step 3 always produces a frame, so None is no longer returned.
    """
    # Hard-coded last-resort values. The cap figure belongs to the 2024-25
    # season, so it is labelled as such rather than with the current year.
    fallback_season = "2024-25"
    fallback_cap_musd = 140.588

    url = "https://basketball.realgm.com/nba/info/salary_cap"

    try:
        # Same browser-like header as the other scrapers in this module, and
        # fail fast on an error status instead of parsing an error page.
        resp = requests.get(url, headers=UA, timeout=30)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        # -------- 1️⃣  RealGM table (new markup) --------------------
        blk = soup.find("pre")                      # new 2025 layout
        if blk:                                     # parse fixed‑width block
            # NOTE(review): whitespace splitting assumes no header label or
            # cell contains a space — confirm against the live page.
            rows = [r.strip().split() for r in blk.text.strip().splitlines()]
            df = pd.DataFrame(rows[1:], columns=rows[0])
        else:
            # Legacy table path (kept for safety)
            tbl = soup.select_one("table")
            if not tbl:
                raise ValueError("salary_cap table not found")
            # Wrap in StringIO: passing literal HTML to read_html is deprecated.
            df = pd.read_html(StringIO(str(tbl)))[0]

        # ---- normalise ----
        df["Season"] = df["Season"].str.extract(r"(\d{4}-\d{4})", expand=False)
        money_cols = [c for c in df.columns if c != "Season"]
        for c in money_cols:
            stripped = df[c].astype(str).str.replace(r"[$,]", "", regex=True)
            # to_numeric turns blanks / non-numeric cells into NaN; the old
            # `.replace("", pd.NA).astype(float)` raised on any empty cell.
            df[c] = pd.to_numeric(stripped, errors="coerce")

        if debug:
            print(f"[salary‑cap] scraped {len(df)} rows from RealGM")

        return df

    except Exception as exc:
        # Deliberately broad: any scrape/parse problem moves on to the fallbacks.
        if debug:
            print(f"[salary‑cap] primary scrape failed → {exc!s}")

    # -------- 2️⃣  local cached CSV ----------------------------
    fallback = DATA_PROCESSED_DIR / "salary_cap_history_inflated.csv"
    if fallback.exists():
        if debug:
            print(f"[salary‑cap] using cached CSV at {fallback}")
        return pd.read_csv(fallback)

    # -------- 3️⃣  hard-coded single season --------------------
    df = pd.DataFrame(
        {"Season": [fallback_season],
         "Salary Cap": [fallback_cap_musd * 1_000_000]}
    )
    if debug:
        print("[salary‑cap] built minimal one‑row DataFrame "
              "from fallback values")
    return df

# User-Agent header to avoid Cloudflare blocks
# Sent with the page requests made by the scrapers below.
UA = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/126.0.0.0 Safari/537.36"
    )
}
# Pause after each season in scrape_player_salary_data.
DELAY_BETWEEN_REQUESTS = 3  # seconds

# Define column templates to guarantee DataFrame structure
# (used as `columns=` so even an empty result has these columns)
PLAYER_COLS = ["Player", "Salary", "Season"]
TEAM_COLS = ["Team", "Team_Salary", "Season"]

# Salary parsing pattern
_salary_pat = re.compile(r"\$?\d[\d,]*")

def _clean_salary(text: str) -> int | None:
    """Return salary as int or None when text has no digits."""
    m = _salary_pat.search(text)
    return int(m.group(0).replace(",", "").replace("$", "")) if m else None

# Name normalization built on unidecode
def _normalise_name(raw: str) -> str:
    """
    Produce the canonical lookup form of a player name.

    ASCII-folds the text, keeps only what precedes the first "," and the
    first "(", then trims surrounding whitespace and lower-cases the result.
    """
    ascii_name = unidecode(raw)
    before_comma = ascii_name.partition(",")[0]
    before_paren = before_comma.partition("(")[0]
    return before_paren.strip().lower()


# ------- INTERNAL HELPER --------
def _get_hoopshype_soup(url: str, debug: bool = False) -> Optional[BeautifulSoup]:
    """
    Fetch ``url`` with a realistic UA and return the parsed page.

    Despite the name this is used for any scraped site, not only HoopsHype.
    A network-level error is retried once after a short randomised pause; a
    non-200 status or a page that looks like a Cloudflare challenge is not
    retried.

    Returns BeautifulSoup if the page looks OK, else None.
    """
    max_attempts = 2
    for attempt in range(max_attempts):
        try:
            if debug:
                print(f"[fetch] {url} (attempt {attempt+1})")
            resp = requests.get(url, headers=UA, timeout=30)
            if resp.status_code != 200:
                if debug:
                    print(f"  -> HTTP {resp.status_code}, skipping.")
                return None
            html = resp.text
            # crude Cloudflare challenge check
            if ("Access denied" in html) or ("cf-chl" in html):
                if debug:
                    print("  -> Cloudflare challenge detected; giving up.")
                return None
            return BeautifulSoup(html, "html.parser")
        except requests.RequestException as e:
            is_last_attempt = attempt == max_attempts - 1
            if debug:
                outcome = "giving up." if is_last_attempt else "retrying…"
                print(f"  -> network error {e}, {outcome}")
            # Only pause when another attempt follows; sleeping after the
            # final failure just delayed the caller for nothing.
            if not is_last_attempt:
                time.sleep(2 ** attempt + random.random())
    return None
# --------------------------------------------------------------------------


def _scrape_espn_player_salaries(season_start: int, debug: bool = False) -> list[dict]:
    """
    ESPN fallback for player salaries.

    Walks the paginated ESPN salary listing for the season that starts in
    ``season_start`` (ESPN keys pages by the following year) and returns a
    list of ``{"Player", "Salary", "Season"}`` dicts. Paging stops at the
    first page that cannot be fetched, has no table, or has fewer than three
    table rows; at most 29 pages are read.
    """
    espn_year = season_start + 1
    season_label = f"{season_start}-{str(season_start+1)[-2:]}"
    collected: list[dict] = []

    for page in range(1, 30):
        url = f"https://www.espn.com/nba/salaries/_/year/{espn_year}/page/{page}"
        soup = _get_hoopshype_soup(url, debug)
        if soup is None:
            break
        tbl = soup.find("table")
        if not tbl:
            break
        table_rows = tbl.find_all("tr")
        if len(table_rows) < 3:
            break

        for tr in table_rows[1:]:
            cells = tr.find_all("td")
            if len(cells) < 4:
                continue
            salary_int = _clean_salary(cells[3].get_text(strip=True))
            if salary_int is None:
                # repeated header row ('SALARY', etc.)
                continue
            raw_name = cells[1].get_text(strip=True)
            collected.append({
                "Player":  _normalise_name(raw_name).title(),   # keep display case
                "Salary":  salary_int,
                "Season":  season_label,
            })
        time.sleep(0.5)

    return collected


def scrape_player_salary_data(start_season: int, end_season: int,
                              player_filter: str | None = None,
                              debug: bool = False) -> pd.DataFrame:
    """
    Pull player salaries – HoopsHype first, ESPN fallback.

    Parameters
    ----------
    start_season, end_season : int
        First and last season start years (inclusive).
    player_filter : str | None
        When given and not "all" (case-insensitive), only rows for that
        player are kept. The filter is applied to both sources.
    debug : bool
        Print progress information.

    Returns
    -------
    pd.DataFrame
        Columns ``Player``, ``Salary``, ``Season`` (``YYYY-YY``), with at most
        one row per Season/Player pair. Player names are normalised and
        title-cased so they match nba_api keys once lower-cased.
    """
    # Resolve the optional single-player filter once instead of per row.
    wanted: str | None = None
    if player_filter and player_filter.lower() != "all":
        wanted = _normalise_name(player_filter)

    out: list[dict] = []

    for yr in range(start_season, end_season + 1):
        canon = f"{yr}-{str(yr+1)[-2:]}"
        # Rows gathered for this season only; avoids rescanning `out`.
        season_rows: list[dict] = []

        for slug in [canon, f"{yr}-{yr+1}"]:          # HH dual slugs
            url = f"https://hoopshype.com/salaries/players/{slug}/"
            soup = _get_hoopshype_soup(url, debug)
            if soup is None:
                continue

            table = soup.find("table", class_="hh-salaries-ranking-table")
            if not table:
                if debug:
                    print(f"  -> salary table not found for {slug}")
                continue

            for row in table.find_all("tr")[1:]:
                tds = row.find_all("td")
                if len(tds) < 3:
                    continue
                raw_name = tds[1].get_text(strip=True)
                clean_name = _normalise_name(raw_name)
                if wanted is not None and clean_name != wanted:
                    continue
                # Unparsable salary text is recorded as 0.
                salary_int = _clean_salary(tds[2].get_text(strip=True)) or 0
                season_rows.append({"Player": clean_name.title(),
                                    "Salary": salary_int,
                                    "Season": canon})
            if season_rows:
                break  # success for this season

        if not season_rows:
            espn_rows = _scrape_espn_player_salaries(yr, debug)
            if wanted is not None:
                # The ESPN helper returns every player; honour the filter here
                # so a filtered call never comes back with the whole league.
                espn_rows = [r for r in espn_rows
                             if _normalise_name(r["Player"]) == wanted]
            if debug and espn_rows:
                print(f"  -> ESPN fallback added {len(espn_rows)} rows")
            season_rows.extend(espn_rows)

        if debug:
            print(f"  -> scraped {len(season_rows)} rows for {canon}")
        out.extend(season_rows)
        time.sleep(DELAY_BETWEEN_REQUESTS)

    df = pd.DataFrame(out, columns=PLAYER_COLS)

    # ---- log & drop duplicate Season/Player rows (first occurrence kept) ----
    dup_mask = df.duplicated(subset=["Season","Player"], keep=False)
    if dup_mask.any():
        if debug:
            print("[salary_scrape] duplicate salary rows detected:")
            print(df.loc[dup_mask].sort_values(["Season","Player"]))
        rows_before = len(df)
        df = df.drop_duplicates(subset=["Season","Player"])
        if debug:
            # dup_mask also marks the rows that are kept, so report the real
            # number of rows removed.
            print(f"[salary_scrape] dropped {rows_before - len(df)} duplicate rows")

    return df
# --------------------------------------------------------------------------


def _scrape_espn_team_salaries(season: str, debug: bool = False) -> list[dict]:
    """
    Parse ESPN team‑salary table; skip ranking column (#).

    ``season`` is a label whose first four characters are the start year; ESPN
    keys the page by the following year. Returns a list of
    ``{"Team", "Team_Salary", "Season"}`` dicts, empty when the page cannot be
    fetched or contains no table.
    """
    rows, year = [], int(season[:4]) + 1
    url = f"https://www.espn.com/nba/salaries/_/type/team/year/{year}"
    soup = _get_hoopshype_soup(url, debug)
    if not soup:
        return rows

    tbl = soup.find("table")
    if tbl is None:
        # Page fetched but without a table: report nothing rather than raise
        # AttributeError on None.
        if debug:
            print("  -> ESPN team salary table not found")
        return rows

    for tr in tbl.find_all("tr")[1:]:
        tds = tr.find_all("td")
        if len(tds) < 4:          # rank | team | conf | salary
            continue
        team_name = tds[1].get_text(strip=True)   # <‑‑ skip rank col
        salary_int = _clean_salary(tds[3].get_text(strip=True))
        if salary_int is not None:
            rows.append(
                {"Team": team_name, "Team_Salary": salary_int, "Season": season}
            )
    return rows


def scrape_team_salary_data(season: str, debug: bool = False) -> pd.DataFrame:
    """
    Team payrolls for a single season (YYYY-YY or YYYY-YYYY slug resilient).

    HoopsHype is tried first under both slug spellings; the first slug whose
    page contains the salary table is used. If that yields no rows, the ESPN
    team-salary page is used instead.

    Returns a DataFrame with columns ``Team``, ``Team_Salary``, ``Season``
    (always present, even when no data could be scraped).
    """
    records = []
    for slug in [season, f"{season[:4]}-{int(season[:4])+1}"]:
        url = f"https://hoopshype.com/salaries/{slug}/"
        soup = _get_hoopshype_soup(url, debug)
        if soup is None:
            continue

        table = soup.find("table", class_="hh-salaries-ranking-table")
        if not table:
            if debug:
                print(f"  -> team salary table missing for {slug}")
            continue

        for row in table.find_all("tr")[1:]:
            cols = row.find_all("td")
            if len(cols) < 3:
                continue
            team = cols[1].get_text(strip=True)
            # Shared parser instead of a bare int(): a cell without a usable
            # number is skipped rather than aborting the scrape with ValueError.
            salary = _clean_salary(cols[2].get_text(strip=True))
            if salary is None:
                continue
            records.append({"Team": team, "Team_Salary": salary, "Season": season})
        break  # stop after first hit

    # Fallback if no records found
    if not records:
        espn_rows = _scrape_espn_team_salaries(season, debug)
        if debug and espn_rows:
            print(f"  -> ESPN team salary fallback added {len(espn_rows)} rows")
        records.extend(espn_rows)

    df = pd.DataFrame(records, columns=TEAM_COLS)  # <- guarantees columns
    if debug and not df.empty:
        print(df.head())
    return df

# --- Season‑level advanced stats --------------------------------------------
# Column names treated as advanced metrics: those present in the downloaded
# season table are converted to numbers by _season_advanced_df and returned
# by scrape_advanced_metrics.
ADV_METRIC_COLS = [
    "PER", "TS%", "3PAr", "FTr", "ORB%", "DRB%", "TRB%", "AST%", "STL%", "BLK%",
    "TOV%", "USG%", "OWS", "DWS", "WS", "WS/48", "OBPM", "DBPM", "BPM", "VORP",
    "ORtg", "DRtg",  # extra goodies if you want them
]

def _season_advanced_df(season: str) -> pd.DataFrame:
    """
    Memoised download of the *season‑wide* advanced‑stats table.

    The table for ``season`` (label whose first four characters are the start
    year) is fetched from basketball-reference through the cached session,
    cleaned, and stored in ``_ADV_CACHE`` for the life of the process. The
    download happens while holding ``_ADV_LOCK``, so concurrent callers wait
    for the result instead of firing duplicate requests.

    The returned frame has a ``player_key`` column (normalised player name)
    and numeric values in whichever ADV_METRIC_COLS columns exist. The same
    DataFrame object is handed to every caller, so treat it as read-only.

    Raises ``requests.HTTPError`` when the page responds with an error status.
    """
    if season in _ADV_CACHE:            # fast path, no lock
        return _ADV_CACHE[season]

    with _ADV_LOCK:                     # only one thread may enter the block
        if season in _ADV_CACHE:        # re-check: another thread may have filled it
            return _ADV_CACHE[season]

        end_year = int(season[:4]) + 1
        url = f"https://www.basketball-reference.com/leagues/NBA_{end_year}_advanced.html"
        print(f"[adv] fetching {url}")
        resp = session.get(url, headers=UA, timeout=30)
        resp.raise_for_status()

        df = pd.read_html(StringIO(resp.text), header=0)[0]
        # Drop repeated header rows and rows without a player name (the name
        # normaliser only accepts strings). `.copy()` makes the result an
        # independent frame so the column assignments below do not trigger
        # SettingWithCopyWarning.
        keep = df.Player.notna() & (df.Player != "Player")
        df = df[keep].copy()
        df["player_key"] = df.Player.map(_normalise_name)

        avail = [c for c in ADV_METRIC_COLS if c in df.columns]
        if avail:
            df[avail] = df[avail].apply(pd.to_numeric, errors="coerce")

        _ADV_CACHE[season] = df                # memoise
        # Be polite to the site, but only when the request really went over
        # the network; responses served from the requests-cache need no pause.
        if not getattr(resp, "from_cache", False):
            time.sleep(random.uniform(1.5, 2.5))
        return df

def scrape_advanced_metrics(player_name: str,
                            season: str,
                            *,
                            debug: bool = False) -> dict:
    """
    Look one player up in the cached season table – no per-player HTTP call.

    Returns ``{metric: value}`` for every ADV_METRIC_COLS column available in
    the season table, taken from the player's first matching row, or an empty
    dict when the player does not appear in that season.
    """
    season_df = _season_advanced_df(season)
    wanted_key = _normalise_name(player_name)
    matches = season_df.loc[season_df.player_key == wanted_key]

    if matches.empty:
        if debug:
            print(f"[adv] no advanced stats for {player_name} in {season}")
        return {}

    first_match = matches.iloc[0]
    # Only report metrics that actually exist in the table
    result = {
        metric: first_match[metric]
        for metric in ADV_METRIC_COLS
        if metric in first_match.index
    }
    if debug:
        print(f"[adv] {player_name} → {result}")
    return result
# --- End of new season-level advanced stats ---------------------------------

def load_injury_data(
    file_path: str | Path | None = None,
    *,
    base_dir: str | Path | None = None,
    debug: bool = False,
):
    """
    Load the historical injury CSV. By default we look inside the *new*
    processed folder; pass ``file_path`` to override a specific file,
    or ``base_dir`` to point at a different processed directory.
    """
    root = Path(base_dir) if base_dir else DATA_PROCESSED_DIR
    if file_path is None:
        file_path = root / "NBA Player Injury Stats(1951 - 2023).csv"
    file_path = Path(file_path).expanduser().resolve()

    try:
        injury = (
            pd.read_csv(file_path)
            .assign(Date=lambda d: pd.to_datetime(d["Date"]))
        )
        injury["Season"] = injury["Date"].apply(
            lambda x: (
                f"{x.year}-{str(x.year + 1)[-2:]}"
                if x.month >= 10
                else f"{x.year - 1}-{str(x.year)[-2:]}"
            )
        )
        if debug:
            print(f"[load_injury_data] loaded {len(injury):,} rows from {file_path}")
        return injury
    except FileNotFoundError:
        if debug:
            print(f"[load_injury_data] ✖ no injury file at {file_path}")
        return None

if __name__ == "__main__":
    # Example usage and testing of all functions
    debug = True
    start_season = 2022
    end_season = 2023
    sample_player = "Ja Morant"  # Example player

    print("1. Testing scrape_salary_cap_history:")
    salary_cap_history = scrape_salary_cap_history(debug=debug)

    print("\n2. Testing scrape_player_salary_data:")
    player_salary_data = scrape_player_salary_data(start_season, end_season, player_filter=sample_player, debug=debug)

    print("\n3. Testing scrape_team_salary_data:")
    team_salary_data = scrape_team_salary_data(f"{start_season}-{str(start_season+1)[-2:]}", debug=debug)

    print("\n4. Testing scrape_advanced_metrics:")
    advanced_metrics = scrape_advanced_metrics(sample_player, f"{start_season}-{str(start_season+1)[-2:]}", debug=debug)
    print(f"Advanced Metrics for {sample_player}:")
    print(advanced_metrics)

    print("\n5. Testing load_injury_data and merge_injury_data:")
    injury_data = load_injury_data()
    if injury_data is not None:
        print(injury_data.head())
    else:
        print("No injury data loaded.")
    if not player_salary_data.empty and injury_data is not None:
        # Imported here because process_utils itself imports from this module.
        from salary_nba_data_pull.process_utils import merge_injury_data
        merged_data = merge_injury_data(player_salary_data, injury_data)
        print("Merged data with injury info:")
        # Show the injury columns only when the merge produced them.
        columns_to_display = ['Player', 'Season', 'Salary']
        if 'Injured' in merged_data.columns:
            columns_to_display.append('Injured')
        if 'Injury_Periods' in merged_data.columns:
            columns_to_display.append('Injury_Periods')
        if 'Total_Days_Injured' in merged_data.columns:
            columns_to_display.append('Total_Days_Injured')
        if 'Injury_Risk' in merged_data.columns:
            columns_to_display.append('Injury_Risk')
        print(merged_data[columns_to_display].head())

    if not player_salary_data.empty:
        avg_salary = player_salary_data['Salary'].mean()
        print(f"Average salary for {sample_player} from {start_season} to {end_season}: ${avg_salary:,.2f}")

    if not team_salary_data.empty:
        highest_team_salary = team_salary_data.loc[team_salary_data['Team_Salary'].idxmax()]
        print(f"Team with highest salary in {start_season}-{end_season}: {highest_team_salary['Team']} (${highest_team_salary['Team_Salary']:,.2f})")

    # load_injury_data returns None when the CSV is missing, so check for that
    # before touching `.empty` (previously an AttributeError).
    if injury_data is not None and not injury_data.empty:
        injury_count = injury_data['Relinquished'].str.contains(sample_player, case=False).sum()
        print(f"Number of injuries/illnesses for {sample_player} from {start_season} to {end_season}: {injury_count}")

    print("\nAll tests completed.")


Overwriting ../src/salary_nba_data_pull/scrape_utils.py


In [15]:
%%writefile ../src/salary_nba_data_pull/process_utils.py
import pandas as pd
import numpy as np
import logging
import sqlite3
from datetime import datetime
from functools import lru_cache
from salary_nba_data_pull.fetch_utils import fetch_all_players, fetch_career_stats, fetch_player_info, fetch_league_standings
from salary_nba_data_pull.scrape_utils import scrape_advanced_metrics

# --- CPI lazy‑loader --------------------------------------------------
_CPI_AVAILABLE = False  # toggled at runtime

@lru_cache(maxsize=1)
def _ensure_cpi_ready(debug: bool = False) -> bool:
    """
    Import `cpi` lazily and guarantee its internal SQLite DB is usable.
    Returns True when inflation data are available, False otherwise.
    """
    global _CPI_AVAILABLE
    try:
        import importlib
        cpi = importlib.import_module("cpi")        # late import
        try:
            _ = cpi.models.Series.get_by_id("0000")  # 1‑row sanity query
            _CPI_AVAILABLE = True
            return True
        except sqlite3.OperationalError:
            if debug:
                logging.warning("[CPI] DB invalid – rebuilding from BLS…")
            cpi.update(rebuild=True)                # expensive network call
            _CPI_AVAILABLE = True
            return True
    except ModuleNotFoundError:
        if debug:
            logging.warning("[CPI] package not installed")
    except Exception as e:
        if debug:
            logging.error("[CPI] unexpected CPI failure: %s", e)
    return False
# ---------------------------------------------------------------------

def inflate_value(value: float, year_str: str,
                  *, debug: bool = False, skip_inflation: bool = False) -> float:
    """
    Convert `value` from the dollars of `year_str` (YYYY or YYYY‑YY) to 2022 USD.

    The original value is returned unchanged when the caller opts out, when
    CPI data are unavailable, when the year is the current year or later, or
    when the conversion fails for any reason (logged only if `debug` is set).
    """
    if skip_inflation:
        return value
    if not _ensure_cpi_ready(debug):
        return value

    try:
        import cpi                                       # safe: DB ready
        source_year = int(year_str[:4])
        if source_year >= datetime.now().year:
            return value
        adjusted = cpi.inflate(value, source_year, to=2022)
        return float(adjusted)
    except Exception as e:
        if debug:
            logging.error("[CPI] inflate failed for %s: %s", year_str, e)
        return value
# ---------------------------------------------------------------------

def calculate_percentages(df, debug=False):
    """
    Add shooting percentages and per-36 rates to ``df`` in place.

    Each derived column is created only when all of its input columns exist:
    FG%, 3P%, FT% (makes / attempts × 100), TS% (PTS / (2 × (FGA + 0.44 × FTA))
    × 100) and PTS/AST/TRB per 36 minutes. Values are rounded to two decimals
    and infinities from division by zero become NaN. An empty frame is
    returned untouched. The same DataFrame object is returned.
    """
    if df.empty:
        return df

    def _rounded_finite(series):
        # Two-decimal rounding, then ±inf -> NaN.
        return series.round(2).replace([np.inf, -np.inf], np.nan)

    # Shooting percentages: (output column, makes column, attempts column)
    shooting_specs = (
        ("FG%", "FG", "FGA"),
        ("3P%", "3P", "3PA"),
        ("FT%", "FT", "FTA"),
    )
    for pct_col, made_col, att_col in shooting_specs:
        if att_col in df.columns and made_col in df.columns:
            df[pct_col] = _rounded_finite(df[made_col] / df[att_col] * 100)

    # True shooting percentage
    if all(col in df.columns for col in ("PTS", "FGA", "FTA")):
        df["TS%"] = _rounded_finite(
            df["PTS"] / (2 * (df["FGA"] + 0.44 * df["FTA"])) * 100
        )

    # Per-36-minute rates
    for stat_col in ("PTS", "AST", "TRB"):
        if stat_col in df.columns and "MP" in df.columns:
            df[f"{stat_col}_per_36"] = _rounded_finite(df[stat_col] / df["MP"] * 36)

    if debug:
        print("Percentage calculations completed")

    return df

def process_player_data(player_name: str, season: str,
                        all_players: dict[str, dict], *,
                        debug: bool = False) -> dict | None:
    """
    Build a single‑player dict **including Games Started (GS)** and keep the
    schema aligned with dataset 1.
    """
    meta = all_players.get(player_name.lower().strip())
    if not meta:
        return None

    pid = meta["player_id"]
    info_df   = fetch_player_info(pid, debug=debug)
    career_df = fetch_career_stats(pid, debug=debug)
    if career_df is None or career_df.empty:
        return None

    season_row = career_df.loc[career_df.SEASON_ID.eq(season)]
    if season_row.empty:
        return None
    season_row = season_row.iloc[0]

    data = {
        # ---------- BASIC ------------
        "Player": player_name,
        "Season": season,
        "Team":   season_row["TEAM_ABBREVIATION"],
        "Age":    season_row["PLAYER_AGE"],
        "GP":     season_row["GP"],
        "GS":     season_row.get("GS", 0),        # <-- NEW
        "MP":     season_row["MIN"],
        # ---------- SCORING ----------
        "PTS": season_row["PTS"],
        "FG":  season_row["FGM"],  "FGA": season_row["FGA"],
        "3P":  season_row["FG3M"], "3PA": season_row["FG3A"],
        "FT":  season_row["FTM"],  "FTA": season_row["FTA"],
        # ---------- OTHER ------------
        "TRB": season_row["REB"], "AST": season_row["AST"],
        "STL": season_row["STL"], "BLK": season_row["BLK"],
        "TOV": season_row["TOV"], "PF":  season_row["PF"],
    }

    # roster meta
    if info_df is not None and not info_df.empty:
        ir = info_df.iloc[0]
        data["Position"]          = ir.get("POSITION", "")
        data["TeamID"]            = ir.get("TEAM_ID", None)
        data["Years_of_Service"]  = ir.get("SEASON_EXP", None)
    else:
        data["TeamID"] = meta.get("team_id")

    # ---------- Derived shooting splits ----------
    two_att          = data["FGA"] - data["3PA"]
    data["2P"]       = data["FG"] - data["3P"]
    data["2PA"]      = two_att
    data["eFG%"]     = round((data["FG"] + 0.5 * data["3P"]) / data["FGA"] * 100 ,2) if data["FGA"] else None
    data["2P%"]      = round(data["2P"] / two_att * 100 ,2)                           if two_att else None

    # ---------- Advanced metrics ----------
    try:
        data.update(scrape_advanced_metrics(player_name, season, debug=debug))
    except Exception as exc:
        if debug:
            logging.warning("%s advanced scrape failed: %s", player_name, exc)

    return data

def merge_injury_data(player_data: pd.DataFrame,
                      injury_data: pd.DataFrame | None) -> pd.DataFrame:
    """
    Attach four injury‑related columns. If a player has no injuries, leave the fields as NA
    (pd.NA) instead of empty strings so repeated runs compare equal.
    """
    import pandas as pd

    if player_data.empty:
        return player_data

    out = player_data.copy()

    # Ensure columns exist with NA defaults
    defaults = {
        "Injured": False,
        "Injury_Periods": pd.NA,
        "Total_Days_Injured": 0,
        "Injury_Risk": "Low Risk",
    }
    for c, v in defaults.items():
        if c not in out.columns:
            out[c] = v

    if injury_data is None or injury_data.empty:
        # normalize empties just in case
        out["Injury_Periods"] = out["Injury_Periods"].replace("", pd.NA)
        return out

    # Process each player/season
    for idx, row in out.iterrows():
        pname = row["Player"]
        season = row["Season"]

        mask = (injury_data["Season"] == season) & \
               (injury_data["Relinquished"].str.contains(pname, case=False, na=False))
        player_inj = injury_data.loc[mask]

        if player_inj.empty:
            continue  # keep defaults

        periods = []
        total_days = 0
        for _, inj in player_inj.iterrows():
            start = inj["Date"]
            # find the first acquired record after start
            got_back = injury_data[
                (injury_data["Date"] > start) &
                (injury_data["Acquired"].str.contains(pname, case=False, na=False))
            ]
            if not got_back.empty:
                end = got_back.iloc[0]["Date"]
            else:
                end_year = int(season.split("-")[1])
                end = pd.Timestamp(f"{end_year}-06-30")

            total_days += (end - start).days
            periods.append(f"{start:%Y-%m-%d} - {end:%Y-%m-%d}")

        out.at[idx, "Injured"] = True
        out.at[idx, "Injury_Periods"] = "; ".join(periods) if periods else pd.NA
        out.at[idx, "Total_Days_Injured"] = total_days

        if total_days < 10:
            risk = "Low Risk"
        elif total_days <= 20:
            risk = "Moderate Risk"
        else:
            risk = "High Risk"
        out.at[idx, "Injury_Risk"] = risk

    # final normalization
    out["Injury_Periods"] = out["Injury_Periods"].replace("", pd.NA)

    return out



Overwriting ../src/salary_nba_data_pull/process_utils.py


In [16]:
%%writefile ../src/salary_nba_data_pull/data_utils.py

import pandas as pd
import numpy as np
from pathlib import Path
from salary_nba_data_pull.process_utils import (
    inflate_value
)
from salary_nba_data_pull.quality import (
    ExpectedSchema, audit_dataframe, write_audit_reports
)
from salary_nba_data_pull.settings import DATA_PROCESSED_DIR

# Columns that clean_dataframe() keeps even when every value in them is NA.
# merge_salary_cap_data() rebinds this module-level name at runtime to a
# larger set that also contains the salary-cap columns it merged in.
PRESERVE_EVEN_IF_ALL_NA = {
    "3P%", "Injured", "Injury_Periods", "Total_Days_Injured", "Injury_Risk"
}

# --- NEW helper ------------------------------------------------------
def load_salary_cap_csv(path: str | Path, *, debug: bool = False) -> pd.DataFrame:
    """
    Load the preprocessed salary cap CSV (inflated) instead of scraping.
    We DO NOT fill or coerce silently – if a required column is missing,
    we log it and let the caller decide.
    """
    path = Path(path).expanduser().resolve()
    if debug:
        print(f"[salary-cap] loading local file: {path}")
    df = pd.read_csv(path)
    if debug:
        print(f"[salary-cap] rows={len(df)}, cols={df.columns.tolist()}")
    return df

def clean_dataframe(df):
    """
    Return a tidied copy of ``df``.

    Steps, in order:
      1. drop columns whose label starts with ``Unnamed`` (CSV index residue);
      2. drop repeated column labels, keeping the first occurrence;
      3. drop columns that are entirely NA, except those listed in
         ``PRESERVE_EVEN_IF_ALL_NA``;
      4. drop rows that are entirely NA;
      5. collapse all columns whose label contains ``Season`` into a single
         ``Season`` column - the column literally named ``Season`` wins when
         it exists, otherwise the first Season-like column is renamed;
      6. drop ``3PAr`` and ``FTr`` if present;
      7. round numeric columns to two decimals.

    Args:
        df: Frame to clean. Not modified.

    Returns:
        A new DataFrame.
    """
    # Remove unnamed columns (labels are cast to str so that non-string
    # labels cannot break the .str accessor)
    unnamed = df.columns.astype(str).str.contains('^Unnamed')
    df = df.loc[:, ~unnamed]

    # Remove duplicate columns
    df = df.loc[:, ~df.columns.duplicated()]

    # Remove columns with all NaN values **except** ones we want to keep
    all_na = df.columns[df.isna().all()]
    to_drop = [c for c in all_na if c not in PRESERVE_EVEN_IF_ALL_NA]
    df = df.drop(columns=to_drop)

    # Remove rows with all NaN values
    df = df.dropna(axis=0, how='all')

    # Ensure only one 'Season' column exists
    season_columns = [col for col in df.columns
                      if isinstance(col, str) and 'Season' in col]
    if len(season_columns) > 1:
        if 'Season' in season_columns:
            # Keep the canonical column. Renaming another column to
            # 'Season' here would create a duplicate label and the drop
            # below would then remove *both*.
            keep = 'Season'
        else:
            keep = season_columns[0]
            df = df.rename(columns={keep: 'Season'})
        df = df.drop(columns=[col for col in season_columns if col != keep])

    # Remove '3PAr' and 'FTr' columns
    df = df.drop(columns=['3PAr', 'FTr'], errors='ignore')

    # Round numeric columns to 2 decimal places
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    df[numeric_columns] = df[numeric_columns].round(2)

    return df

def merge_salary_cap_data(player_data: pd.DataFrame,
                          salary_cap_data: pd.DataFrame,
                          *,
                          debug: bool = False) -> pd.DataFrame:
    """
    Left-merge salary-cap data onto player rows by season start year.

    The join key is the first four characters of each side's ``Season``
    value. When the cap table has several rows for the same start year only
    the first is used, so the merge can never multiply player rows. If the
    cap table lacks ``Salary_Cap_Inflated`` it is computed from
    ``Salary Cap`` via ``inflate_value``. Where a cap column collides with
    a player column, the player value wins and the cap value only fills
    its gaps.

    Side effect: the module-level ``PRESERVE_EVEN_IF_ALL_NA`` set is
    extended with the cap column names so that ``clean_dataframe`` keeps
    them even when they are entirely NA.

    Args:
        player_data: Player rows with a ``Season`` column. Not modified.
        salary_cap_data: Cap table with a ``Season`` column. Not modified.
        debug: When True, print progress and warnings.

    Returns:
        The merged frame passed through ``clean_dataframe``; or
        ``player_data`` itself, unchanged and uncleaned, when either input
        is empty.
    """
    if player_data.empty or salary_cap_data.empty:
        if debug:
            print("[merge_salary_cap_data] one side empty -> returning player_data unchanged")
        return player_data

    # Make sure we don't mutate originals
    p = player_data.copy()
    cap = salary_cap_data.copy()

    # Extract year (astype(str) lets an integer Season column work too)
    p["Season_Year"]   = p["Season"].astype(str).str[:4].astype(int)
    cap["Season_Year"] = cap["Season"].astype(str).str[:4].astype(int)

    # Inflate cap if not present
    if "Salary_Cap_Inflated" not in cap.columns:
        if debug:
            print("[merge_salary_cap_data] computing Salary_Cap_Inflated")
        cap["Salary_Cap_Inflated"] = cap.apply(
            lambda r: inflate_value(r.get("Salary Cap", np.nan), r.get("Season", "")),
            axis=1
        )

    # One cap row per season year; duplicates would otherwise duplicate
    # every player row of that season in the left merge.
    repeated_year = cap["Season_Year"].duplicated(keep="first")
    if repeated_year.any():
        if debug:
            print(f"[merge_salary_cap_data] dropping {int(repeated_year.sum())} "
                  "duplicate cap rows (keeping first per season year)")
        cap = cap.loc[~repeated_year]

    # Merge (validate mirrors the m:1 checks used for the other merges)
    merged = pd.merge(p, cap, on="Season_Year", how="left",
                      suffixes=("", "_cap"), validate="m:1")

    # Figure out which columns came from cap
    cap_cols = [c for c in cap.columns if c not in {"Season_Year"}]

    # For each cap col, if we created a *_cap twin, consolidate:
    # keep the player-side value and fall back to the cap-side value on NA
    for col in cap_cols:
        src = f"{col}_cap"
        if src in merged.columns:
            merged[col] = merged[col].where(~merged[col].isna(), merged[src])
            merged = merged.drop(columns=[src])

    # Cleanup
    merged = merged.drop(columns=["Season_Year"])

    # Protect salary-cap columns from being dropped in clean_dataframe
    global PRESERVE_EVEN_IF_ALL_NA
    PRESERVE_EVEN_IF_ALL_NA = PRESERVE_EVEN_IF_ALL_NA.union(set(cap_cols))

    merged = clean_dataframe(merged)

    if debug:
        miss = [c for c in cap_cols if c not in merged.columns]
        if miss:
            print(f"[merge_salary_cap_data] WARNING missing cap cols after merge: {miss}")

    return merged

def validate_data(df: pd.DataFrame,
                  *,
                  name: str = "player_dataset",
                  save_reports: bool = True) -> pd.DataFrame:
    """
    Audit ``df`` against the player-dataset schema and optionally save the
    resulting reports as CSV files.

    Args:
        df: Dataset to audit. It is neither copied nor modified.
        name: Label passed to the audit and used as the report file prefix.
        save_reports: When True, write every report under
            ``DATA_PROCESSED_DIR / "audits"``.

    Returns:
        The very same ``df`` that was passed in.
    """
    required = ["Season", "Player", "Salary", "Team"]
    expected_dtypes = {
        "Season": "object",
        "Player": "object",
        "Salary": "float64",
    }
    must_be_non_negative = ["Salary", "GP", "MP", "PTS", "TRB", "AST", "Team_Salary"]
    must_vary = ["Salary", "PTS", "Team_Salary"]

    schema = ExpectedSchema(
        # every present column counts as expected; narrow this if a
        # canonical column list becomes available
        expected_cols=df.columns,
        required_cols=required,
        dtypes=expected_dtypes,
        non_negative_cols=must_be_non_negative,
        non_constant_cols=must_vary,
        unique_key=["Season", "Player"]
    )

    reports = audit_dataframe(df, schema, name=name)

    if save_reports:
        write_audit_reports(reports, DATA_PROCESSED_DIR / "audits", prefix=name)

    # One-line console summary of required columns that are absent
    overview = reports["cols_overview"]
    missing_req = overview.query("missing_required == True")
    if not missing_req.empty:
        print(f"[validate_data] Missing required columns: {missing_req['column'].tolist()}")

    return df


Overwriting ../src/salary_nba_data_pull/data_utils.py


In [17]:
%%writefile ../src/salary_nba_data_pull/quality.py
# src/salary_nba_data_pull/quality.py
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable, Mapping, Any
import pandas as pd
import numpy as np

@dataclass
class ExpectedSchema:
    """
    Declarative description of what a DataFrame is *intended* to contain.

    Instances are consumed by ``audit_dataframe`` / ``assert_dataframe_ok``.
    Only ``expected_cols`` is mandatory; every other field defaults to an
    empty container, which disables the corresponding check.
    """
    # All columns we care about (order doesn't matter)
    expected_cols: Iterable[str]

    # Subset that must be present
    required_cols: Iterable[str] = field(default_factory=list)

    # Expected pandas dtypes (string form, e.g. 'float64', 'object');
    # compared against str(df[col].dtype), columns not listed always match
    dtypes: Mapping[str, str] = field(default_factory=dict)

    # Columns that must be >= 0 (any negative value is a violation)
    non_negative_cols: Iterable[str] = field(default_factory=list)

    # Columns that must vary: flagged when they hold at most one distinct
    # non-null value (this includes all-zero and all-NaN columns)
    non_constant_cols: Iterable[str] = field(default_factory=list)

    # Unique key columns (together must be unique)
    unique_key: Iterable[str] = field(default_factory=list)

    # Allowed value sets (enums); null values are not counted as bad
    allowed_values: Mapping[str, Iterable[Any]] = field(default_factory=dict)

def _series_is_constant(s: pd.Series) -> bool:
    return s.nunique(dropna=True) <= 1

def audit_dataframe(df: pd.DataFrame,
                    schema: ExpectedSchema,
                    *,
                    name: str = "dataset") -> dict[str, pd.DataFrame]:
    """
    Return a dict of small DataFrames summarising quality checks.
    Nothing is printed; caller decides how to persist/log.

    Every report always carries its full set of columns, even when it has
    no rows, so callers can filter on a flag column without first checking
    whether the report is empty.

    Reports (dict key -> columns):
      * ``cols_overview`` - column, expected, present, required,
        missing_required. Lists the union of expected, present *and
        required* columns, so a required column that is absent is always
        reported.
      * ``null_report`` - column, null_count, total_rows, null_pct.
      * ``type_report`` - column, expected_dtype, actual_dtype, matches.
      * ``value_report`` - one row per numeric column: column, min, max,
        negatives, zeros, non_zero_pct, should_be_non_negative,
        violates_non_negative.
      * ``constant_report`` - column, is_constant, should_not_be_constant,
        violates.
      * ``enum_report`` - column, bad_count, sample_bad (up to five distinct
        offending values); nulls are not counted as bad.
      * ``unique_report`` - duplicate_rows, subset. Empty when the schema
        has no unique key or when a key column is absent from ``df``.

    Args:
        df: Data to audit. Not modified.
        schema: Expectations to check against.
        name: Label for the dataset; currently unused by the checks.

    Returns:
        Mapping of report name to report frame.
    """
    # Materialise the schema iterables once: membership tests below would
    # otherwise exhaust one-shot iterables such as generators.
    exp = set(schema.expected_cols)
    req = set(schema.required_cols)
    non_negative = set(schema.non_negative_cols)
    non_constant = set(schema.non_constant_cols)
    key_cols = list(schema.unique_key)

    present = set(df.columns)
    n_rows = len(df)

    # --- Column overview
    # Required columns are part of the universe; without them a column
    # that is required but neither expected nor present would go unlisted.
    all_cols = sorted(exp | present | req)
    cols_overview = pd.DataFrame({
        "column": pd.Series(all_cols, dtype=object),
        "expected": np.array([c in exp for c in all_cols], dtype=bool),
        "present":  np.array([c in present for c in all_cols], dtype=bool),
        "required": np.array([c in req for c in all_cols], dtype=bool),
    })
    cols_overview["missing_required"] = (
        cols_overview["required"] & ~cols_overview["present"]
    )

    # --- Null report
    null_report = (df.isna().sum()
                     .rename_axis("column")
                     .to_frame("null_count")
                     .assign(total_rows=n_rows)
                     .assign(null_pct=lambda d: 100 * d["null_count"] / d["total_rows"])
                     .reset_index())

    # --- Dtype report
    type_rows = []
    for col in df.columns:
        exp_type = schema.dtypes.get(col)
        actual_type = str(df[col].dtype)
        type_rows.append({
            "column": col,
            "expected_dtype": exp_type,
            "actual_dtype": actual_type,
            "matches": (exp_type is None) or (actual_type == exp_type)
        })
    type_report = pd.DataFrame(
        type_rows,
        columns=["column", "expected_dtype", "actual_dtype", "matches"])

    # --- Value checks (numeric columns only)
    value_rows = []
    for col in df.select_dtypes(include=[np.number]).columns:
        series = df[col]
        negatives = int((series < 0).sum())
        must_be_non_negative = col in non_negative
        value_rows.append({
            "column": col,
            "min": series.min(skipna=True),
            "max": series.max(skipna=True),
            "negatives": negatives,
            "zeros": int((series == 0).sum()),
            # guard the zero-row case instead of dividing by zero
            "non_zero_pct": (100 * (series != 0).sum() / n_rows
                             if n_rows else float("nan")),
            "should_be_non_negative": must_be_non_negative,
            "violates_non_negative": negatives > 0 and must_be_non_negative,
        })
    value_report = pd.DataFrame(
        value_rows,
        columns=["column", "min", "max", "negatives", "zeros", "non_zero_pct",
                 "should_be_non_negative", "violates_non_negative"])

    # Constant columns
    constant_rows = []
    for col in df.columns:
        is_constant = bool(_series_is_constant(df[col]))
        must_vary = col in non_constant
        constant_rows.append({
            "column": col,
            "is_constant": is_constant,
            "should_not_be_constant": must_vary,
            "violates": is_constant and must_vary,
        })
    constant_report = pd.DataFrame(
        constant_rows,
        columns=["column", "is_constant", "should_not_be_constant", "violates"])

    # Allowed values
    enum_rows = []
    for col, allowed in schema.allowed_values.items():
        if col not in df.columns:
            continue
        bad = ~df[col].isin(allowed) & df[col].notna()
        enum_rows.append({
            "column": col,
            "bad_count": int(bad.sum()),
            "sample_bad": df.loc[bad, col].drop_duplicates().head(5).tolist()
        })
    enum_report = pd.DataFrame(enum_rows,
                               columns=["column", "bad_count", "sample_bad"])

    # Unique key
    uniq_report = pd.DataFrame(columns=["duplicate_rows", "subset"])
    # Skip the check when a key column is absent: df.duplicated would raise
    # KeyError, and the absence is already visible in cols_overview.
    if key_cols and all(k in present for k in key_cols):
        dup_mask = df.duplicated(subset=key_cols, keep=False)
        uniq_report = pd.DataFrame({
            "duplicate_rows": [int(dup_mask.sum())],
            "subset": [key_cols]
        })

    return {
        "cols_overview": cols_overview,
        "null_report": null_report,
        "type_report": type_report,
        "value_report": value_report,
        "constant_report": constant_report,
        "enum_report": enum_report,
        "unique_report": uniq_report
    }

def assert_dataframe_ok(df: pd.DataFrame,
                        schema: ExpectedSchema,
                        *, name: str = "dataset") -> None:
    """
    Raise AssertionError with a concise message if critical checks fail.
    Designed for pytest or CI.

    Checks, based on the reports from ``audit_dataframe``: missing required
    columns, dtype mismatches, negative values in non-negative columns,
    constant columns that must vary, and duplicate unique-key rows.

    Args:
        df: Data to check.
        schema: Expectations to check against.
        name: Dataset label used in the error message.

    Raises:
        AssertionError: When at least one check fails; the message lists
            every failed check on its own line.
    """
    rep = audit_dataframe(df, schema, name=name)

    def _flagged(report_key: str, flag: str, value: bool) -> pd.DataFrame:
        # A report built from zero rows may have no columns at all (e.g. the
        # value report of a frame without numeric columns). Filtering on the
        # flag would then raise, so treat it as "nothing flagged".
        report = rep[report_key]
        if flag not in report.columns:
            return report.iloc[0:0]
        return report.loc[report[flag] == value]

    bad_missing = _flagged("cols_overview", "missing_required", True)
    bad_types = _flagged("type_report", "matches", False)
    bad_nonneg = _flagged("value_report", "violates_non_negative", True)
    bad_constant = _flagged("constant_report", "violates", True)

    unique_report = rep["unique_report"]
    dupes = unique_report["duplicate_rows"].iloc[0] if not unique_report.empty else 0

    msgs = []
    if not bad_missing.empty:
        msgs.append(f"Missing required cols: {bad_missing['column'].tolist()}")
    if not bad_types.empty:
        msgs.append(f"Dtype mismatches: {bad_types[['column','expected_dtype','actual_dtype']].to_dict('records')}")
    if not bad_nonneg.empty:
        msgs.append(f"Negative values in non-negative cols: {bad_nonneg['column'].tolist()}")
    if not bad_constant.empty:
        msgs.append(f"Constant-but-shouldn't cols: {bad_constant['column'].tolist()}")
    if dupes:
        msgs.append(f"Duplicate key rows: {dupes}")

    if msgs:
        raise AssertionError(f"[{name}] data quality failures:\n" + "\n".join(msgs))

def write_audit_reports(reports: Mapping[str, pd.DataFrame],
                        out_dir: Path,
                        prefix: str) -> None:
    """
    Persist every audit report as ``<prefix>_<report name>.csv`` in ``out_dir``.

    The directory (including missing parents) is created when needed and
    existing files of the same name are overwritten. Row indexes are not
    written.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    for report_name in reports:
        destination = out_dir / f"{prefix}_{report_name}.csv"
        reports[report_name].to_csv(destination, index=False)

Overwriting ../src/salary_nba_data_pull/quality.py


In [18]:
%%writefile ../src/salary_nba_data_pull/main.py
import argparse
import pandas as pd
import logging
import time
import glob
import os
import hashlib
import numpy as np
from pathlib import Path
import pyarrow.parquet as pq
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.auto import tqdm
import requests_cache
from salary_nba_data_pull.fetch_utils import fetch_all_players, fetch_season_players, fetch_league_standings
from salary_nba_data_pull.process_utils import (
    process_player_data,
    inflate_value,
    calculate_percentages,
    _ensure_cpi_ready,
)
from salary_nba_data_pull.scrape_utils import (
    scrape_salary_cap_history,
    scrape_player_salary_data,
    scrape_team_salary_data,
    load_injury_data,
    _season_advanced_df,
)
from salary_nba_data_pull.process_utils import merge_injury_data
from salary_nba_data_pull.data_utils import (
    clean_dataframe,
    merge_salary_cap_data,
    validate_data,
    load_salary_cap_csv,
)
from salary_nba_data_pull.settings import DATA_PROCESSED_DIR

# Enable requests-cache for all HTTP traffic. This runs at import time and
# stores responses in a SQLite cache named "nba_pull"; only status 200 is
# listed in allowable_codes.
requests_cache.install_cache("nba_pull", backend="sqlite", allowable_codes=(200,))

# CPI self-test - logs a warning once per run if CPI is unavailable
# (also executed at import time)
_ensure_cpi_ready(debug=False)

# Default number of worker threads; update_data() falls back to this value
# when its max_workers argument is falsy
DEFAULT_WORKERS = 8                # tweak ≤ CPU cores

def _almost_equal_numeric(a: pd.Series, b: pd.Series, atol=1e-6, rtol=1e-9):
    # Handle NA values first
    mask = a.isna() & b.isna()
    
    # For non-NA values, compare them
    both_numeric = pd.api.types.is_numeric_dtype(a) and pd.api.types.is_numeric_dtype(b)
    if not both_numeric:
        # For non-numeric columns, use pandas equals but handle NA carefully
        non_na_mask = ~(a.isna() | b.isna())
        eq_result = pd.Series(False, index=a.index)
        if non_na_mask.any():
            eq_result[non_na_mask] = a[non_na_mask].eq(b[non_na_mask])
        return eq_result | mask
    else:
        # For numeric columns, use numpy isclose
        non_na_mask = ~(a.isna() | b.isna())
        diff_ok = pd.Series(False, index=a.index)
        if non_na_mask.any():
            diff_ok[non_na_mask] = np.isclose(
                a[non_na_mask].astype(float), 
                b[non_na_mask].astype(float), 
                atol=atol, rtol=rtol
            )
        return diff_ok | mask

def _diff_report(old_df: pd.DataFrame,
                 new_df: pd.DataFrame,
                 key_cols=("Season","Player"),
                 numeric_atol=1e-6,
                 numeric_rtol=1e-9,
                 max_print=10):
    """
    Return (is_equal:boolean, summary_str:str, diff_rows:DataFrame)
    diff_rows has: key cols + column + old_val + new_val
    """
    # ensure same columns
    common = [c for c in new_df.columns if c in old_df.columns]
    old = old_df.reindex(columns=common)
    new = new_df.reindex(columns=common)

    # align order by keys if present
    if all(k in common for k in key_cols):
        old = old.sort_values(list(key_cols)).reset_index(drop=True)
        new = new.sort_values(list(key_cols)).reset_index(drop=True)
    else:
        key_cols = ("__row__",)
        old["__row__"] = range(len(old))
        new["__row__"] = range(len(new))
        old = old.sort_values("__row__").reset_index(drop=True)
        new = new.sort_values("__row__").reset_index(drop=True)

    if len(old) != len(new):
        return (False,
                f"Row count differs: old={len(old)}, new={len(new)}",
                pd.DataFrame())

    diffs = []
    for col in common:
        eq_mask = _almost_equal_numeric(old[col], new[col],
                                        atol=numeric_atol, rtol=numeric_rtol)
        if not eq_mask.all():
            idxs = np.where(~eq_mask)[0]
            for i in idxs:
                row_key_vals = {k: new.iloc[i][k] for k in key_cols}
                diffs.append({
                    **row_key_vals,
                    "column": col,
                    "old": old.iloc[i][col],
                    "new": new.iloc[i][col],
                })

    if not diffs:
        return (True, "No value-level diffs (within tolerance).", pd.DataFrame())

    diff_df = pd.DataFrame(diffs)
    # build summary
    cols_changed = diff_df["column"].nunique()
    rows_changed = diff_df[key_cols[0]].nunique()
    examples = diff_df.head(max_print)
    summary = (f"{len(diff_df)} cell diffs, {rows_changed} rows, "
               f"{cols_changed} columns. Showing first {len(examples)}:")
    return (False, summary, examples)

def _file_md5(path: str, chunk: int = 1 << 20) -> str:
    """Return md5 hexdigest for *path* streaming in 1 MiB chunks."""
    h = hashlib.md5()
    with open(path, "rb") as f:
        for blk in iter(lambda: f.read(chunk), b""):
            h.update(blk)
    return h.hexdigest()

def _season_partition_identical(season: str,
                                base_dir: Path | str,
                                new_df: pd.DataFrame) -> bool:
    """
    Return True if on-disk parquet for `season` is byte-wise equivalent (after
    canonical sort & column alignment) to `new_df`.
    """
    ckpt = Path(base_dir) / f"season={season}" / "part.parquet"
    if not ckpt.exists():
        return False

    try:
        old_df = pd.read_parquet(ckpt)
    except Exception as exc:
        logging.warning("[identical] failed to read %s → %s", ckpt, exc)
        return False

    # STEP B1: align columns and sort only by stable key
    cols = sorted(set(old_df.columns) | set(new_df.columns))
    key = ["Season","Player"]

    old_cmp = (old_df.reindex(columns=cols)
                     .sort_values(key)
                     .reset_index(drop=True))
    new_cmp = (new_df.reindex(columns=cols)
                     .sort_values(key)
                     .reset_index(drop=True))

    return old_cmp.equals(new_cmp)   # NaNs treated equal if aligned

def _season_partition_exists(season, base_dir):
    """Check if a season partition already exists in Parquet format."""
    return os.path.exists(os.path.join(base_dir, f"season={season}"))

def _player_task(args):
    """
    Worker entry point for ThreadPoolExecutor.

    ``args`` is a 5-tuple ``(player_name, season, salary, all_players,
    debug)``. The player's stats are built with ``process_player_data`` and,
    when that yields a non-empty result, the salary is stored under
    ``'Salary'``. A falsy result is returned as-is.
    """
    player_name, season, salary, all_players, debug = args
    stats = process_player_data(player_name, season, all_players, debug=debug)
    if not stats:
        return stats
    stats['Salary'] = salary
    return stats

# ----------------------------------------------------------------------
def update_data(existing_data,
                start_year: int,
                end_year: int,
                *,
                player_filter: str = "all",
                min_avg_minutes: float | None = None,
                debug: bool = False,
                small_debug: bool = False,
                max_workers: int = 8,
                output_base: str | Path = DATA_PROCESSED_DIR,
                overwrite: bool = False,
                max_retries: int = 1) -> pd.DataFrame:
    """
    Pull seasons in [start_year, end_year] and write under `output_base`.

    For every season the function scrapes team payroll, standings and the
    roster, processes each salaried player in a thread pool, merges
    standings, payroll and injury data, derives percentages, cleans the
    result and writes it to ``<output_base>/season=<season>/part.parquet``
    (plus ``part.md5`` and ``missing_players.txt``).

    Args:
        existing_data: Accepted for interface compatibility; only passed
            through to the retry call.
        start_year: First season start year (inclusive).
        end_year: Last season start year (inclusive).
        player_filter: Forwarded to ``scrape_player_salary_data``.
        min_avg_minutes: When given, keep only rows with ``MP`` at or above
            this value.
        debug: Enable verbose helper output.
        small_debug: Suppress per-player chatter and the ``[dbg]`` row-count
            traces; print only concise per-season summaries at the end.
        max_workers: Upper bound for the thread pool; ``DEFAULT_WORKERS``
            is used when this is falsy.
        output_base: Root directory of the per-season partitions.
        overwrite: When False, a season whose freshly built frame equals
            the stored partition is not rewritten.
        max_retries: How many times a season that produced suspiciously few
            rows is retried (after a 90 s pause) before its result is
            accepted as-is.

    Returns:
        All processed seasons concatenated, or an empty DataFrame when no
        season produced data.

    Raises:
        AssertionError: When a season contains duplicate (Season, Player)
            keys after all merges.
    """
    # Below this many processed players a full-league season is assumed to
    # be the result of a transient upstream failure.
    min_expected_rows = 150
    retry_delay_s = 90

    output_base = Path(output_base)
    output_base.mkdir(parents=True, exist_ok=True)

    # Decide low-level debug for helpers
    helper_debug = debug and not small_debug

    def _dbg(label: str, value) -> None:
        # Row-count traces are chatter that small_debug promises to hide.
        if not small_debug:
            print(label, value)

    injury = load_injury_data(debug=helper_debug)
    salary_df = scrape_player_salary_data(start_year, end_year,
                                          player_filter, debug=helper_debug)

    out_frames: list[pd.DataFrame] = []
    season_summaries: list[str] = []

    for y in tqdm(range(start_year, end_year + 1),
                  desc="Seasons", disable=small_debug):
        season = f"{y}-{str(y+1)[-2:]}"
        ckpt_dir = output_base / f"season={season}"
        ckpt_dir.mkdir(parents=True, exist_ok=True)

        # --- 1. Team payroll
        team_payroll = scrape_team_salary_data(season, debug=helper_debug)
        if team_payroll.empty:
            team_payroll = pd.DataFrame(columns=["Team", "Team_Salary", "Season"])

        # --- 2. Standings (wins/losses)
        standings_df = fetch_league_standings(season, debug=helper_debug)
        if standings_df is None:
            standings_df = pd.DataFrame()

        # --- 3. Roster
        players_this_season = fetch_season_players(season, debug=helper_debug)
        rows = salary_df.query("Season == @season")

        args = [
            (row.Player, season, row.Salary, players_this_season, helper_debug)
            for _, row in rows.iterrows()
        ]

        # --- pre-fetch season-wide advanced table so workers reuse the cache
        _ = _season_advanced_df(season)

        # --- 4. Player processing in parallel
        # ThreadPoolExecutor rejects max_workers=0, which is what an empty
        # task list would otherwise produce.
        n_workers = max(1, min(max_workers or DEFAULT_WORKERS, len(args)))
        with ThreadPoolExecutor(max_workers=n_workers) as pool:
            results, failures = [], 0
            for fut in tqdm(as_completed(pool.submit(_player_task, a) for a in args),
                            total=len(args), desc=f"{season} workers", disable=small_debug):
                try:
                    res = fut.result()
                    if res:
                        results.append(res)
                except Exception as exc:
                    failures += 1
                    logging.exception("Worker failed for %s: %s", season, exc)
            if failures and debug:
                print(f"⚠️  {failures} worker threads raised exceptions")

        missing = rows.loc[~rows.Player.str.lower().isin(players_this_season.keys()),
                           "Player"].unique()

        (ckpt_dir / "missing_players.txt").write_text("\n".join(missing))

        df_season = pd.DataFrame(results)
        _dbg(f"[dbg] {season} processed players:", len(df_season))

        # ---------- season sanity check ----------
        # Retry only a bounded number of times, and only for full pulls:
        # presumably any player_filter other than "all" narrows the player
        # set, which would make a low row count the expected outcome.
        too_few_rows = len(df_season) < min_expected_rows
        may_retry = max_retries > 0 and str(player_filter).lower() == "all"
        if too_few_rows and may_retry:
            logging.warning("%s produced only %d rows; retrying after 90 s", season, len(df_season))
            time.sleep(retry_delay_s)
            retried = update_data(existing_data, y, y,  # single-season retry
                                  player_filter=player_filter,
                                  min_avg_minutes=min_avg_minutes,
                                  debug=debug,
                                  small_debug=small_debug,
                                  max_workers=max_workers,
                                  output_base=output_base,
                                  overwrite=True,
                                  max_retries=max_retries - 1)
            # Keep the retried season and carry on with the remaining ones
            # instead of abandoning the rest of the requested range.
            if not retried.empty:
                out_frames.append(retried)
            continue
        if df_season.empty:
            # Build tiny summary anyway
            season_summaries.append(f"{season}: 0 players processed.")
            continue

        # --- 5. Merge W/L (validate to prevent row blow-ups)
        if not standings_df.empty:
            stand_df = standings_df.copy()
            if 'W' in stand_df.columns:
                stand_df.rename(columns={'W': 'Wins', 'L': 'Losses'}, inplace=True)
            if 'WINS' in stand_df.columns:
                stand_df.rename(columns={'WINS': 'Wins', 'LOSSES': 'Losses'}, inplace=True)
            if 'TEAM_ID' in stand_df.columns:
                stand_df.rename(columns={'TEAM_ID': 'TeamID'}, inplace=True)

            _dbg(f"[dbg] {season} before standings merge:", len(df_season))
            df_season = pd.merge(
                df_season,
                stand_df[['TeamID', 'Wins', 'Losses']].drop_duplicates('TeamID'),
                on='TeamID', how='left', validate='m:1'
            )
            _dbg(f"[dbg] {season} after standings merge:", len(df_season))

        # --- 6. Team payroll merge
        _dbg(f"[dbg] {season} before team payroll merge:", len(df_season))
        merged_tmp = pd.merge(
            df_season,
            team_payroll.drop_duplicates(subset=["Team","Season"]),
            on=["Team", "Season"], how="left", validate='m:1'
        )
        _dbg(f"[dbg] {season} after team payroll merge:", len(merged_tmp))

        merged_tmp2 = merged_tmp if min_avg_minutes is None else merged_tmp.query("MP >= @min_avg_minutes")
        _dbg(f"[dbg] {season} after MP filter:", len(merged_tmp2))

        merged_tmp3 = merged_tmp2.pipe(merge_injury_data, injury_data=injury)
        _dbg(f"[dbg] {season} after injury merge:", len(merged_tmp3))

        merged = (merged_tmp3
                    .pipe(calculate_percentages, debug=helper_debug)
                    .pipe(clean_dataframe))

        # ---- FINAL: enforce key uniqueness ----
        dups = merged.duplicated(subset=["Season","Player"], keep=False)
        if dups.any():
            print(f"[dbg] {season} DUPLICATE KEYS detected ({dups.sum()} rows). Dumping...")
            print(merged.loc[dups, ["Season","Player","Team","MP"]]
                        .sort_values(["Player","Team"]))
            # Hard fail so we never persist dirty data:
            raise AssertionError(f"Duplicate (Season,Player) keys in season {season}")

        # Deterministic sort & string normalization (blank strings -> NA)
        key_cols = ["Season","Player"]
        merged = merged.sort_values(key_cols).reset_index(drop=True)
        obj_cols = merged.select_dtypes(include=["object"]).columns
        for c in obj_cols:
            merged[c] = merged[c].replace(r"^\s*$", pd.NA, regex=True)

        _dbg(f"[dbg] {season} final merged:", len(merged))

        # Skip identical season unless overwrite
        if (not overwrite
            and (ckpt_dir / "part.parquet").exists()
            and _season_partition_identical(season, output_base, merged)):
            if debug and not small_debug:
                print(f"✓  {season} unchanged – skipping")
            out_frames.append(merged)
            continue
        elif debug and not small_debug and (ckpt_dir / "part.parquet").exists():
            print(f"↻  {season} differs – re-scraping")

        parquet_path = ckpt_dir / "part.parquet"
        merged.to_parquet(parquet_path, index=False)
        (ckpt_dir / "part.md5").write_text(_file_md5(parquet_path))

        out_frames.append(merged)
        logging.info("wrote %s", ckpt_dir)

        # Concise per-season summary
        if small_debug:
            n_players = len(merged)
            n_missing = len(missing)
            n_cols = merged.shape[1]
            season_summaries.append(
                f"{season}: {n_players} rows, {n_missing} missing roster matches, {n_cols} cols."
            )

    # Print all summaries once
    if small_debug and season_summaries:
        print("\n--- Season Summaries ---")
        for line in season_summaries:
            print(line)
        print("------------------------\n")

    return pd.concat(out_frames, ignore_index=True) if out_frames else pd.DataFrame()

def get_timestamp():
    """Format the current local time as ``YYYY-MM-DD_HH-MM-SS`` for use inside file names."""
    stamp_layout = "%Y-%m-%d_%H-%M-%S"
    return format(datetime.now(), stamp_layout)

def remove_old_logs(log_dir, days_to_keep=7):
    """
    Delete ``stat_pull_log_*.txt`` files directly inside ``log_dir`` whose
    modification time is more than ``days_to_keep`` days before now.
    Files with other names are never touched.
    """
    now = datetime.now()
    pattern = os.path.join(log_dir, 'stat_pull_log_*.txt')
    for candidate in glob.glob(pattern):
        age = now - datetime.fromtimestamp(os.path.getmtime(candidate))
        # Guard clause: anything still inside the retention window is kept.
        if age <= timedelta(days=days_to_keep):
            continue
        os.remove(candidate)

def persist_final_dataset(new_data: pd.DataFrame,
                          seasons_loaded: list[str],
                          *,
                          output_base: Path,
                          debug: bool = False,
                          numeric_atol: float = 1e-6,
                          numeric_rtol: float = 1e-9,
                          max_print: int = 15) -> None:
    """
    Compare freshly pulled rows against the master CSV and rewrite it when they differ.

    Parameters
    ----------
    new_data : rows produced by this run; must contain the ``Season`` and
        ``Player`` key columns. The caller's frame is NOT modified.
    seasons_loaded : seasons covered by this run; selects which rows of the
        existing master are compared against ``new_data``.
    output_base : directory holding ``nba_player_data_final_inflated.csv``;
        audit files are written to ``<output_base>/audits``.
    debug : print progress messages.
    numeric_atol, numeric_rtol, max_print : forwarded to ``_diff_report``.

    Side effects
    ------------
    * Writes ``keys_added_*.csv`` / ``keys_removed_*.csv`` when the key sets differ.
    * Writes ``diff_*.csv`` and rewrites the master CSV when ``_diff_report``
      reports a difference; otherwise the master is left untouched.

    Raises
    ------
    AssertionError
        If the row count read back from disk differs from what was written.
    """
    output_base = Path(output_base)
    final_csv = output_base / "nba_player_data_final_inflated.csv"
    join_keys = ["Season","Player"]

    # Work on a private copy: the key canonicalization below assigns columns,
    # which would otherwise rewrite the caller's DataFrame in place.
    new_data = new_data.copy()
    # Same canonical form (str + strip) as the Season key column gets below,
    # so the season filter cannot miss rows because of type/whitespace drift.
    seasons = [str(s).strip() for s in seasons_loaded]
    # One timestamp per call so every audit file of this run shares a suffix.
    run_stamp = get_timestamp()

    if final_csv.exists():
        old_master = pd.read_csv(final_csv)
        if debug:
            print(f"[persist] loaded {len(old_master):,} rows from existing master")
    else:
        old_master = pd.DataFrame(columns=new_data.columns)

    # Canonicalize types for reliable joins
    for df_ in (old_master, new_data):
        for k in join_keys:
            if k in df_.columns:
                df_[k] = df_[k].astype(str).str.strip()

    # Slice old & new by season
    old_slice = old_master.merge(
        pd.DataFrame({ "Season": pd.Series(seasons, dtype=str) }).drop_duplicates(),
        on="Season", how="inner"
    ).reset_index(drop=True)
    new_slice = new_data.reset_index(drop=True)

    # Early exit if both empty
    if len(old_slice) == len(new_slice) == 0:
        if debug: print("[persist] nothing to compare/write")
        return

    # Key diff
    old_keys = old_slice[join_keys].drop_duplicates()
    new_keys = new_slice[join_keys].drop_duplicates()

    add = new_keys.merge(old_keys, on=join_keys, how="left", indicator=True)
    add = add[add["_merge"]=="left_only"].drop(columns="_merge")

    rem = old_keys.merge(new_keys, on=join_keys, how="left", indicator=True)
    rem = rem[rem["_merge"]=="left_only"].drop(columns="_merge")

    if len(add) or len(rem):
        print(f"[persist] Keys added={len(add)}, removed={len(rem)}")
        audits_dir = output_base / "audits"
        audits_dir.mkdir(parents=True, exist_ok=True)
        add.to_csv(audits_dir / f"keys_added_{run_stamp}.csv", index=False)
        rem.to_csv(audits_dir / f"keys_removed_{run_stamp}.csv", index=False)

    # Normalize NULL-like text columns
    null_like_cols = ["Injury_Periods"]
    for col in null_like_cols:
        if col in new_slice.columns:
            new_slice[col] = new_slice[col].replace("", pd.NA)
        if col in old_slice.columns:
            old_slice[col] = old_slice[col].replace("", pd.NA)

    # Value-level diff (kept for visibility); the helper's result is unpacked
    # as (equal flag, one-line summary, frame of example differences).
    equal, summary, example_df = _diff_report(old_slice, new_slice,
                                              key_cols=join_keys,
                                              numeric_atol=numeric_atol,
                                              numeric_rtol=numeric_rtol,
                                              max_print=max_print)
    if not equal:
        print("[persist] Detected differences:")
        print("  " + summary)
        if not example_df.empty:
            print(example_df.to_string(index=False))
        audits_dir = output_base / "audits"
        audits_dir.mkdir(parents=True, exist_ok=True)
        # Second pass with max_print raised to the full row count so the
        # audit file is not truncated to the console sample size.
        _, _, full_diff = _diff_report(old_slice, new_slice,
                                       key_cols=join_keys,
                                       numeric_atol=numeric_atol,
                                       numeric_rtol=numeric_rtol,
                                       max_print=len(new_slice))
        # Guard the file name against an empty season list (would be IndexError).
        span = f"{seasons[0]}_{seasons[-1]}" if seasons else "none"
        full_diff.to_csv(audits_dir / f"diff_{span}_{run_stamp}.csv",
                         index=False)
    else:
        if debug:
            print("[persist] No changes detected – master CSV left untouched")
        return

    # STEP C1: drop master rows that the new data supersedes, then append new.
    # NOTE(review): only keys present in new_keys are dropped here, so rows
    # reported above as "removed" (in `rem`) stay in the master — confirm
    # whether those should be purged for the loaded seasons as well.
    remover = old_master.merge(new_keys, on=join_keys, how="left", indicator=True)
    remover = remover[remover["_merge"]=="left_only"].drop(columns="_merge")
    updated_master = pd.concat([remover, new_slice], ignore_index=True)

    # Deterministic order before writing
    updated_master = updated_master.sort_values(join_keys).reset_index(drop=True)

    updated_master.to_csv(final_csv, index=False, float_format="%.6f")

    # STEP C2: read back and verify. Raised explicitly (not `assert`) so the
    # check still runs when Python is started with -O.
    reloaded = pd.read_csv(final_csv)
    if len(reloaded) != len(updated_master):
        raise AssertionError("[persist] row mismatch after write/readback")
    if debug:
        print(f"[persist] Master CSV updated → {final_csv}")

def main(start_year: int,
         end_year: int,
         player_filter: str = "all",
         min_avg_minutes: float = 15,
         debug: bool = False,
         small_debug: bool = False,
         workers: int = 8,
         overwrite: bool = False,
         output_base: str | Path = DATA_PROCESSED_DIR) -> None:
    """
    Entry point: pull the requested seasons, merge salary-cap data, validate,
    and persist the master CSV.

    Parameters
    ----------
    start_year, end_year : season range forwarded to ``update_data``.
    player_filter, min_avg_minutes, overwrite : forwarded to ``update_data``.
    workers : forwarded to ``update_data`` as ``max_workers``.
    debug : DEBUG-level log file plus verbose console output.
    small_debug : print only high-signal console info. In this function the
        verbose prints are gated on ``debug and not small_debug``, so when
        both flags are True ``small_debug`` suppresses them; the log file
        level and the ``debug`` flag passed to ``persist_final_dataset``
        still follow ``debug``.
    output_base : destination directory for outputs; logs are written to a
        sibling ``stat_pull_output`` directory.
    """
    t0 = time.time()
    output_base = Path(output_base)
    output_base.mkdir(parents=True, exist_ok=True)

    log_dir = output_base.parent / "stat_pull_output"
    log_dir.mkdir(parents=True, exist_ok=True)
    remove_old_logs(log_dir)

    log_file = log_dir / f"stat_pull_log_{get_timestamp()}.txt"
    # force=True: basicConfig does nothing once the root logger already has
    # handlers, so a second call in the same process (e.g. repeated notebook
    # runs) would keep logging to the first run's file while reporting this one.
    logging.basicConfig(filename=log_file,
                        level=logging.DEBUG if debug else logging.INFO,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        force=True)

    updated = update_data(None, start_year, end_year,
                          player_filter=player_filter,
                          min_avg_minutes=min_avg_minutes,
                          debug=debug,
                          small_debug=small_debug,
                          max_workers=workers,
                          output_base=str(output_base),
                          overwrite=overwrite)

    if not small_debug:  # summary line is shown in every mode except small_debug
        print(f"✔ Completed pull: {len(updated):,} rows added")

    if not updated.empty:
        # ---------------- Salary Cap -----------------
        # Prefer the local CSV; scrape only when that file is missing.
        cap_file = output_base / "salary_cap_history_inflated.csv"

        if cap_file.exists():
            salary_cap = load_salary_cap_csv(cap_file, debug=debug and not small_debug)
        else:
            # Last resort: scrape, and cache the result next to the outputs
            # so the next run can use the local file.
            if debug and not small_debug:
                print("[salary-cap] local file missing, attempting scrape…")
            salary_cap = scrape_salary_cap_history(debug=debug and not small_debug)
            if salary_cap is not None:
                salary_cap.to_csv(cap_file, index=False)

        if salary_cap is not None:
            updated = merge_salary_cap_data(updated, salary_cap, debug=debug and not small_debug)
        else:
            if debug:
                print("[salary-cap] No data merged — check local CSV path.")

        # --------------- Validate --------------------
        updated = validate_data(updated, name="player_dataset", save_reports=True)

        seasons_this_run = sorted(updated["Season"].unique().tolist())
        persist_final_dataset(updated,
                              seasons_loaded=seasons_this_run,
                              output_base=output_base,
                              debug=debug)

    if not small_debug:
        print(f"Process finished in {time.time() - t0:.1f} s — log: {log_file}")
    else:
        # minimal closing line
        print(f"Done in {time.time() - t0:.1f}s. Log: {log_file}")
        
# ----------------------------------------------------------------------
# argparse snippet
if __name__ == "__main__":
    # Default range: the previous calendar year through the current one.
    this_year = datetime.now().year

    parser = argparse.ArgumentParser()
    parser.add_argument("--start_year", type=int, default=this_year - 1)
    parser.add_argument("--end_year", type=int, default=this_year)
    parser.add_argument("--player_filter", default="all")
    parser.add_argument("--min_avg_minutes", type=float, default=15)
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--small_debug", action="store_true")
    parser.add_argument("--workers", type=int, default=8)
    parser.add_argument("--overwrite", action="store_true")
    parser.add_argument(
        "--output_base",
        default=str(DATA_PROCESSED_DIR),
        help="Destination root for parquet + csv outputs",
    )

    # Flag names match main()'s parameter names, so the parsed namespace
    # can be expanded straight into keyword arguments.
    args = parser.parse_args()
    cli_kwargs = vars(args)
    main(**cli_kwargs)


Overwriting ../src/salary_nba_data_pull/main.py


In [None]:
# %%writefile ../src/salary_nba_data_pull/notebook_helper.py
"""
Notebook/REPL helper utilities for salary_nba_data_pull.

Goals
-----
• Work no matter where the notebook is opened (absolute paths).
• Avoid NameError on __file__.
• Keep hot‑reload for iterative dev.
• Forward arbitrary args to main() so we can test all scenarios.

Use:
>>> import salary_nba_data_pull.notebook_helper as nb
>>> nb.quick_pull(2024, workers=12, debug=True)
"""

from __future__ import annotations
import sys, importlib, inspect, os
from pathlib import Path
import requests_cache
from typing import Iterable

def _find_repo_root(start: Path | None = None) -> Path:
    """Find the repository root by looking for pyproject.toml or .git."""
    markers = {"pyproject.toml", ".git"}
    here = (start or Path.cwd()).resolve()
    for p in [here] + list(here.parents):
        if any((p / m).exists() for m in markers):
            return p
    return here

# Make the package importable before the imports below run.
# ROOT is located from the current working directory (no start path is passed).
# Each existing directory not already on sys.path is inserted at index 0, so
# when both are added SRC ends up ahead of ROOT.
ROOT = _find_repo_root()
SRC  = ROOT / "src"
for p in (ROOT, SRC):
    if p.is_dir() and str(p) not in sys.path:
        sys.path.insert(0, str(p))

# Sanity print: shown when run as a script or when JPY_PARENT_PID is set in the
# environment (NOTE(review): presumably set by Jupyter kernels — confirm).
if __name__ == "__main__" or "JPY_PARENT_PID" in os.environ:
    print(f"[notebook_helper] sys.path[0:3]={sys.path[:3]}")

# Import only after the path fix above; on failure print the locations that
# were tried and re-raise so the ImportError is not hidden.
try:
    from salary_nba_data_pull import main as nba_main
    from salary_nba_data_pull.settings import DATA_PROCESSED_DIR
    from salary_nba_data_pull.fetch_utils import clear_cache as _cc
    print("✅ salary_nba_data_pull imported successfully")
except ImportError as e:
    print(f"❌ Failed to import salary_nba_data_pull: {e}")
    print(f"   ROOT={ROOT}")
    print(f"   SRC={SRC}")
    print(f"   sys.path[0:3]={sys.path[:3]}")
    raise
    
    
def _reload():
    """
    Re-execute the already-imported ``nba_main`` module in place so that
    edits to that module are picked up. Only ``nba_main`` itself is reloaded.
    """
    target = nba_main
    importlib.reload(target)

def quick_pull(season: int, **kwargs):
    """
    Run the pipeline for a single season.

    Reloads ``nba_main``, echoes the call, then invokes ``nba_main.main`` with
    ``start_year`` and ``end_year`` both set to ``season``. Any extra keyword
    arguments are forwarded to ``main`` unchanged.
    """
    _reload()
    print(f"[quick_pull] season={season}, kwargs={kwargs}")
    entry_point = nba_main.main
    entry_point(start_year=season, end_year=season, **kwargs)

def historical_pull(start_year: int, end_year: int, **kwargs):
    """
    Run the pipeline over a range of seasons.

    Reloads ``nba_main``, echoes the call, then invokes ``nba_main.main`` with
    the given ``start_year`` / ``end_year``. Any extra keyword arguments are
    forwarded to ``main`` unchanged.
    """
    _reload()
    print(f"[historical_pull] {start_year}-{end_year}, kwargs={kwargs}")
    entry_point = nba_main.main
    entry_point(start_year=start_year, end_year=end_year, **kwargs)

def check_existing_data(base: Path | str | None = None) -> list[str]:
    base = Path(base) if base else DATA_PROCESSED_DIR
    seasons = sorted(d.name.split("=", 1)[-1] for d in base.glob("season=*") if d.is_dir())
    print(f"[check_existing_data] found {len(seasons)} seasons in {base}")
    return seasons

def load_parquet_data(season: str | None = None, *, base: Path | str | None = None):
    import pandas as pd
    base = Path(base) if base else DATA_PROCESSED_DIR
    files = list(base.glob(f"season={season}/part.parquet")) if season else list(base.glob("season=*/part.parquet"))
    if not files:
        print("[load_parquet_data] No parquet files found.")
        return pd.DataFrame()
    print(f"[load_parquet_data] loading {len(files)} files from {base}")
    return pd.concat((pd.read_parquet(f) for f in files), ignore_index=True)

def clear_all_caches():
    """
    Clear the ``requests_cache`` cache and then the project's own cache
    (``_cc``, imported from ``fetch_utils.clear_cache``), and print a
    confirmation line.
    """
    for purge in (requests_cache.clear, _cc):
        purge()
    print("✅ caches cleared")

def print_args():
    """Print one line per parameter of ``nba_main.main``: name, default value, and parameter kind."""
    parameters = inspect.signature(nba_main.main).parameters
    for arg_name in parameters:
        spec = parameters[arg_name]
        print(f"{arg_name:<15} default={spec.default!r}  kind={spec.kind}")

if __name__ == "__main__":
    # Show which arguments main() accepts, with their defaults.
    print_args()
    # quick_pull(2023, workers=4, debug=True)

    # Multi-season pull.
    historical_pull(
        2012, 2024,
        workers=6, min_avg_minutes=10, overwrite=False, debug=True,
    )

    # List the seasons that already have a partition directory on disk.
    check_existing_data()

    # Load a single season's partition for inspection.
    df = load_parquet_data("2023-24")


[notebook_helper] sys.path[0:3]=['C:\\docker_projects\\coach_analysis', 'C:\\Users\\ghadf\\AppData\\Roaming\\uv\\python\\cpython-3.10.17-windows-x86_64-none\\python310.zip', 'C:\\Users\\ghadf\\AppData\\Roaming\\uv\\python\\cpython-3.10.17-windows-x86_64-none\\DLLs']
✅ salary_nba_data_pull imported successfully
start_year      default=<class 'inspect._empty'>  kind=POSITIONAL_OR_KEYWORD
end_year        default=<class 'inspect._empty'>  kind=POSITIONAL_OR_KEYWORD
player_filter   default='all'  kind=POSITIONAL_OR_KEYWORD
min_avg_minutes default=15  kind=POSITIONAL_OR_KEYWORD
debug           default=False  kind=POSITIONAL_OR_KEYWORD
small_debug     default=False  kind=POSITIONAL_OR_KEYWORD
workers         default=8  kind=POSITIONAL_OR_KEYWORD
overwrite       default=False  kind=POSITIONAL_OR_KEYWORD
output_base     default=WindowsPath('C:/docker_projects/coach_analysis/data/new_processed')  kind=POSITIONAL_OR_KEYWORD
[historical_pull] 2016-2024, kwargs={'workers': 6, 'min_avg_minutes': 10,

Seasons:   0%|          | 0/9 [00:00<?, ?it/s]

[fetch] https://hoopshype.com/salaries/2016-17/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://hoopshype.com/salaries/2016-2017/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://www.espn.com/nba/salaries/_/type/team/year/2017 (attempt 1)
  -> ESPN team salary fallback added 40 rows
                    Team  Team_Salary   Season
0       LeBron James, SF     30963450  2016-17
1       Kevin Durant, PF     26540100  2016-17
2        Mike Conley, PG     26540100  2016-17
3       James Harden, SG     26540100  2016-17
4  Russell Westbrook, PG     26540100  2016-17




[adv] Lebron James → {'PER': 27.0, 'TS%': 0.619, '3PAr': 0.254, 'FTr': 0.395, 'ORB%': 4.0, 'DRB%': 20.7, 'TRB%': 12.6, 'AST%': 41.3, 'STL%': 1.6, 'BLK%': 1.3, 'TOV%': 16.1, 'USG%': 30.0, 'OWS': 9.8, 'DWS': 3.0, 'WS': 12.9, 'WS/48': 0.221, 'OBPM': 6.4, 'DBPM': 1.2, 'BPM': 7.6, 'VORP': 6.7}
[adv] Kevin Durant → {'PER': 27.6, 'TS%': 0.651, '3PAr': 0.304, 'FTr': 0.374, 'ORB%': 2.2, 'DRB%': 23.6, 'TRB%': 13.6, 'AST%': 23.1, 'STL%': 1.5, 'BLK%': 3.8, 'TOV%': 10.4, 'USG%': 27.8, 'OWS': 8.0, 'DWS': 4.0, 'WS': 12.0, 'WS/48': 0.278, 'OBPM': 6.7, 'DBPM': 2.2, 'BPM': 8.9, 'VORP': 5.7}
[adv] Mike Conley → {'PER': 23.2, 'TS%': 0.604, '3PAr': 0.415, 'FTr': 0.365, 'ORB%': 1.5, 'DRB%': 10.8, 'TRB%': 6.0, 'AST%': 34.5, 'STL%': 2.1, 'BLK%': 0.8, 'TOV%': 11.8, 'USG%': 26.3, 'OWS': 7.5, 'DWS': 2.5, 'WS': 10.0, 'WS/48': 0.209, 'OBPM': 6.0, 'DBPM': 0.8, 'BPM': 6.8, 'VORP': 5.1}
[adv] James Harden → {'PER': 27.4, 'TS%': 0.613, '3PAr': 0.493, 'FTr': 0.575, 'ORB%': 3.5, 'DRB%': 20.9, 'TRB%': 12.2, 'AST%': 50.7,




[adv] Robin Lopez → {'PER': 14.4, 'TS%': 0.511, '3PAr': 0.003, 'FTr': 0.134, 'ORB%': 11.5, 'DRB%': 13.2, 'TRB%': 12.3, 'AST%': 5.6, 'STL%': 0.4, 'BLK%': 4.1, 'TOV%': 10.0, 'USG%': 17.5, 'OWS': 1.8, 'DWS': 2.4, 'WS': 4.3, 'WS/48': 0.09, 'OBPM': -1.4, 'DBPM': -0.8, 'BPM': -2.2, 'VORP': -0.1}
[adv] Michael Kidd-Gilchrist → {'PER': 14.3, 'TS%': 0.528, '3PAr': 0.015, 'FTr': 0.314, 'ORB%': 7.3, 'DRB%': 19.3, 'TRB%': 13.2, 'AST%': 7.4, 'STL%': 1.7, 'BLK%': 2.9, 'TOV%': 7.4, 'USG%': 14.6, 'OWS': 2.7, 'DWS': 3.1, 'WS': 5.8, 'WS/48': 0.119, 'OBPM': -1.6, 'DBPM': 0.9, 'BPM': -0.7, 'VORP': 0.7}
[adv] no advanced stats for Jr Smith in 2016-17
[adv] Brandon Knight → {'PER': 12.4, 'TS%': 0.502, '3PAr': 0.265, 'FTr': 0.293, 'ORB%': 2.3, 'DRB%': 9.1, 'TRB%': 5.6, 'AST%': 17.8, 'STL%': 1.1, 'BLK%': 0.4, 'TOV%': 12.9, 'USG%': 25.0, 'OWS': 0.1, 'DWS': 0.2, 'WS': 0.3, 'WS/48': 0.015, 'OBPM': -0.8, 'DBPM': -2.1, 'BPM': -2.8, 'VORP': -0.2}
[adv] Miles Plumlee → {'PER': 8.4, 'TS%': 0.518, '3PAr': 0.0, 'FTr': 

2016-17 workers:  13%|█▎        | 76/577 [00:00<00:01, 375.05it/s][A

[adv] Arron Afflalo → {'PER': 8.9, 'TS%': 0.559, '3PAr': 0.36, 'FTr': 0.221, 'ORB%': 0.7, 'DRB%': 8.4, 'TRB%': 4.6, 'AST%': 7.4, 'STL%': 0.7, 'BLK%': 0.3, 'TOV%': 8.4, 'USG%': 14.4, 'OWS': 1.2, 'DWS': 0.2, 'WS': 1.4, 'WS/48': 0.043, 'OBPM': -2.1, 'DBPM': -1.5, 'BPM': -3.6, 'VORP': -0.7}
[adv] Tyson Chandler → {'PER': 16.6, 'TS%': 0.703, '3PAr': 0.0, 'FTr': 0.544, 'ORB%': 12.5, 'DRB%': 33.2, 'TRB%': 22.6, 'AST%': 3.3, 'STL%': 1.2, 'BLK%': 1.5, 'TOV%': 18.7, 'USG%': 11.2, 'OWS': 2.7, 'DWS': 1.3, 'WS': 4.1, 'WS/48': 0.151, 'OBPM': 0.7, 'DBPM': -0.3, 'BPM': 0.3, 'VORP': 0.8}
[adv] Eric Gordon → {'PER': 13.2, 'TS%': 0.557, '3PAr': 0.651, 'FTr': 0.172, 'ORB%': 1.4, 'DRB%': 8.1, 'TRB%': 4.7, 'AST%': 12.3, 'STL%': 1.0, 'BLK%': 1.4, 'TOV%': 10.0, 'USG%': 22.1, 'OWS': 2.5, 'DWS': 1.4, 'WS': 3.8, 'WS/48': 0.079, 'OBPM': 1.3, 'DBPM': -1.4, 'BPM': -0.1, 'VORP': 1.1}
[adv] Marvin Williams → {'PER': 13.7, 'TS%': 0.551, '3PAr': 0.503, 'FTr': 0.213, 'ORB%': 4.2, 'DRB%': 19.8, 'TRB%': 12.0, 'AST%': 7.1,




[adv] Victor Oladipo → {'PER': 13.6, 'TS%': 0.534, '3PAr': 0.378, 'FTr': 0.165, 'ORB%': 1.9, 'DRB%': 12.6, 'TRB%': 7.2, 'AST%': 12.5, 'STL%': 1.7, 'BLK%': 0.8, 'TOV%': 10.6, 'USG%': 21.4, 'OWS': 1.7, 'DWS': 2.3, 'WS': 4.0, 'WS/48': 0.085, 'OBPM': -0.3, 'DBPM': -0.1, 'BPM': -0.5, 'VORP': 0.8}
[adv] Isaiah Thomas → {'PER': 26.5, 'TS%': 0.625, '3PAr': 0.439, 'FTr': 0.441, 'ORB%': 1.9, 'DRB%': 7.0, 'TRB%': 4.4, 'AST%': 32.5, 'STL%': 1.4, 'BLK%': 0.4, 'TOV%': 10.7, 'USG%': 34.0, 'OWS': 10.9, 'DWS': 1.6, 'WS': 12.5, 'WS/48': 0.234, 'OBPM': 7.8, 'DBPM': -1.1, 'BPM': 6.7, 'VORP': 5.6}
[adv] Jodie Meeks → {'PER': 13.1, 'TS%': 0.567, '3PAr': 0.535, 'FTr': 0.289, 'ORB%': 0.7, 'DRB%': 10.9, 'TRB%': 5.7, 'AST%': 9.3, 'STL%': 2.3, 'BLK%': 0.4, 'TOV%': 11.1, 'USG%': 19.4, 'OWS': 0.5, 'DWS': 0.6, 'WS': 1.1, 'WS/48': 0.073, 'OBPM': -1.3, 'DBPM': -0.4, 'BPM': -1.8, 'VORP': 0.1}
[adv] Jeremy Lamb → {'PER': 17.0, 'TS%': 0.55, '3PAr': 0.297, 'FTr': 0.263, 'ORB%': 2.9, 'DRB%': 22.6, 'TRB%': 12.7, 'AST%': 11

2016-17 workers:  27%|██▋       | 158/577 [00:00<00:01, 389.93it/s][A

[adv] Ish Smith → {'PER': 14.7, 'TS%': 0.477, '3PAr': 0.14, 'FTr': 0.136, 'ORB%': 1.1, 'DRB%': 12.4, 'TRB%': 6.6, 'AST%': 32.5, 'STL%': 1.6, 'BLK%': 1.4, 'TOV%': 12.4, 'USG%': 20.5, 'OWS': 0.9, 'DWS': 2.2, 'WS': 3.1, 'WS/48': 0.076, 'OBPM': -0.5, 'DBPM': 0.8, 'BPM': 0.3, 'VORP': 1.1}[adv] Andrew Wiggins → {'PER': 16.5, 'TS%': 0.534, '3PAr': 0.184, 'FTr': 0.345, 'ORB%': 3.8, 'DRB%': 8.8, 'TRB%': 6.3, 'AST%': 10.6, 'STL%': 1.4, 'BLK%': 0.8, 'TOV%': 9.4, 'USG%': 29.0, 'OWS': 3.3, 'DWS': 0.9, 'WS': 4.2, 'WS/48': 0.066, 'OBPM': 0.7, 'DBPM': -2.4, 'BPM': -1.7, 'VORP': 0.2}

[adv] Ramon Sessions → {'PER': 12.4, 'TS%': 0.495, '3PAr': 0.236, 'FTr': 0.449, 'ORB%': 1.5, 'DRB%': 8.5, 'TRB%': 4.9, 'AST%': 24.2, 'STL%': 1.7, 'BLK%': 0.3, 'TOV%': 12.7, 'USG%': 20.1, 'OWS': 0.3, 'DWS': 0.6, 'WS': 0.9, 'WS/48': 0.055, 'OBPM': -2.0, 'DBPM': -0.4, 'BPM': -2.4, 'VORP': -0.1}
[adv] Patrick Beverley → {'PER': 13.0, 'TS%': 0.546, '3PAr': 0.53, 'FTr': 0.175, 'ORB%': 5.0, 'DRB%': 15.8, 'TRB%': 10.4, 'AST%': 18





[adv] Will Barton → {'PER': 15.5, 'TS%': 0.547, '3PAr': 0.353, 'FTr': 0.285, 'ORB%': 3.8, 'DRB%': 12.9, 'TRB%': 8.4, 'AST%': 17.7, 'STL%': 1.4, 'BLK%': 1.3, 'TOV%': 11.5, 'USG%': 21.1, 'OWS': 2.5, 'DWS': 0.8, 'WS': 3.3, 'WS/48': 0.093, 'OBPM': 1.2, 'DBPM': -1.0, 'BPM': 0.2, 'VORP': 1.0}
[adv] Marcus Smart → {'PER': 12.0, 'TS%': 0.486, '3PAr': 0.443, 'FTr': 0.334, 'ORB%': 3.7, 'DRB%': 10.4, 'TRB%': 7.1, 'AST%': 22.0, 'STL%': 2.6, 'BLK%': 1.2, 'TOV%': 15.6, 'USG%': 18.8, 'OWS': 0.5, 'DWS': 2.7, 'WS': 3.2, 'WS/48': 0.064, 'OBPM': -1.9, 'DBPM': 0.6, 'BPM': -1.3, 'VORP': 0.4}
[adv] Willie Cauley-Stein → {'PER': 16.3, 'TS%': 0.558, '3PAr': 0.004, 'FTr': 0.314, 'ORB%': 7.0, 'DRB%': 20.4, 'TRB%': 13.8, 'AST%': 9.4, 'STL%': 1.9, 'BLK%': 2.8, 'TOV%': 11.2, 'USG%': 19.7, 'OWS': 1.4, 'DWS': 1.5, 'WS': 2.9, 'WS/48': 0.1, 'OBPM': -1.2, 'DBPM': 0.2, 'BPM': -1.0, 'VORP': 0.3}
[adv] Paul Pierce → {'PER': 5.7, 'TS%': 0.535, '3PAr': 0.614, 'FTr': 0.186, 'ORB%': 0.4, 'DRB%': 18.5, 'TRB%': 9.7, 'AST%': 5.

2016-17 workers:  56%|█████▌    | 324/577 [00:00<00:00, 399.52it/s][A



[adv] Deandre' Bembry → {'PER': 8.8, 'TS%': 0.481, '3PAr': 0.184, 'FTr': 0.163, 'ORB%': 4.2, 'DRB%': 13.1, 'TRB%': 8.7, 'AST%': 11.5, 'STL%': 1.1, 'BLK%': 1.2, 'TOV%': 13.2, 'USG%': 14.2, 'OWS': -0.1, 'DWS': 0.4, 'WS': 0.3, 'WS/48': 0.038, 'OBPM': -3.6, 'DBPM': 0.1, 'BPM': -3.5, 'VORP': -0.1}
[adv] Malachi Richardson → {'PER': 9.6, 'TS%': 0.517, '3PAr': 0.412, 'FTr': 0.279, 'ORB%': 1.8, 'DRB%': 11.5, 'TRB%': 6.7, 'AST%': 8.7, 'STL%': 1.3, 'BLK%': 0.5, 'TOV%': 9.5, 'USG%': 19.3, 'OWS': 0.0, 'DWS': 0.1, 'WS': 0.1, 'WS/48': 0.028, 'OBPM': -3.4, 'DBPM': -1.4, 'BPM': -4.8, 'VORP': -0.1}
[adv] Rodney Hood → {'PER': 12.4, 'TS%': 0.522, '3PAr': 0.461, 'FTr': 0.173, 'ORB%': 1.2, 'DRB%': 13.1, 'TRB%': 7.3, 'AST%': 10.1, 'STL%': 1.3, 'BLK%': 0.6, 'TOV%': 8.3, 'USG%': 22.9, 'OWS': 0.7, 'DWS': 1.9, 'WS': 2.6, 'WS/48': 0.078, 'OBPM': -0.6, 'DBPM': -0.3, 'BPM': -0.9, 'VORP': 0.4}
[adv] Javale Mcgee → {'PER': 25.2, 'TS%': 0.642, '3PAr': 0.009, 'FTr': 0.348, 'ORB%': 15.9, 'DRB%': 20.1, 'TRB%': 18.1, '

2016-17 workers:  77%|███████▋  | 447/577 [00:01<00:00, 519.46it/s][A

[adv] Alan Williams → {'PER': 19.5, 'TS%': 0.547, '3PAr': 0.004, 'FTr': 0.419, 'ORB%': 14.0, 'DRB%': 31.2, 'TRB%': 22.4, 'AST%': 5.2, 'STL%': 1.8, 'BLK%': 3.7, 'TOV%': 10.5, 'USG%': 20.9, 'OWS': 1.1, 'DWS': 0.9, 'WS': 2.1, 'WS/48': 0.142, 'OBPM': -0.8, 'DBPM': -0.6, 'BPM': -1.3, 'VORP': 0.1}
[adv] Josh Richardson → {'PER': 10.7, 'TS%': 0.493, '3PAr': 0.442, 'FTr': 0.15, 'ORB%': 2.4, 'DRB%': 9.2, 'TRB%': 5.8, 'AST%': 12.7, 'STL%': 1.9, 'BLK%': 1.9, 'TOV%': 10.6, 'USG%': 16.8, 'OWS': 0.3, 'DWS': 1.9, 'WS': 2.2, 'WS/48': 0.066, 'OBPM': -2.0, 'DBPM': 0.9, 'BPM': -1.1, 'VORP': 0.4}
[adv] Pat Connaughton → {'PER': 11.8, 'TS%': 0.645, '3PAr': 0.458, 'FTr': 0.125, 'ORB%': 3.5, 'DRB%': 14.8, 'TRB%': 9.1, 'AST%': 12.7, 'STL%': 0.9, 'BLK%': 0.5, 'TOV%': 18.3, 'USG%': 13.0, 'OWS': 0.4, 'DWS': 0.2, 'WS': 0.6, 'WS/48': 0.09, 'OBPM': -0.4, 'DBPM': -0.3, 'BPM': -0.7, 'VORP': 0.1}
[adv] Darrun Hilliard → {'PER': 5.9, 'TS%': 0.459, '3PAr': 0.365, 'FTr': 0.222, 'ORB%': 0.6, 'DRB%': 9.2, 'TRB%': 4.7, 'AST

2016-17 workers: 100%|██████████| 577/577 [00:01<00:00, 456.02it/s]


[adv] Patricio Garino → {'PER': -9.2, 'TS%': 0.0, '3PAr': 0.714, 'FTr': 0.0, 'ORB%': 2.5, 'DRB%': 15.6, 'TRB%': 8.9, 'AST%': 0.0, 'STL%': 0.0, 'BLK%': 0.0, 'TOV%': 30.0, 'USG%': 10.2, 'OWS': -0.2, 'DWS': 0.0, 'WS': -0.2, 'WS/48': -0.25, 'OBPM': -13.3, 'DBPM': -4.6, 'BPM': -17.9, 'VORP': -0.2}
[adv] Pierre Jackson → {'PER': 13.0, 'TS%': 0.416, '3PAr': 0.282, 'FTr': 0.179, 'ORB%': 1.3, 'DRB%': 11.6, 'TRB%': 6.2, 'AST%': 38.0, 'STL%': 1.2, 'BLK%': 0.0, 'TOV%': 6.7, 'USG%': 25.3, 'OWS': 0.0, 'DWS': 0.1, 'WS': 0.0, 'WS/48': 0.021, 'OBPM': -2.7, 'DBPM': -1.7, 'BPM': -4.4, 'VORP': -0.1}
[dbg] 2016-17 processed players: 465
[dbg] 2016-17 before standings merge: 465
[dbg] 2016-17 after standings merge: 465
[dbg] 2016-17 before team payroll merge: 465
[dbg] 2016-17 after team payroll merge: 465
[dbg] 2016-17 after MP filter: 460


Seasons:  11%|█         | 1/9 [00:09<01:17,  9.65s/it]

[dbg] 2016-17 after injury merge: 460
Percentage calculations completed
[dbg] 2016-17 final merged: 460
↻  2016-17 differs – re-scraping
[fetch] https://hoopshype.com/salaries/2017-18/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://hoopshype.com/salaries/2017-2018/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://www.espn.com/nba/salaries/_/type/team/year/2018 (attempt 1)
  -> ESPN team salary fallback added 40 rows
                 Team  Team_Salary   Season
0   Stephen Curry, PG     37457154  2017-18
1   Blake Griffin, PF     32088932  2017-18
2    Paul Millsap, PF     31269231  2017-18
3      Kyle Lowry, PG     31200000  2017-18
4  Gordon Hayward, SF     29727900  2017-18
[adv] fetching https://www.basketball-reference.com/leagues/NBA_2018_advanced.html




[adv] Stephen Curry → {'PER': 28.2, 'TS%': 0.675, '3PAr': 0.58, 'FTr': 0.35, 'ORB%': 2.7, 'DRB%': 14.4, 'TRB%': 9.0, 'AST%': 30.3, 'STL%': 2.4, 'BLK%': 0.4, 'TOV%': 13.3, 'USG%': 31.0, 'OWS': 7.2, 'DWS': 1.8, 'WS': 9.1, 'WS/48': 0.267, 'OBPM': 7.8, 'DBPM': 0.0, 'BPM': 7.7, 'VORP': 4.0}
[adv] Blake Griffin → {'PER': 19.6, 'TS%': 0.544, '3PAr': 0.323, 'FTr': 0.331, 'ORB%': 4.1, 'DRB%': 19.8, 'TRB%': 12.0, 'AST%': 28.1, 'STL%': 1.0, 'BLK%': 0.8, 'TOV%': 12.6, 'USG%': 28.9, 'OWS': 3.2, 'DWS': 1.8, 'WS': 4.9, 'WS/48': 0.12, 'OBPM': 3.6, 'DBPM': -0.2, 'BPM': 3.4, 'VORP': 2.7}
[adv] Paul Millsap → {'PER': 17.0, 'TS%': 0.549, '3PAr': 0.26, 'FTr': 0.37, 'ORB%': 6.4, 'DRB%': 17.6, 'TRB%': 12.1, 'AST%': 13.9, 'STL%': 1.7, 'BLK%': 3.2, 'TOV%': 12.6, 'USG%': 22.0, 'OWS': 1.4, 'DWS': 1.1, 'WS': 2.5, 'WS/48': 0.107, 'OBPM': 0.4, 'DBPM': 0.0, 'BPM': 0.4, 'VORP': 0.7}
[adv] Gordon Hayward → {'PER': 7.3, 'TS%': 0.5, '3PAr': 0.5, 'FTr': 0.0, 'ORB%': 0.0, 'DRB%': 21.6, 'TRB%': 10.9, 'AST%': 0.0, 'STL%': 0



[adv] Demarcus Cousins → {'PER': 22.6, 'TS%': 0.583, '3PAr': 0.34, 'FTr': 0.456, 'ORB%': 6.8, 'DRB%': 30.9, 'TRB%': 19.2, 'AST%': 23.0, 'STL%': 2.2, 'BLK%': 3.5, 'TOV%': 18.9, 'USG%': 31.9, 'OWS': 1.5, 'DWS': 3.2, 'WS': 4.7, 'WS/48': 0.13, 'OBPM': 2.5, 'DBPM': 2.2, 'BPM': 4.7, 'VORP': 2.9}
[adv] no advanced stats for Goran Dragic in 2017-18
[adv] Kent Bazemore → {'PER': 14.5, 'TS%': 0.547, '3PAr': 0.405, 'FTr': 0.297, 'ORB%': 1.6, 'DRB%': 13.9, 'TRB%': 7.7, 'AST%': 20.1, 'STL%': 2.7, 'BLK%': 2.2, 'TOV%': 17.0, 'USG%': 22.5, 'OWS': 0.2, 'DWS': 2.0, 'WS': 2.2, 'WS/48': 0.059, 'OBPM': -0.4, 'DBPM': 1.0, 'BPM': 0.6, 'VORP': 1.2}
[adv] Klay Thompson → {'PER': 16.0, 'TS%': 0.598, '3PAr': 0.441, 'FTr': 0.083, 'ORB%': 1.5, 'DRB%': 10.3, 'TRB%': 6.2, 'AST%': 11.2, 'STL%': 1.1, 'BLK%': 1.1, 'TOV%': 9.5, 'USG%': 23.7, 'OWS': 3.1, 'DWS': 1.8, 'WS': 4.9, 'WS/48': 0.094, 'OBPM': 1.7, 'DBPM': -2.1, 'BPM': -0.4, 'VORP': 1.0}
[adv] Wesley Matthews → {'PER': 11.4, 'TS%': 0.541, '3PAr': 0.577, 'FTr': 0.1




[adv] Josh Richardson → {'PER': 13.6, 'TS%': 0.551, '3PAr': 0.38, 'FTr': 0.16, 'ORB%': 2.9, 'DRB%': 9.0, 'TRB%': 6.0, 'AST%': 13.2, 'STL%': 2.3, 'BLK%': 2.4, 'TOV%': 12.9, 'USG%': 18.2, 'OWS': 1.8, 'DWS': 3.5, 'WS': 5.4, 'WS/48': 0.096, 'OBPM': -0.6, 'DBPM': 1.3, 'BPM': 0.7, 'VORP': 1.8}
[adv] Norman Powell → {'PER': 8.5, 'TS%': 0.492, '3PAr': 0.497, 'FTr': 0.104, 'ORB%': 1.5, 'DRB%': 10.9, 'TRB%': 6.3, 'AST%': 11.8, 'STL%': 1.7, 'BLK%': 1.2, 'TOV%': 14.4, 'USG%': 18.9, 'OWS': -0.6, 'DWS': 1.2, 'WS': 0.6, 'WS/48': 0.027, 'OBPM': -3.2, 'DBPM': 0.2, 'BPM': -2.9, 'VORP': -0.3}
[adv] Andre Roberson → {'PER': 10.9, 'TS%': 0.543, '3PAr': 0.222, 'FTr': 0.235, 'ORB%': 7.8, 'DRB%': 12.2, 'TRB%': 9.9, 'AST%': 6.0, 'STL%': 2.2, 'BLK%': 3.1, 'TOV%': 14.4, 'USG%': 8.6, 'OWS': 0.8, 'DWS': 1.3, 'WS': 2.2, 'WS/48': 0.1, 'OBPM': -1.9, 'DBPM': 1.8, 'BPM': -0.1, 'VORP': 0.5}
[adv] Avery Bradley → {'PER': 9.6, 'TS%': 0.496, '3PAr': 0.325, 'FTr': 0.131, 'ORB%': 1.7, 'DRB%': 7.3, 'TRB%': 4.5, 'AST%': 10.4, 

2017-18 workers:  36%|███▌      | 210/584 [00:00<00:00, 378.03it/s][A


[adv] Emmanuel Mudiay → {'PER': 11.5, 'TS%': 0.474, '3PAr': 0.281, 'FTr': 0.245, 'ORB%': 2.3, 'DRB%': 11.3, 'TRB%': 6.8, 'AST%': 24.5, 'STL%': 1.6, 'BLK%': 0.8, 'TOV%': 16.2, 'USG%': 24.4, 'OWS': -0.8, 'DWS': 0.7, 'WS': 0.0, 'WS/48': -0.002, 'OBPM': -2.1, 'DBPM': -1.8, 'BPM': -3.9, 'VORP': -0.6}
[adv] Jamal Crawford → {'PER': 13.0, 'TS%': 0.519, '3PAr': 0.423, 'FTr': 0.152, 'ORB%': 1.4, 'DRB%': 5.5, 'TRB%': 3.5, 'AST%': 16.9, 'STL%': 1.2, 'BLK%': 0.5, 'TOV%': 10.7, 'USG%': 23.7, 'OWS': 0.9, 'DWS': 0.3, 'WS': 1.3, 'WS/48': 0.037, 'OBPM': 0.1, 'DBPM': -2.5, 'BPM': -2.3, 'VORP': -0.1}
[adv] Kris Dunn → {'PER': 14.5, 'TS%': 0.488, '3PAr': 0.201, 'FTr': 0.171, 'ORB%': 1.6, 'DRB%': 14.4, 'TRB%': 7.8, 'AST%': 33.3, 'STL%': 3.3, 'BLK%': 1.6, 'TOV%': 17.3, 'USG%': 24.7, 'OWS': -1.0, 'DWS': 1.9, 'WS': 0.9, 'WS/48': 0.027, 'OBPM': -2.4, 'DBPM': 1.1, 'BPM': -1.3, 'VORP': 0.3}
[adv] Nerlens Noel → {'PER': 16.2, 'TS%': 0.558, '3PAr': 0.01, 'FTr': 0.267, 'ORB%': 10.9, 'DRB%': 29.3, 'TRB%': 19.8, 'AST

2017-18 workers:  51%|█████     | 298/584 [00:00<00:00, 406.58it/s][A


[adv] Rashad Vaughn → {'PER': 9.3, 'TS%': 0.546, '3PAr': 0.661, 'FTr': 0.107, 'ORB%': 1.6, 'DRB%': 9.8, 'TRB%': 5.7, 'AST%': 8.7, 'STL%': 1.2, 'BLK%': 1.2, 'TOV%': 9.3, 'USG%': 13.7, 'OWS': 0.1, 'DWS': 0.1, 'WS': 0.2, 'WS/48': 0.05, 'OBPM': -1.3, 'DBPM': -1.4, 'BPM': -2.6, 'VORP': 0.0}
[adv] Henry Ellenson → {'PER': 10.5, 'TS%': 0.476, '3PAr': 0.411, 'FTr': 0.199, 'ORB%': 3.3, 'DRB%': 24.3, 'TRB%': 13.6, 'AST%': 9.4, 'STL%': 0.8, 'BLK%': 0.0, 'TOV%': 10.2, 'USG%': 23.8, 'OWS': -0.2, 'DWS': 0.4, 'WS': 0.1, 'WS/48': 0.02, 'OBPM': -2.6, 'DBPM': -1.9, 'BPM': -4.4, 'VORP': -0.2}
[adv] no advanced stats for Ante Zizic in 2017-18
[adv] Sam Dekker → {'PER': 11.5, 'TS%': 0.534, '3PAr': 0.232, 'FTr': 0.216, 'ORB%': 5.6, 'DRB%': 16.0, 'TRB%': 10.8, 'AST%': 6.2, 'STL%': 1.3, 'BLK%': 0.9, 'TOV%': 10.4, 'USG%': 15.5, 'OWS': 0.6, 'DWS': 0.6, 'WS': 1.2, 'WS/48': 0.066, 'OBPM': -2.3, 'DBPM': -0.6, 'BPM': -2.9, 'VORP': -0.2}
[adv] Eric Moreland → {'PER': 13.9, 'TS%': 0.531, '3PAr': 0.0, 'FTr': 0.238, 'O

2017-18 workers:  66%|██████▌   | 383/584 [00:01<00:00, 397.75it/s][A


[adv] Sterling Brown → {'PER': 9.1, 'TS%': 0.503, '3PAr': 0.444, 'FTr': 0.117, 'ORB%': 4.1, 'DRB%': 17.5, 'TRB%': 10.8, 'AST%': 4.5, 'STL%': 2.0, 'BLK%': 1.4, 'TOV%': 10.4, 'USG%': 14.0, 'OWS': 0.0, 'DWS': 0.7, 'WS': 0.7, 'WS/48': 0.043, 'OBPM': -2.8, 'DBPM': -0.2, 'BPM': -3.1, 'VORP': -0.2}
[adv] Wes Iwundu → {'PER': 7.8, 'TS%': 0.48, '3PAr': 0.211, 'FTr': 0.216, 'ORB%': 3.9, 'DRB%': 11.0, 'TRB%': 7.4, 'AST%': 7.8, 'STL%': 1.6, 'BLK%': 0.9, 'TOV%': 11.2, 'USG%': 11.6, 'OWS': 0.0, 'DWS': 0.7, 'WS': 0.7, 'WS/48': 0.032, 'OBPM': -4.2, 'DBPM': -0.1, 'BPM': -4.3, 'VORP': -0.6}
[adv] Tyler Dorsey → {'PER': 10.0, 'TS%': 0.494, '3PAr': 0.521, 'FTr': 0.165, 'ORB%': 1.9, 'DRB%': 12.9, 'TRB%': 7.4, 'AST%': 12.5, 'STL%': 0.9, 'BLK%': 0.4, 'TOV%': 8.3, 'USG%': 20.0, 'OWS': 0.0, 'DWS': 0.4, 'WS': 0.4, 'WS/48': 0.02, 'OBPM': -1.4, 'DBPM': -1.5, 'BPM': -2.9, 'VORP': -0.2}
[adv] Alex Poythress → {'PER': 7.8, 'TS%': 0.5, '3PAr': 0.423, 'FTr': 0.0, 'ORB%': 7.7, 'DRB%': 10.8, 'TRB%': 9.2, 'AST%': 2.6, 'S

2017-18 workers:  95%|█████████▍| 553/584 [00:01<00:00, 648.38it/s][A

[adv] Joe Johnson → {'PER': 8.1, 'TS%': 0.49, '3PAr': 0.403, 'FTr': 0.125, 'ORB%': 1.3, 'DRB%': 14.7, 'TRB%': 8.1, 'AST%': 10.2, 'STL%': 0.7, 'BLK%': 0.5, 'TOV%': 11.4, 'USG%': 15.9, 'OWS': -0.3, 'DWS': 1.3, 'WS': 1.0, 'WS/48': 0.04, 'OBPM': -2.7, 'DBPM': -0.2, 'BPM': -2.9, 'VORP': -0.3}
[adv] Naz Mitrou-Long → {'PER': 133.8, 'TS%': 1.5, '3PAr': 1.0, 'FTr': 0.0, 'ORB%': 0.0, 'DRB%': 0.0, 'TRB%': 0.0, 'AST%': 0.0, 'STL%': 0.0, 'BLK%': 0.0, 'TOV%': 0.0, 'USG%': 45.0, 'OWS': 0.1, 'DWS': 0.0, 'WS': 0.1, 'WS/48': 2.712, 'OBPM': 199.4, 'DBPM': 42.7, 'BPM': 242.2, 'VORP': 0.0}
[adv] Davon Reed → {'PER': 4.8, 'TS%': 0.388, '3PAr': 0.5, 'FTr': 0.158, 'ORB%': 0.9, 'DRB%': 16.6, 'TRB%': 8.6, 'AST%': 7.6, 'STL%': 2.0, 'BLK%': 0.7, 'TOV%': 11.0, 'USG%': 16.0, 'OWS': -0.5, 'DWS': 0.2, 'WS': -0.3, 'WS/48': -0.06, 'OBPM': -5.8, 'DBPM': -0.6, 'BPM': -6.4, 'VORP': -0.3}
[adv] Derrick Rose → {'PER': 11.5, 'TS%': 0.507, '3PAr': 0.161, 'FTr': 0.247, 'ORB%': 3.5, 'DRB%': 5.8, 'TRB%': 4.7, 'AST%': 13.6, 'STL

2017-18 workers: 100%|██████████| 584/584 [00:16<00:00, 34.49it/s]

[adv] Andre Ingram → {'PER': 16.3, 'TS%': 0.655, '3PAr': 0.529, 'FTr': 0.176, 'ORB%': 1.7, 'DRB%': 8.3, 'TRB%': 5.0, 'AST%': 15.3, 'STL%': 2.2, 'BLK%': 3.8, 'TOV%': 14.1, 'USG%': 14.1, 'OWS': 0.1, 'DWS': 0.1, 'WS': 0.2, 'WS/48': 0.157, 'OBPM': 1.7, 'DBPM': 2.5, 'BPM': 4.3, 'VORP': 0.1}
[adv] Aaron Jackson → {'PER': 2.4, 'TS%': 0.405, '3PAr': 0.444, 'FTr': 0.222, 'ORB%': 6.5, 'DRB%': 3.2, 'TRB%': 4.8, 'AST%': 4.0, 'STL%': 0.0, 'BLK%': 0.0, 'TOV%': 9.2, 'USG%': 13.7, 'OWS': 0.0, 'DWS': 0.0, 'WS': 0.0, 'WS/48': -0.017, 'OBPM': -6.6, 'DBPM': -2.3, 'BPM': -8.9, 'VORP': -0.1}
[dbg] 2017-18 processed players: 402
[dbg] 2017-18 before standings merge: 402
[dbg] 2017-18 after standings merge: 402
[dbg] 2017-18 before team payroll merge: 402
[dbg] 2017-18 after team payroll merge: 402
[dbg] 2017-18 after MP filter: 392



Seasons:  22%|██▏       | 2/9 [00:36<02:16, 19.55s/it]

[dbg] 2017-18 after injury merge: 392
Percentage calculations completed
[dbg] 2017-18 final merged: 392
[fetch] https://hoopshype.com/salaries/2018-19/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://hoopshype.com/salaries/2018-2019/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://www.espn.com/nba/salaries/_/type/team/year/2019 (attempt 1)
  -> ESPN team salary fallback added 40 rows
                    Team  Team_Salary   Season
0      Stephen Curry, PG     37457154  2018-19
1       LeBron James, SF     35654150  2018-19
2  Russell Westbrook, PG     35654150  2018-19
3         Chris Paul, PG     35654150  2018-19
4      Blake Griffin, PF     32088932  2018-19
[fetch_season_players] 557 players for 2018-19
[adv] fetching https://www.basketball-reference.com/leagues/NBA_2019_advanced.html




[adv] Lebron James → {'PER': 25.6, 'TS%': 0.588, '3PAr': 0.299, 'FTr': 0.382, 'ORB%': 3.1, 'DRB%': 21.3, 'TRB%': 12.4, 'AST%': 39.4, 'STL%': 1.7, 'BLK%': 1.4, 'TOV%': 13.3, 'USG%': 31.6, 'OWS': 4.7, 'DWS': 2.6, 'WS': 7.2, 'WS/48': 0.179, 'OBPM': 6.4, 'DBPM': 1.7, 'BPM': 8.0, 'VORP': 4.9}
[adv] Stephen Curry → {'PER': 24.4, 'TS%': 0.641, '3PAr': 0.604, 'FTr': 0.214, 'ORB%': 2.2, 'DRB%': 14.2, 'TRB%': 8.4, 'AST%': 24.2, 'STL%': 1.9, 'BLK%': 0.9, 'TOV%': 11.6, 'USG%': 30.4, 'OWS': 7.2, 'DWS': 2.5, 'WS': 9.7, 'WS/48': 0.199, 'OBPM': 7.1, 'DBPM': -0.5, 'BPM': 6.6, 'VORP': 5.1}
[adv] Blake Griffin → {'PER': 21.0, 'TS%': 0.581, '3PAr': 0.389, 'FTr': 0.41, 'ORB%': 4.0, 'DRB%': 20.1, 'TRB%': 11.8, 'AST%': 27.1, 'STL%': 1.0, 'BLK%': 0.9, 'TOV%': 13.8, 'USG%': 30.2, 'OWS': 5.1, 'DWS': 2.9, 'WS': 8.0, 'WS/48': 0.147, 'OBPM': 4.9, 'DBPM': 0.2, 'BPM': 5.1, 'VORP': 4.7}
[adv] Russell Westbrook → {'PER': 21.1, 'TS%': 0.501, '3PAr': 0.279, 'FTr': 0.306, 'ORB%': 4.1, 'DRB%': 28.3, 'TRB%': 15.8, 'AST%': 



[adv] Marcin Gortat → {'PER': 12.8, 'TS%': 0.562, '3PAr': 0.0, 'FTr': 0.258, 'ORB%': 9.8, 'DRB%': 26.5, 'TRB%': 18.4, 'AST%': 12.0, 'STL%': 0.4, 'BLK%': 2.5, 'TOV%': 19.4, 'USG%': 14.4, 'OWS': 0.6, 'DWS': 0.8, 'WS': 1.4, 'WS/48': 0.089, 'OBPM': -1.7, 'DBPM': 0.4, 'BPM': -1.3, 'VORP': 0.1}
[adv] Eric Gordon → {'PER': 12.2, 'TS%': 0.549, '3PAr': 0.64, 'FTr': 0.162, 'ORB%': 0.9, 'DRB%': 6.9, 'TRB%': 3.8, 'AST%': 9.4, 'STL%': 0.9, 'BLK%': 1.1, 'TOV%': 8.2, 'USG%': 22.0, 'OWS': 1.7, 'DWS': 1.0, 'WS': 2.6, 'WS/48': 0.059, 'OBPM': 0.5, 'DBPM': -1.8, 'BPM': -1.3, 'VORP': 0.4}
[adv] Cody Zeller → {'PER': 17.2, 'TS%': 0.611, '3PAr': 0.064, 'FTr': 0.409, 'ORB%': 9.4, 'DRB%': 19.7, 'TRB%': 14.5, 'AST%': 12.1, 'STL%': 1.5, 'BLK%': 2.9, 'TOV%': 13.2, 'USG%': 16.3, 'OWS': 2.6, 'DWS': 1.2, 'WS': 3.9, 'WS/48': 0.15, 'OBPM': -0.6, 'DBPM': 0.7, 'BPM': 0.1, 'VORP': 0.7}
[adv] Joe Ingles → {'PER': 13.4, 'TS%': 0.581, '3PAr': 0.602, 'FTr': 0.153, 'ORB%': 1.5, 'DRB%': 12.2, 'TRB%': 7.0, 'AST%': 26.1, 'STL%':




[adv] Karl-Anthony Towns → {'PER': 26.3, 'TS%': 0.622, '3PAr': 0.27, 'FTr': 0.342, 'ORB%': 10.9, 'DRB%': 29.3, 'TRB%': 20.0, 'AST%': 17.2, 'STL%': 1.3, 'BLK%': 4.2, 'TOV%': 13.7, 'USG%': 28.9, 'OWS': 7.2, 'DWS': 3.2, 'WS': 10.4, 'WS/48': 0.197, 'OBPM': 5.4, 'DBPM': 0.6, 'BPM': 6.0, 'VORP': 5.1}
[adv] Jamychal Green → {'PER': 14.7, 'TS%': 0.587, '3PAr': 0.37, 'FTr': 0.212, 'ORB%': 8.3, 'DRB%': 24.3, 'TRB%': 16.4, 'AST%': 5.8, 'STL%': 1.6, 'BLK%': 2.3, 'TOV%': 14.3, 'USG%': 19.4, 'OWS': 0.9, 'DWS': 1.9, 'WS': 2.9, 'WS/48': 0.101, 'OBPM': -1.3, 'DBPM': -0.4, 'BPM': -1.7, 'VORP': 0.1}
[adv] Kyle Korver → {'PER': 11.2, 'TS%': 0.582, '3PAr': 0.72, 'FTr': 0.151, 'ORB%': 0.7, 'DRB%': 12.4, 'TRB%': 6.6, 'AST%': 8.9, 'STL%': 0.9, 'BLK%': 0.7, 'TOV%': 10.3, 'USG%': 18.5, 'OWS': 1.0, 'DWS': 1.3, 'WS': 2.3, 'WS/48': 0.082, 'OBPM': -0.2, 'DBPM': -0.3, 'BPM': -0.5, 'VORP': 0.5}
[adv] Jeremy Lamb → {'PER': 17.3, 'TS%': 0.552, '3PAr': 0.337, 'FTr': 0.266, 'ORB%': 3.1, 'DRB%': 18.1, 'TRB%': 10.4, 'AST%'

2018-19 workers:  49%|████▉     | 242/493 [00:00<00:00, 388.24it/s][A


[adv] Troy Daniels → {'PER': 9.8, 'TS%': 0.558, '3PAr': 0.705, 'FTr': 0.084, 'ORB%': 1.9, 'DRB%': 8.9, 'TRB%': 5.3, 'AST%': 5.0, 'STL%': 1.6, 'BLK%': 0.5, 'TOV%': 8.7, 'USG%': 17.6, 'OWS': 0.2, 'DWS': 0.1, 'WS': 0.3, 'WS/48': 0.02, 'OBPM': -1.7, 'DBPM': -2.0, 'BPM': -3.7, 'VORP': -0.3}
[adv] Kelly Oubre Jr. → {'PER': 15.2, 'TS%': 0.551, '3PAr': 0.401, 'FTr': 0.29, 'ORB%': 4.0, 'DRB%': 14.6, 'TRB%': 9.2, 'AST%': 6.8, 'STL%': 2.1, 'BLK%': 2.5, 'TOV%': 9.8, 'USG%': 23.2, 'OWS': 0.9, 'DWS': 1.3, 'WS': 2.2, 'WS/48': 0.054, 'OBPM': -0.3, 'DBPM': -0.9, 'BPM': -1.2, 'VORP': 0.4}
[adv] Miles Bridges → {'PER': 13.1, 'TS%': 0.548, '3PAr': 0.391, 'FTr': 0.151, 'ORB%': 4.2, 'DRB%': 16.6, 'TRB%': 10.3, 'AST%': 8.1, 'STL%': 1.6, 'BLK%': 2.5, 'TOV%': 8.4, 'USG%': 15.1, 'OWS': 1.4, 'DWS': 1.5, 'WS': 2.9, 'WS/48': 0.083, 'OBPM': -0.9, 'DBPM': 0.1, 'BPM': -0.8, 'VORP': 0.5}
[adv] Marquese Chriss → {'PER': 9.2, 'TS%': 0.455, '3PAr': 0.4, 'FTr': 0.25, 'ORB%': 8.6, 'DRB%': 23.6, 'TRB%': 15.8, 'AST%': 6.5, '

2018-19 workers:  70%|██████▉   | 344/493 [00:00<00:00, 453.64it/s][A


[adv] Ivica Zubac → {'PER': 18.9, 'TS%': 0.604, '3PAr': 0.0, 'FTr': 0.332, 'ORB%': 11.9, 'DRB%': 24.2, 'TRB%': 18.2, 'AST%': 9.1, 'STL%': 0.6, 'BLK%': 4.0, 'TOV%': 13.9, 'USG%': 20.3, 'OWS': 1.9, 'DWS': 1.3, 'WS': 3.2, 'WS/48': 0.148, 'OBPM': -0.2, 'DBPM': -0.2, 'BPM': -0.5, 'VORP': 0.4}
[adv] Pascal Siakam → {'PER': 18.7, 'TS%': 0.628, '3PAr': 0.226, 'FTr': 0.32, 'ORB%': 5.4, 'DRB%': 17.5, 'TRB%': 11.6, 'AST%': 14.6, 'STL%': 1.4, 'BLK%': 1.7, 'TOV%': 12.5, 'USG%': 20.8, 'OWS': 5.7, 'DWS': 3.6, 'WS': 9.3, 'WS/48': 0.175, 'OBPM': 1.6, 'DBPM': 0.8, 'BPM': 2.4, 'VORP': 2.8}
[adv] Cheick Diallo → {'PER': 17.5, 'TS%': 0.644, '3PAr': 0.015, 'FTr': 0.247, 'ORB%': 8.8, 'DRB%': 29.3, 'TRB%': 19.2, 'AST%': 5.1, 'STL%': 1.5, 'BLK%': 3.0, 'TOV%': 14.0, 'USG%': 16.0, 'OWS': 1.4, 'DWS': 1.2, 'WS': 2.6, 'WS/48': 0.14, 'OBPM': -1.1, 'DBPM': 0.1, 'BPM': -0.9, 'VORP': 0.2}
[adv] no advanced stats for Wade Baldwin Iv in 2018-19
[adv] Andrew Harrison → {'PER': 5.8, 'TS%': 0.443, '3PAr': 0.472, 'FTr': 0.34

2018-19 workers: 100%|██████████| 493/493 [00:01<00:00, 393.51it/s][A


[adv] Hamidou Diallo → {'PER': 10.1, 'TS%': 0.497, '3PAr': 0.145, 'FTr': 0.358, 'ORB%': 7.2, 'DRB%': 12.0, 'TRB%': 9.5, 'AST%': 4.4, 'STL%': 1.9, 'BLK%': 1.6, 'TOV%': 10.7, 'USG%': 16.6, 'OWS': 0.1, 'DWS': 0.7, 'WS': 0.8, 'WS/48': 0.069, 'OBPM': -4.4, 'DBPM': 0.2, 'BPM': -4.2, 'VORP': -0.3}
[adv] Brad Wanamaker → {'PER': 14.3, 'TS%': 0.597, '3PAr': 0.371, 'FTr': 0.267, 'ORB%': 0.9, 'DRB%': 11.8, 'TRB%': 6.4, 'AST%': 22.5, 'STL%': 1.7, 'BLK%': 0.5, 'TOV%': 13.9, 'USG%': 17.1, 'OWS': 0.6, 'DWS': 0.4, 'WS': 1.0, 'WS/48': 0.134, 'OBPM': -1.3, 'DBPM': 0.7, 'BPM': -0.7, 'VORP': 0.1}
[adv] Isaiah Hartenstein → {'PER': 10.0, 'TS%': 0.562, '3PAr': 0.146, 'FTr': 0.341, 'ORB%': 10.3, 'DRB%': 13.3, 'TRB%': 11.8, 'AST%': 9.4, 'STL%': 1.6, 'BLK%': 4.6, 'TOV%': 21.6, 'USG%': 11.8, 'OWS': 0.2, 'DWS': 0.3, 'WS': 0.5, 'WS/48': 0.101, 'OBPM': -5.1, 'DBPM': 1.2, 'BPM': -3.9, 'VORP': -0.1}
[adv] Alize Johnson → {'PER': 6.3, 'TS%': 0.333, '3PAr': 0.125, 'FTr': 0.5, 'ORB%': 7.1, 'DRB%': 25.5, 'TRB%': 16.5, '

Seasons:  33%|███▎      | 3/9 [00:55<01:57, 19.51s/it]

[dbg] 2018-19 after injury merge: 450
Percentage calculations completed
[dbg] 2018-19 final merged: 450
[fetch] https://hoopshype.com/salaries/2019-20/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://hoopshype.com/salaries/2019-2020/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://www.espn.com/nba/salaries/_/type/team/year/2020 (attempt 1)
  -> ESPN team salary fallback added 40 rows
                    Team  Team_Salary   Season
0      Stephen Curry, PG     40231758  2019-20
1  Russell Westbrook, PG     38506482  2019-20
2         Chris Paul, PG     38506482  2019-20
3       Kevin Durant, PF     38199000  2019-20
4       James Harden, SG     38199000  2019-20
[fetch_season_players] 562 players for 2019-20
[adv] fetching https://www.basketball-reference.com/leagues/NBA_2020_advanced.html





[adv] Stephen Curry → {'PER': 21.7, 'TS%': 0.557, '3PAr': 0.598, 'FTr': 0.317, 'ORB%': 3.0, 'DRB%': 17.8, 'TRB%': 10.1, 'AST%': 42.3, 'STL%': 1.7, 'BLK%': 1.3, 'TOV%': 14.6, 'USG%': 33.6, 'OWS': 0.2, 'DWS': 0.1, 'WS': 0.3, 'WS/48': 0.103, 'OBPM': 4.5, 'DBPM': -0.6, 'BPM': 3.9, 'VORP': 0.2}
[adv] Russell Westbrook → {'PER': 21.0, 'TS%': 0.536, '3PAr': 0.166, 'FTr': 0.297, 'ORB%': 5.1, 'DRB%': 18.1, 'TRB%': 11.5, 'AST%': 35.6, 'STL%': 2.1, 'BLK%': 0.8, 'TOV%': 15.0, 'USG%': 34.4, 'OWS': 1.7, 'DWS': 2.5, 'WS': 4.2, 'WS/48': 0.098, 'OBPM': 1.6, 'DBPM': -0.1, 'BPM': 1.5, 'VORP': 1.8}
[adv] James Harden → {'PER': 29.1, 'TS%': 0.626, '3PAr': 0.557, 'FTr': 0.528, 'ORB%': 2.9, 'DRB%': 16.0, 'TRB%': 9.4, 'AST%': 35.9, 'STL%': 2.3, 'BLK%': 2.1, 'TOV%': 14.2, 'USG%': 36.3, 'OWS': 9.9, 'DWS': 3.2, 'WS': 13.1, 'WS/48': 0.254, 'OBPM': 8.1, 'DBPM': 1.6, 'BPM': 9.6, 'VORP': 7.3}
[adv] Chris Paul → {'PER': 21.7, 'TS%': 0.61, '3PAr': 0.343, 'FTr': 0.315, 'ORB%': 1.3, 'DRB%': 15.7, 'TRB%': 8.7, 'AST%': 34

2019-20 workers:  15%|█▍        | 76/520 [00:00<00:01, 368.47it/s][A


[adv] Kelly Oubre Jr. → {'PER': 14.9, 'TS%': 0.56, '3PAr': 0.372, 'FTr': 0.298, 'ORB%': 3.8, 'DRB%': 17.1, 'TRB%': 10.4, 'AST%': 6.6, 'STL%': 1.7, 'BLK%': 1.8, 'TOV%': 8.2, 'USG%': 22.4, 'OWS': 1.7, 'DWS': 2.1, 'WS': 3.8, 'WS/48': 0.093, 'OBPM': -0.3, 'DBPM': -0.3, 'BPM': -0.5, 'VORP': 0.7}
[adv] Bobby Portis → {'PER': 14.6, 'TS%': 0.53, '3PAr': 0.317, 'FTr': 0.158, 'ORB%': 6.0, 'DRB%': 20.4, 'TRB%': 12.9, 'AST%': 11.1, 'STL%': 1.1, 'BLK%': 1.2, 'TOV%': 10.4, 'USG%': 21.4, 'OWS': 0.9, 'DWS': 1.0, 'WS': 1.9, 'WS/48': 0.064, 'OBPM': 0.0, 'DBPM': -1.1, 'BPM': -1.1, 'VORP': 0.3}
[adv] James Johnson → {'PER': 14.8, 'TS%': 0.572, '3PAr': 0.423, 'FTr': 0.223, 'ORB%': 4.3, 'DRB%': 16.3, 'TRB%': 10.3, 'AST%': 18.4, 'STL%': 1.9, 'BLK%': 4.6, 'TOV%': 16.9, 'USG%': 19.4, 'OWS': 0.3, 'DWS': 0.8, 'WS': 1.1, 'WS/48': 0.089, 'OBPM': -1.1, 'DBPM': 1.2, 'BPM': 0.0, 'VORP': 0.3}
[adv] no advanced stats for Marcus Morris Sr. in 2019-20
[adv] Danny Green → {'PER': 10.8, 'TS%': 0.552, '3PAr': 0.69, 'FTr': 0

2019-20 workers:  30%|██▉       | 155/520 [00:00<00:00, 381.81it/s][A

[adv] Lou Williams → {'PER': 17.3, 'TS%': 0.546, '3PAr': 0.337, 'FTr': 0.362, 'ORB%': 1.8, 'DRB%': 9.2, 'TRB%': 5.6, 'AST%': 30.1, 'STL%': 1.1, 'BLK%': 0.6, 'TOV%': 14.4, 'USG%': 28.4, 'OWS': 2.3, 'DWS': 1.8, 'WS': 4.1, 'WS/48': 0.105, 'OBPM': 2.2, 'DBPM': -1.4, 'BPM': 0.7, 'VORP': 1.3}
[adv] Jayson Tatum → {'PER': 20.4, 'TS%': 0.567, '3PAr': 0.383, 'FTr': 0.255, 'ORB%': 3.1, 'DRB%': 18.5, 'TRB%': 10.9, 'AST%': 14.5, 'STL%': 2.0, 'BLK%': 2.3, 'TOV%': 10.2, 'USG%': 28.6, 'OWS': 3.2, 'DWS': 3.7, 'WS': 6.9, 'WS/48': 0.146, 'OBPM': 3.5, 'DBPM': 0.6, 'BPM': 4.0, 'VORP': 3.4}
[adv] Rj Barrett → {'PER': 10.7, 'TS%': 0.479, '3PAr': 0.271, 'FTr': 0.349, 'ORB%': 3.0, 'DRB%': 14.8, 'TRB%': 8.7, 'AST%': 12.8, 'STL%': 1.6, 'BLK%': 0.9, 'TOV%': 12.9, 'USG%': 24.0, 'OWS': -1.6, 'DWS': 1.1, 'WS': -0.5, 'WS/48': -0.015, 'OBPM': -2.9, 'DBPM': -1.5, 'BPM': -4.3, 'VORP': -1.0}
[adv] no advanced stats for Luka Doncic in 2019-20
[adv] Jae Crowder → {'PER': 12.5, 'TS%': 0.553, '3PAr': 0.7, 'FTr': 0.222, 'ORB




[adv] Shai Gilgeous-Alexander → {'PER': 17.7, 'TS%': 0.568, '3PAr': 0.247, 'FTr': 0.352, 'ORB%': 2.2, 'DRB%': 16.1, 'TRB%': 9.4, 'AST%': 15.2, 'STL%': 1.6, 'BLK%': 1.7, 'TOV%': 10.3, 'USG%': 23.7, 'OWS': 3.1, 'DWS': 3.0, 'WS': 6.1, 'WS/48': 0.121, 'OBPM': 1.3, 'DBPM': 0.4, 'BPM': 1.7, 'VORP': 2.2}
[adv] Miles Bridges → {'PER': 12.1, 'TS%': 0.52, '3PAr': 0.398, 'FTr': 0.174, 'ORB%': 4.6, 'DRB%': 15.6, 'TRB%': 9.9, 'AST%': 9.4, 'STL%': 1.0, 'BLK%': 2.2, 'TOV%': 10.5, 'USG%': 20.0, 'OWS': 0.3, 'DWS': 1.3, 'WS': 1.6, 'WS/48': 0.039, 'OBPM': -1.5, 'DBPM': -1.0, 'BPM': -2.5, 'VORP': -0.3}
[adv] Luke Kennard → {'PER': 14.4, 'TS%': 0.589, '3PAr': 0.535, 'FTr': 0.219, 'ORB%': 1.1, 'DRB%': 11.1, 'TRB%': 6.0, 'AST%': 19.2, 'STL%': 0.6, 'BLK%': 0.5, 'TOV%': 10.3, 'USG%': 19.8, 'OWS': 1.6, 'DWS': 0.3, 'WS': 1.9, 'WS/48': 0.1, 'OBPM': 1.1, 'DBPM': -1.4, 'BPM': -0.3, 'VORP': 0.4}
[adv] Jakob Poeltl → {'PER': 19.1, 'TS%': 0.612, '3PAr': 0.0, 'FTr': 0.323, 'ORB%': 12.1, 'DRB%': 22.5, 'TRB%': 17.3, 'AST

2019-20 workers:  62%|██████▏   | 323/520 [00:00<00:00, 408.36it/s][A

[adv] Landry Shamet → {'PER': 8.5, 'TS%': 0.58, '3PAr': 0.757, 'FTr': 0.194, 'ORB%': 0.4, 'DRB%': 6.6, 'TRB%': 3.6, 'AST%': 9.1, 'STL%': 0.7, 'BLK%': 0.6, 'TOV%': 9.0, 'USG%': 13.4, 'OWS': 1.5, 'DWS': 1.1, 'WS': 2.6, 'WS/48': 0.086, 'OBPM': -1.8, 'DBPM': -0.3, 'BPM': -2.1, 'VORP': 0.0}
[adv] Kyle Kuzma → {'PER': 12.2, 'TS%': 0.531, '3PAr': 0.407, 'FTr': 0.232, 'ORB%': 3.8, 'DRB%': 15.7, 'TRB%': 9.8, 'AST%': 7.6, 'STL%': 0.9, 'BLK%': 1.5, 'TOV%': 11.0, 'USG%': 22.9, 'OWS': 0.1, 'DWS': 1.9, 'WS': 2.0, 'WS/48': 0.063, 'OBPM': -1.6, 'DBPM': -1.0, 'BPM': -2.6, 'VORP': -0.2}
[adv] Tony Bradley → {'PER': 21.7, 'TS%': 0.681, '3PAr': 0.016, 'FTr': 0.243, 'ORB%': 19.1, 'DRB%': 25.0, 'TRB%': 22.2, 'AST%': 5.4, 'STL%': 1.0, 'BLK%': 4.1, 'TOV%': 12.5, 'USG%': 15.8, 'OWS': 2.2, 'DWS': 1.1, 'WS': 3.3, 'WS/48': 0.236, 'OBPM': 1.0, 'DBPM': 0.3, 'BPM': 1.3, 'VORP': 0.6}
[adv] Jordan Poole → {'PER': 7.2, 'TS%': 0.454, '3PAr': 0.528, 'FTr': 0.237, 'ORB%': 0.7, 'DRB%': 9.6, 'TRB%': 5.0, 'AST%': 15.8, 'STL%



[adv] Iman Shumpert → {'PER': 5.3, 'TS%': 0.403, '3PAr': 0.516, 'FTr': 0.109, 'ORB%': 3.9, 'DRB%': 10.5, 'TRB%': 7.3, 'AST%': 6.7, 'STL%': 2.4, 'BLK%': 0.7, 'TOV%': 13.0, 'USG%': 13.4, 'OWS': -0.3, 'DWS': 0.3, 'WS': 0.0, 'WS/48': -0.005, 'OBPM': -6.2, 'DBPM': 0.7, 'BPM': -5.5, 'VORP': -0.2}
[adv] Gary Trent Jr. → {'PER': 12.9, 'TS%': 0.587, '3PAr': 0.608, 'FTr': 0.102, 'ORB%': 2.0, 'DRB%': 5.8, 'TRB%': 3.9, 'AST%': 6.3, 'STL%': 1.7, 'BLK%': 1.0, 'TOV%': 4.4, 'USG%': 15.3, 'OWS': 2.2, 'DWS': 0.3, 'WS': 2.5, 'WS/48': 0.089, 'OBPM': 0.5, 'DBPM': -0.8, 'BPM': -0.3, 'VORP': 0.6}
[adv] Isaiah Roby → {'PER': -7.7, 'TS%': 0.0, '3PAr': 0.0, 'FTr': 0.0, 'ORB%': 0.0, 'DRB%': 19.5, 'TRB%': 10.0, 'AST%': 0.0, 'STL%': 0.0, 'BLK%': 0.0, 'TOV%': 50.0, 'USG%': 8.0, 'OWS': -0.1, 'DWS': 0.0, 'WS': 0.0, 'WS/48': -0.188, 'OBPM': -12.0, 'DBPM': -3.4, 'BPM': -15.4, 'VORP': 0.0}
[adv] Wenyen Gabriel → {'PER': 9.6, 'TS%': 0.546, '3PAr': 0.417, 'FTr': 0.458, 'ORB%': 9.5, 'DRB%': 13.9, 'TRB%': 11.7, 'AST%': 4.4,

2019-20 workers: 100%|██████████| 520/520 [00:01<00:00, 397.53it/s]

[adv] Troy Daniels → {'PER': 8.4, 'TS%': 0.518, '3PAr': 0.71, 'FTr': 0.086, 'ORB%': 2.5, 'DRB%': 8.2, 'TRB%': 5.4, 'AST%': 4.3, 'STL%': 1.0, 'BLK%': 0.5, 'TOV%': 5.9, 'USG%': 16.3, 'OWS': 0.1, 'DWS': 0.5, 'WS': 0.6, 'WS/48': 0.056, 'OBPM': -1.7, 'DBPM': -1.2, 'BPM': -2.9, 'VORP': -0.1}
[adv] Jeff Green → {'PER': 13.7, 'TS%': 0.59, '3PAr': 0.532, 'FTr': 0.265, 'ORB%': 2.7, 'DRB%': 12.0, 'TRB%': 7.5, 'AST%': 7.9, 'STL%': 1.3, 'BLK%': 1.7, 'TOV%': 9.9, 'USG%': 19.0, 'OWS': 1.2, 'DWS': 0.9, 'WS': 2.1, 'WS/48': 0.104, 'OBPM': 0.0, 'DBPM': -0.1, 'BPM': -0.1, 'VORP': 0.5}
[adv] Chris Clemons → {'PER': 13.1, 'TS%': 0.548, '3PAr': 0.754, 'FTr': 0.077, 'ORB%': 1.4, 'DRB%': 9.4, 'TRB%': 5.4, 'AST%': 14.3, 'STL%': 1.4, 'BLK%': 1.8, 'TOV%': 11.5, 'USG%': 23.6, 'OWS': 0.1, 'DWS': 0.2, 'WS': 0.4, 'WS/48': 0.058, 'OBPM': 0.3, 'DBPM': -1.3, 'BPM': -1.0, 'VORP': 0.1}
[adv] Rayjon Tucker → {'PER': 9.7, 'TS%': 0.584, '3PAr': 0.395, 'FTr': 0.535, 'ORB%': 2.1, 'DRB%': 11.2, 'TRB%': 6.8, 'AST%': 4.4, 'STL%':


Seasons:  44%|████▍     | 4/9 [01:14<01:36, 19.39s/it]

[dbg] 2019-20 after injury merge: 445
Percentage calculations completed
[dbg] 2019-20 final merged: 445
[fetch] https://hoopshype.com/salaries/2020-21/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://hoopshype.com/salaries/2020-2021/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://www.espn.com/nba/salaries/_/type/team/year/2021 (attempt 1)
  -> ESPN team salary fallback added 40 rows
                    Team  Team_Salary   Season
0      Stephen Curry, PG     43006362  2020-21
1  Russell Westbrook, PG     41358814  2020-21
2         Chris Paul, PG     41358814  2020-21
3       James Harden, SG     41254920  2020-21
4          John Wall, PG     41254920  2020-21
[fetch_season_players] 579 players for 2020-21
[adv] fetching https://www.basketball-reference.com/leagues/NBA_2021_advanced.html





[adv] Russell Westbrook → {'PER': 19.5, 'TS%': 0.509, '3PAr': 0.221, 'FTr': 0.334, 'ORB%': 4.9, 'DRB%': 28.6, 'TRB%': 16.8, 'AST%': 48.6, 'STL%': 1.7, 'BLK%': 0.8, 'TOV%': 18.0, 'USG%': 30.2, 'OWS': 0.5, 'DWS': 3.2, 'WS': 3.7, 'WS/48': 0.075, 'OBPM': 2.6, 'DBPM': 1.1, 'BPM': 3.7, 'VORP': 3.4}
[adv] Stephen Curry → {'PER': 26.3, 'TS%': 0.655, '3PAr': 0.587, 'FTr': 0.289, 'ORB%': 1.5, 'DRB%': 15.4, 'TRB%': 8.5, 'AST%': 30.5, 'STL%': 1.7, 'BLK%': 0.3, 'TOV%': 12.2, 'USG%': 34.8, 'OWS': 6.5, 'DWS': 2.5, 'WS': 9.0, 'WS/48': 0.201, 'OBPM': 8.3, 'DBPM': 0.4, 'BPM': 8.7, 'VORP': 5.8}
[adv] James Harden → {'PER': 24.5, 'TS%': 0.618, '3PAr': 0.455, 'FTr': 0.44, 'ORB%': 2.5, 'DRB%': 20.6, 'TRB%': 11.9, 'AST%': 44.4, 'STL%': 1.6, 'BLK%': 1.8, 'TOV%': 16.8, 'USG%': 28.4, 'OWS': 5.3, 'DWS': 1.7, 'WS': 7.0, 'WS/48': 0.208, 'OBPM': 6.2, 'DBPM': 1.0, 'BPM': 7.2, 'VORP': 3.7}
[adv] Chris Paul → {'PER': 21.4, 'TS%': 0.599, '3PAr': 0.294, 'FTr': 0.206, 'ORB%': 1.3, 'DRB%': 14.5, 'TRB%': 8.0, 'AST%': 40.9,

2020-21 workers:  13%|█▎        | 74/556 [00:00<00:01, 360.86it/s][A


[adv] Clint Capela → {'PER': 24.3, 'TS%': 0.601, '3PAr': 0.0, 'FTr': 0.327, 'ORB%': 17.5, 'DRB%': 34.3, 'TRB%': 26.1, 'AST%': 4.1, 'STL%': 1.1, 'BLK%': 6.0, 'TOV%': 8.4, 'USG%': 19.9, 'OWS': 4.9, 'DWS': 3.3, 'WS': 8.2, 'WS/48': 0.207, 'OBPM': 2.7, 'DBPM': 0.0, 'BPM': 2.7, 'VORP': 2.2}
[adv] Cody Zeller → {'PER': 18.2, 'TS%': 0.599, '3PAr': 0.086, 'FTr': 0.367, 'ORB%': 12.6, 'DRB%': 22.9, 'TRB%': 17.7, 'AST%': 13.2, 'STL%': 1.3, 'BLK%': 1.7, 'TOV%': 11.9, 'USG%': 18.3, 'OWS': 2.1, 'DWS': 1.1, 'WS': 3.3, 'WS/48': 0.156, 'OBPM': -0.2, 'DBPM': -0.2, 'BPM': -0.5, 'VORP': 0.4}
[adv] Danny Green → {'PER': 12.1, 'TS%': 0.582, '3PAr': 0.794, 'FTr': 0.074, 'ORB%': 3.2, 'DRB%': 11.5, 'TRB%': 7.4, 'AST%': 8.3, 'STL%': 2.3, 'BLK%': 2.6, 'TOV%': 10.7, 'USG%': 14.0, 'OWS': 1.6, 'DWS': 3.1, 'WS': 4.6, 'WS/48': 0.115, 'OBPM': 0.4, 'DBPM': 1.5, 'BPM': 1.9, 'VORP': 1.9}
[adv] Andre Iguodala → {'PER': 9.2, 'TS%': 0.519, '3PAr': 0.734, 'FTr': 0.153, 'ORB%': 3.3, 'DRB%': 15.4, 'TRB%': 9.4, 'AST%': 14.3, 'ST

2020-21 workers:  28%|██▊       | 157/556 [00:00<00:01, 391.65it/s][A


[adv] De'Aaron Fox → {'PER': 20.7, 'TS%': 0.565, '3PAr': 0.288, 'FTr': 0.376, 'ORB%': 1.8, 'DRB%': 9.4, 'TRB%': 5.5, 'AST%': 32.7, 'STL%': 2.1, 'BLK%': 1.1, 'TOV%': 11.9, 'USG%': 31.0, 'OWS': 3.4, 'DWS': 0.8, 'WS': 4.2, 'WS/48': 0.098, 'OBPM': 3.1, 'DBPM': -1.3, 'BPM': 1.8, 'VORP': 2.0}
[adv] Mason Plumlee → {'PER': 18.7, 'TS%': 0.638, '3PAr': 0.018, 'FTr': 0.444, 'ORB%': 10.6, 'DRB%': 28.1, 'TRB%': 19.3, 'AST%': 21.0, 'STL%': 1.4, 'BLK%': 2.9, 'TOV%': 18.7, 'USG%': 16.3, 'OWS': 3.0, 'DWS': 2.1, 'WS': 5.1, 'WS/48': 0.164, 'OBPM': 0.2, 'DBPM': 2.0, 'BPM': 2.2, 'VORP': 1.6}
[adv] Lou Williams → {'PER': 14.0, 'TS%': 0.519, '3PAr': 0.268, 'FTr': 0.268, 'ORB%': 1.7, 'DRB%': 8.9, 'TRB%': 5.4, 'AST%': 23.4, 'STL%': 1.6, 'BLK%': 0.4, 'TOV%': 13.0, 'USG%': 25.7, 'OWS': 0.5, 'DWS': 1.2, 'WS': 1.7, 'WS/48': 0.056, 'OBPM': -1.0, 'DBPM': -1.5, 'BPM': -2.5, 'VORP': -0.2}
[adv] P.J. Tucker → {'PER': 5.5, 'TS%': 0.51, '3PAr': 0.672, 'FTr': 0.158, 'ORB%': 3.9, 'DRB%': 12.3, 'TRB%': 8.0, 'AST%': 5.7, 'S

2020-21 workers:  44%|████▍     | 244/556 [00:00<00:00, 413.05it/s][A

[adv] Dorian Finney-Smith → {'PER': 12.0, 'TS%': 0.609, '3PAr': 0.648, 'FTr': 0.088, 'ORB%': 5.7, 'DRB%': 13.0, 'TRB%': 9.3, 'AST%': 7.2, 'STL%': 1.3, 'BLK%': 1.2, 'TOV%': 9.0, 'USG%': 12.2, 'OWS': 2.8, 'DWS': 1.6, 'WS': 4.4, 'WS/48': 0.11, 'OBPM': 0.2, 'DBPM': 0.0, 'BPM': 0.1, 'VORP': 1.0}
[adv] Terrance Ferguson → {'PER': -7.1, 'TS%': 0.143, '3PAr': 0.714, 'FTr': 0.0, 'ORB%': 0.0, 'DRB%': 2.2, 'TRB%': 1.1, 'AST%': 4.9, 'STL%': 1.0, 'BLK%': 0.0, 'TOV%': 36.4, 'USG%': 9.7, 'OWS': -0.2, 'DWS': 0.0, 'WS': -0.2, 'WS/48': -0.161, 'OBPM': -12.1, 'DBPM': -2.2, 'BPM': -14.3, 'VORP': -0.2}
[adv] Jevon Carter → {'PER': 12.2, 'TS%': 0.548, '3PAr': 0.677, 'FTr': 0.031, 'ORB%': 2.6, 'DRB%': 11.8, 'TRB%': 7.3, 'AST%': 13.0, 'STL%': 2.0, 'BLK%': 1.1, 'TOV%': 6.6, 'USG%': 15.0, 'OWS': 0.7, 'DWS': 0.8, 'WS': 1.6, 'WS/48': 0.104, 'OBPM': -0.4, 'DBPM': 0.9, 'BPM': 0.5, 'VORP': 0.5}
[adv] Miles Bridges → {'PER': 14.7, 'TS%': 0.625, '3PAr': 0.466, 'FTr': 0.182, 'ORB%': 4.5, 'DRB%': 18.0, 'TRB%': 11.1, 'AS




[adv] Damyean Dotson → {'PER': 8.1, 'TS%': 0.499, '3PAr': 0.543, 'FTr': 0.123, 'ORB%': 0.6, 'DRB%': 11.1, 'TRB%': 5.7, 'AST%': 15.1, 'STL%': 0.9, 'BLK%': 0.4, 'TOV%': 12.0, 'USG%': 16.9, 'OWS': -0.4, 'DWS': 0.3, 'WS': 0.0, 'WS/48': -0.002, 'OBPM': -3.4, 'DBPM': -1.4, 'BPM': -4.8, 'VORP': -0.6}[adv] Gary Clark → {'PER': 6.0, 'TS%': 0.436, '3PAr': 0.818, 'FTr': 0.076, 'ORB%': 5.1, 'DRB%': 13.5, 'TRB%': 9.1, 'AST%': 6.4, 'STL%': 1.0, 'BLK%': 1.0, 'TOV%': 11.1, 'USG%': 10.1, 'OWS': -0.3, 'DWS': 0.4, 'WS': 0.1, 'WS/48': 0.008, 'OBPM': -4.2, 'DBPM': -0.7, 'BPM': -4.9, 'VORP': -0.5}
[adv] Kenrich Williams → {'PER': 14.0, 'TS%': 0.599, '3PAr': 0.285, 'FTr': 0.153, 'ORB%': 6.1, 'DRB%': 14.0, 'TRB%': 10.0, 'AST%': 16.4, 'STL%': 1.8, 'BLK%': 1.1, 'TOV%': 14.8, 'USG%': 15.3, 'OWS': 1.6, 'DWS': 1.2, 'WS': 2.8, 'WS/48': 0.096, 'OBPM': -1.2, 'DBPM': 0.5, 'BPM': -0.7, 'VORP': 0.4}

[adv] Cameron Payne → {'PER': 17.4, 'TS%': 0.602, '3PAr': 0.42, 'FTr': 0.142, 'ORB%': 1.7, 'DRB%': 13.2, 'TRB%': 7.6, 'AS

2020-21 workers:  78%|███████▊  | 436/556 [00:01<00:00, 466.43it/s][A

[adv] Terence Davis → {'PER': 12.7, 'TS%': 0.555, '3PAr': 0.586, 'FTr': 0.121, 'ORB%': 2.0, 'DRB%': 14.1, 'TRB%': 7.9, 'AST%': 11.4, 'STL%': 2.0, 'BLK%': 1.2, 'TOV%': 11.6, 'USG%': 21.8, 'OWS': 0.1, 'DWS': 0.8, 'WS': 0.8, 'WS/48': 0.038, 'OBPM': -0.9, 'DBPM': -0.9, 'BPM': -1.7, 'VORP': 0.1}[adv] Miye Oni → {'PER': 6.4, 'TS%': 0.517, '3PAr': 0.889, 'FTr': 0.061, 'ORB%': 4.9, 'DRB%': 11.9, 'TRB%': 8.6, 'AST%': 6.8, 'STL%': 1.0, 'BLK%': 1.2, 'TOV%': 13.6, 'USG%': 9.8, 'OWS': 0.2, 'DWS': 0.7, 'WS': 0.9, 'WS/48': 0.081, 'OBPM': -3.0, 'DBPM': 1.3, 'BPM': -1.7, 'VORP': 0.0}

[adv] Isaiah Roby → {'PER': 11.9, 'TS%': 0.555, '3PAr': 0.254, 'FTr': 0.273, 'ORB%': 6.4, 'DRB%': 18.6, 'TRB%': 12.4, 'AST%': 11.4, 'STL%': 1.7, 'BLK%': 2.3, 'TOV%': 19.0, 'USG%': 17.7, 'OWS': -0.3, 'DWS': 1.5, 'WS': 1.3, 'WS/48': 0.043, 'OBPM': -3.7, 'DBPM': 0.4, 'BPM': -3.3, 'VORP': -0.5}
[adv] Nic Claxton → {'PER': 16.9, 'TS%': 0.612, '3PAr': 0.034, 'FTr': 0.441, 'ORB%': 8.4, 'DRB%': 21.8, 'TRB%': 15.4, 'AST%': 6.4, 'S



[adv] Cj Elleby → {'PER': 9.4, 'TS%': 0.468, '3PAr': 0.515, 'FTr': 0.227, 'ORB%': 6.0, 'DRB%': 12.0, 'TRB%': 8.9, 'AST%': 7.1, 'STL%': 1.5, 'BLK%': 1.4, 'TOV%': 6.4, 'USG%': 17.4, 'OWS': 0.0, 'DWS': 0.1, 'WS': 0.1, 'WS/48': 0.032, 'OBPM': -4.0, 'DBPM': -0.9, 'BPM': -4.9, 'VORP': -0.1}
[adv] Ignas Brazdeikis → {'PER': 9.5, 'TS%': 0.51, '3PAr': 0.337, 'FTr': 0.169, 'ORB%': 4.5, 'DRB%': 14.7, 'TRB%': 9.4, 'AST%': 10.5, 'STL%': 0.8, 'BLK%': 1.1, 'TOV%': 11.9, 'USG%': 17.6, 'OWS': -0.1, 'DWS': 0.1, 'WS': 0.1, 'WS/48': 0.016, 'OBPM': -3.2, 'DBPM': -2.1, 'BPM': -5.3, 'VORP': -0.2}
[adv] Kelan Martin → {'PER': 13.9, 'TS%': 0.551, '3PAr': 0.435, 'FTr': 0.072, 'ORB%': 6.1, 'DRB%': 20.1, 'TRB%': 13.1, 'AST%': 7.5, 'STL%': 1.5, 'BLK%': 2.5, 'TOV%': 10.1, 'USG%': 20.9, 'OWS': 0.1, 'DWS': 0.4, 'WS': 0.4, 'WS/48': 0.066, 'OBPM': -1.1, 'DBPM': -1.3, 'BPM': -2.4, 'VORP': 0.0}
[adv] Deividas Sirvydis → {'PER': 5.7, 'TS%': 0.481, '3PAr': 0.7, 'FTr': 0.15, 'ORB%': 0.0, 'DRB%': 24.5, 'TRB%': 12.1, 'AST%': 

2020-21 workers: 100%|██████████| 556/556 [00:16<00:00, 33.13it/s]

[adv] Charlie Brown Jr. → {'PER': 4.1, 'TS%': 0.422, '3PAr': 0.488, 'FTr': 0.233, 'ORB%': 2.0, 'DRB%': 9.7, 'TRB%': 5.8, 'AST%': 8.2, 'STL%': 1.3, 'BLK%': 1.2, 'TOV%': 11.2, 'USG%': 14.9, 'OWS': -0.2, 'DWS': 0.1, 'WS': -0.1, 'WS/48': -0.032, 'OBPM': -7.4, 'DBPM': -1.3, 'BPM': -8.7, 'VORP': -0.3}[adv] Elijah Bryant → {'PER': 10.6, 'TS%': 0.559, '3PAr': 0.385, 'FTr': 0.231, 'ORB%': 6.8, 'DRB%': 12.7, 'TRB%': 9.9, 'AST%': 12.6, 'STL%': 0.0, 'BLK%': 2.8, 'TOV%': 21.8, 'USG%': 24.0, 'OWS': 0.0, 'DWS': 0.0, 'WS': 0.0, 'WS/48': 0.008, 'OBPM': -3.9, 'DBPM': -3.4, 'BPM': -7.3, 'VORP': 0.0}

[adv] Mike James → {'PER': 11.9, 'TS%': 0.481, '3PAr': 0.337, 'FTr': 0.293, 'ORB%': 1.5, 'DRB%': 12.9, 'TRB%': 7.5, 'AST%': 30.6, 'STL%': 1.2, 'BLK%': 0.4, 'TOV%': 16.1, 'USG%': 22.9, 'OWS': 0.0, 'DWS': 0.1, 'WS': 0.1, 'WS/48': 0.028, 'OBPM': -2.1, 'DBPM': -2.3, 'BPM': -4.4, 'VORP': -0.1}
[dbg] 2020-21 processed players: 460
[dbg] 2020-21 before standings merge: 460
[dbg] 2020-21 after standings merge: 460
[


Seasons:  56%|█████▌    | 5/9 [01:50<01:40, 25.20s/it]

[dbg] 2020-21 after injury merge: 457
Percentage calculations completed
[dbg] 2020-21 final merged: 457
[fetch] https://hoopshype.com/salaries/2021-22/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://hoopshype.com/salaries/2021-2022/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://www.espn.com/nba/salaries/_/type/team/year/2022 (attempt 1)
  -> ESPN team salary fallback added 40 rows
                    Team  Team_Salary   Season
0      Stephen Curry, PG     45780966  2021-22
1       James Harden, SG     44310840  2021-22
2          John Wall, PG     44310840  2021-22
3  Russell Westbrook, PG     44211146  2021-22
4       Kevin Durant, PF     42018900  2021-22
[fetch_season_players] 639 players for 2021-22
[adv] fetching https://www.basketball-reference.com/leagues/NBA_2022_advanced.html





[adv] James Harden → {'PER': 20.9, 'TS%': 0.583, '3PAr': 0.452, 'FTr': 0.54, 'ORB%': 2.6, 'DRB%': 19.7, 'TRB%': 11.3, 'AST%': 40.3, 'STL%': 1.7, 'BLK%': 1.3, 'TOV%': 18.8, 'USG%': 27.2, 'OWS': 4.9, 'DWS': 2.7, 'WS': 7.6, 'WS/48': 0.152, 'OBPM': 3.7, 'DBPM': 0.3, 'BPM': 4.0, 'VORP': 3.7}
[adv] Stephen Curry → {'PER': 21.4, 'TS%': 0.601, '3PAr': 0.613, 'FTr': 0.243, 'ORB%': 1.7, 'DRB%': 14.4, 'TRB%': 8.3, 'AST%': 30.4, 'STL%': 1.9, 'BLK%': 1.0, 'TOV%': 13.2, 'USG%': 30.8, 'OWS': 4.6, 'DWS': 3.4, 'WS': 8.0, 'WS/48': 0.173, 'OBPM': 5.4, 'DBPM': 0.4, 'BPM': 5.8, 'VORP': 4.4}
[adv] Kevin Durant → {'PER': 25.6, 'TS%': 0.634, '3PAr': 0.269, 'FTr': 0.367, 'ORB%': 1.6, 'DRB%': 19.6, 'TRB%': 10.8, 'AST%': 29.1, 'STL%': 1.1, 'BLK%': 2.3, 'TOV%': 12.9, 'USG%': 31.2, 'OWS': 6.4, 'DWS': 2.0, 'WS': 8.4, 'WS/48': 0.198, 'OBPM': 6.4, 'DBPM': 0.7, 'BPM': 7.2, 'VORP': 4.8}
[adv] Lebron James → {'PER': 26.2, 'TS%': 0.619, '3PAr': 0.367, 'FTr': 0.275, 'ORB%': 3.3, 'DRB%': 20.4, 'TRB%': 11.8, 'AST%': 30.6, '

2021-22 workers:  13%|█▎        | 63/497 [00:00<00:01, 311.25it/s][A

[adv] no advanced stats for Bogdan Bogdanovic in 2021-22
[adv] Terry Rozier → {'PER': 17.6, 'TS%': 0.566, '3PAr': 0.508, 'FTr': 0.151, 'ORB%': 2.4, 'DRB%': 11.3, 'TRB%': 6.8, 'AST%': 19.7, 'STL%': 1.8, 'BLK%': 1.0, 'TOV%': 7.2, 'USG%': 23.2, 'OWS': 4.3, 'DWS': 1.6, 'WS': 5.9, 'WS/48': 0.115, 'OBPM': 2.4, 'DBPM': -0.7, 'BPM': 1.7, 'VORP': 2.3}
[adv] Ricky Rubio → {'PER': 14.0, 'TS%': 0.488, '3PAr': 0.423, 'FTr': 0.251, 'ORB%': 1.7, 'DRB%': 14.1, 'TRB%': 8.0, 'AST%': 34.6, 'STL%': 2.5, 'BLK%': 0.6, 'TOV%': 16.5, 'USG%': 24.9, 'OWS': -0.1, 'DWS': 1.4, 'WS': 1.3, 'WS/48': 0.064, 'OBPM': -1.0, 'DBPM': 1.2, 'BPM': 0.2, 'VORP': 0.5}
[adv] Caris Levert → {'PER': 15.1, 'TS%': 0.525, '3PAr': 0.3, 'FTr': 0.226, 'ORB%': 2.7, 'DRB%': 10.7, 'TRB%': 6.7, 'AST%': 22.0, 'STL%': 1.4, 'BLK%': 1.1, 'TOV%': 10.3, 'USG%': 25.4, 'OWS': 1.2, 'DWS': 1.0, 'WS': 2.2, 'WS/48': 0.059, 'OBPM': 0.0, 'DBPM': -1.2, 'BPM': -1.2, 'VORP': 0.4}
[adv] Joe Harris → {'PER': 9.6, 'TS%': 0.624, '3PAr': 0.71, 'FTr': 0.048, 'ORB




[adv] Jakob Poeltl → {'PER': 20.8, 'TS%': 0.613, '3PAr': 0.002, 'FTr': 0.287, 'ORB%': 13.9, 'DRB%': 20.0, 'TRB%': 16.9, 'AST%': 14.0, 'STL%': 1.1, 'BLK%': 5.0, 'TOV%': 12.7, 'USG%': 18.3, 'OWS': 4.4, 'DWS': 2.5, 'WS': 6.9, 'WS/48': 0.168, 'OBPM': 0.9, 'DBPM': 0.7, 'BPM': 1.6, 'VORP': 1.7}
[adv] Royce O'Neale → {'PER': 10.3, 'TS%': 0.608, '3PAr': 0.689, 'FTr': 0.126, 'ORB%': 3.0, 'DRB%': 13.4, 'TRB%': 8.4, 'AST%': 10.5, 'STL%': 1.8, 'BLK%': 1.2, 'TOV%': 14.4, 'USG%': 9.9, 'OWS': 2.7, 'DWS': 2.8, 'WS': 5.5, 'WS/48': 0.109, 'OBPM': -0.8, 'DBPM': 1.7, 'BPM': 0.8, 'VORP': 1.7}
[adv] Thomas Bryant → {'PER': 16.6, 'TS%': 0.607, '3PAr': 0.284, 'FTr': 0.27, 'ORB%': 6.7, 'DRB%': 20.4, 'TRB%': 13.6, 'AST%': 7.9, 'STL%': 0.7, 'BLK%': 4.1, 'TOV%': 10.3, 'USG%': 18.7, 'OWS': 0.7, 'DWS': 0.4, 'WS': 1.1, 'WS/48': 0.118, 'OBPM': -0.4, 'DBPM': -0.7, 'BPM': -1.0, 'VORP': 0.1}
[adv] Alex Caruso → {'PER': 11.7, 'TS%': 0.528, '3PAr': 0.492, 'FTr': 0.285, 'ORB%': 3.3, 'DRB%': 11.2, 'TRB%': 7.3, 'AST%': 18.5,

2021-22 workers:  44%|████▍     | 220/497 [00:00<00:00, 371.61it/s][A

[adv] Ziaire Williams → {'PER': 9.7, 'TS%': 0.559, '3PAr': 0.571, 'FTr': 0.13, 'ORB%': 2.0, 'DRB%': 8.2, 'TRB%': 5.0, 'AST%': 6.3, 'STL%': 1.2, 'BLK%': 0.8, 'TOV%': 8.4, 'USG%': 14.9, 'OWS': 1.1, 'DWS': 1.1, 'WS': 2.2, 'WS/48': 0.08, 'OBPM': -2.4, 'DBPM': -0.6, 'BPM': -3.0, 'VORP': -0.3}
[adv] Lonnie Walker Iv → {'PER': 12.3, 'TS%': 0.512, '3PAr': 0.462, 'FTr': 0.194, 'ORB%': 1.2, 'DRB%': 10.7, 'TRB%': 5.9, 'AST%': 13.4, 'STL%': 1.3, 'BLK%': 1.0, 'TOV%': 7.9, 'USG%': 23.6, 'OWS': 0.1, 'DWS': 1.1, 'WS': 1.2, 'WS/48': 0.037, 'OBPM': -1.2, 'DBPM': -1.3, 'BPM': -2.5, 'VORP': -0.2}
[adv] Cameron Johnson → {'PER': 15.2, 'TS%': 0.625, '3PAr': 0.644, 'FTr': 0.188, 'ORB%': 2.7, 'DRB%': 14.0, 'TRB%': 8.5, 'AST%': 7.7, 'STL%': 1.6, 'BLK%': 0.8, 'TOV%': 6.7, 'USG%': 17.5, 'OWS': 3.2, 'DWS': 2.4, 'WS': 5.6, 'WS/48': 0.156, 'OBPM': 2.0, 'DBPM': 0.7, 'BPM': 2.7, 'VORP': 2.1}
[adv] Bobby Portis → {'PER': 17.7, 'TS%': 0.571, '3PAr': 0.388, 'FTr': 0.13, 'ORB%': 9.6, 'DRB%': 24.4, 'TRB%': 17.2, 'AST%': 6



[adv] Josh Christopher → {'PER': 12.2, 'TS%': 0.534, '3PAr': 0.378, 'FTr': 0.226, 'ORB%': 4.3, 'DRB%': 11.1, 'TRB%': 7.7, 'AST%': 17.3, 'STL%': 2.3, 'BLK%': 0.9, 'TOV%': 17.2, 'USG%': 21.1, 'OWS': -0.4, 'DWS': 0.7, 'WS': 0.3, 'WS/48': 0.01, 'OBPM': -2.2, 'DBPM': -1.3, 'BPM': -3.5, 'VORP': -0.5}
[adv] Dylan Windler → {'PER': 8.8, 'TS%': 0.519, '3PAr': 0.714, 'FTr': 0.184, 'ORB%': 5.2, 'DRB%': 16.0, 'TRB%': 10.7, 'AST%': 10.0, 'STL%': 1.5, 'BLK%': 0.6, 'TOV%': 13.1, 'USG%': 11.8, 'OWS': 0.2, 'DWS': 0.6, 'WS': 0.8, 'WS/48': 0.084, 'OBPM': -3.0, 'DBPM': 1.2, 'BPM': -1.7, 'VORP': 0.0}
[adv] John Konchar → {'PER': 14.0, 'TS%': 0.615, '3PAr': 0.481, 'FTr': 0.187, 'ORB%': 6.9, 'DRB%': 20.1, 'TRB%': 13.3, 'AST%': 10.6, 'STL%': 1.7, 'BLK%': 1.5, 'TOV%': 9.3, 'USG%': 9.9, 'OWS': 2.4, 'DWS': 1.8, 'WS': 4.2, 'WS/48': 0.155, 'OBPM': 0.2, 'DBPM': 1.3, 'BPM': 1.5, 'VORP': 1.2}
[adv] Immanuel Quickley → {'PER': 14.6, 'TS%': 0.543, '3PAr': 0.547, 'FTr': 0.278, 'ORB%': 1.8, 'DRB%': 13.1, 'TRB%': 7.4, 'AS



[adv] Otto Porter Jr. → {'PER': 15.9, 'TS%': 0.581, '3PAr': 0.519, 'FTr': 0.147, 'ORB%': 6.9, 'DRB%': 21.0, 'TRB%': 14.2, 'AST%': 9.6, 'STL%': 2.4, 'BLK%': 2.0, 'TOV%': 7.7, 'USG%': 15.0, 'OWS': 2.2, 'DWS': 2.7, 'WS': 4.9, 'WS/48': 0.168, 'OBPM': 1.0, 'DBPM': 2.1, 'BPM': 3.1, 'VORP': 1.8}
[adv] Hassan Whiteside → {'PER': 22.9, 'TS%': 0.662, '3PAr': 0.0, 'FTr': 0.506, 'ORB%': 16.2, 'DRB%': 29.9, 'TRB%': 23.3, 'AST%': 3.4, 'STL%': 0.9, 'BLK%': 7.8, 'TOV%': 11.6, 'USG%': 17.1, 'OWS': 3.4, 'DWS': 2.3, 'WS': 5.8, 'WS/48': 0.238, 'OBPM': 0.6, 'DBPM': 1.4, 'BPM': 2.1, 'VORP': 1.2}
[adv] Malik Monk → {'PER': 14.4, 'TS%': 0.597, '3PAr': 0.535, 'FTr': 0.142, 'ORB%': 1.9, 'DRB%': 11.0, 'TRB%': 6.4, 'AST%': 15.2, 'STL%': 1.4, 'BLK%': 1.2, 'TOV%': 12.2, 'USG%': 20.1, 'OWS': 2.3, 'DWS': 1.4, 'WS': 3.6, 'WS/48': 0.081, 'OBPM': 0.5, 'DBPM': -1.0, 'BPM': -0.5, 'VORP': 0.8}
[adv] Kent Bazemore → {'PER': 6.0, 'TS%': 0.451, '3PAr': 0.576, 'FTr': 0.122, 'ORB%': 2.6, 'DRB%': 11.0, 'TRB%': 6.8, 'AST%': 8.1, 



[adv] Usman Garuba → {'PER': 12.9, 'TS%': 0.51, '3PAr': 0.455, 'FTr': 0.159, 'ORB%': 9.6, 'DRB%': 28.7, 'TRB%': 19.1, 'AST%': 9.6, 'STL%': 2.0, 'BLK%': 4.1, 'TOV%': 11.3, 'USG%': 9.4, 'OWS': 0.2, 'DWS': 0.3, 'WS': 0.5, 'WS/48': 0.105, 'OBPM': -1.8, 'DBPM': 0.9, 'BPM': -0.9, 'VORP': 0.1}
[adv] Omer Yurtseven → {'PER': 17.4, 'TS%': 0.546, '3PAr': 0.045, 'FTr': 0.247, 'ORB%': 13.9, 'DRB%': 33.0, 'TRB%': 23.6, 'AST%': 11.0, 'STL%': 1.2, 'BLK%': 3.0, 'TOV%': 13.0, 'USG%': 19.9, 'OWS': 0.8, 'DWS': 1.4, 'WS': 2.1, 'WS/48': 0.145, 'OBPM': -1.4, 'DBPM': 0.4, 'BPM': -1.0, 'VORP': 0.2}
[adv] Leandro Bolmaro → {'PER': 5.4, 'TS%': 0.419, '3PAr': 0.333, 'FTr': 0.241, 'ORB%': 6.9, 'DRB%': 12.3, 'TRB%': 9.6, 'AST%': 11.0, 'STL%': 1.2, 'BLK%': 0.0, 'TOV%': 17.9, 'USG%': 12.6, 'OWS': -0.1, 'DWS': 0.2, 'WS': 0.0, 'WS/48': 0.004, 'OBPM': -5.1, 'DBPM': -0.7, 'BPM': -5.8, 'VORP': -0.2}
[adv] Jt Thor → {'PER': 9.7, 'TS%': 0.525, '3PAr': 0.491, 'FTr': 0.364, 'ORB%': 4.4, 'DRB%': 13.1, 'TRB%': 8.7, 'AST%': 9.2



[adv] Ayo Dosunmu → {'PER': 11.2, 'TS%': 0.596, '3PAr': 0.348, 'FTr': 0.146, 'ORB%': 1.4, 'DRB%': 9.9, 'TRB%': 5.7, 'AST%': 16.5, 'STL%': 1.4, 'BLK%': 1.2, 'TOV%': 15.9, 'USG%': 14.1, 'OWS': 1.7, 'DWS': 1.3, 'WS': 3.0, 'WS/48': 0.069, 'OBPM': -1.9, 'DBPM': -0.1, 'BPM': -2.0, 'VORP': 0.0}
[adv] Vit Krejci → {'PER': 8.5, 'TS%': 0.529, '3PAr': 0.587, 'FTr': 0.132, 'ORB%': 2.6, 'DRB%': 12.6, 'TRB%': 7.5, 'AST%': 11.9, 'STL%': 1.3, 'BLK%': 1.1, 'TOV%': 14.1, 'USG%': 12.9, 'OWS': 0.1, 'DWS': 0.5, 'WS': 0.6, 'WS/48': 0.042, 'OBPM': -4.3, 'DBPM': -1.0, 'BPM': -5.3, 'VORP': -0.6}
[adv] Stanley Johnson → {'PER': 10.5, 'TS%': 0.564, '3PAr': 0.418, 'FTr': 0.295, 'ORB%': 3.4, 'DRB%': 11.9, 'TRB%': 7.6, 'AST%': 9.8, 'STL%': 1.9, 'BLK%': 1.1, 'TOV%': 11.0, 'USG%': 12.5, 'OWS': 0.9, 'DWS': 0.9, 'WS': 1.8, 'WS/48': 0.079, 'OBPM': -2.8, 'DBPM': 0.1, 'BPM': -2.7, 'VORP': -0.2}
[adv] Bismack Biyombo → {'PER': 17.3, 'TS%': 0.596, '3PAr': 0.0, 'FTr': 0.49, 'ORB%': 13.7, 'DRB%': 21.5, 'TRB%': 17.7, 'AST%': 6



[adv] Daishen Nix → {'PER': 8.1, 'TS%': 0.48, '3PAr': 0.388, 'FTr': 0.448, 'ORB%': 2.9, 'DRB%': 11.0, 'TRB%': 6.9, 'AST%': 21.5, 'STL%': 2.7, 'BLK%': 0.0, 'TOV%': 24.5, 'USG%': 17.3, 'OWS': -0.3, 'DWS': 0.1, 'WS': -0.2, 'WS/48': -0.033, 'OBPM': -5.4, 'DBPM': -1.0, 'BPM': -6.4, 'VORP': -0.3}
[adv] Keifer Sykes → {'PER': 5.9, 'TS%': 0.456, '3PAr': 0.474, 'FTr': 0.089, 'ORB%': 1.5, 'DRB%': 7.4, 'TRB%': 4.4, 'AST%': 14.9, 'STL%': 1.0, 'BLK%': 0.6, 'TOV%': 14.3, 'USG%': 17.4, 'OWS': -0.5, 'DWS': 0.1, 'WS': -0.5, 'WS/48': -0.04, 'OBPM': -4.4, 'DBPM': -2.4, 'BPM': -6.9, 'VORP': -0.7}
[adv] Rayjon Tucker → {'PER': 17.7, 'TS%': 0.869, '3PAr': 0.556, 'FTr': 0.778, 'ORB%': 4.7, 'DRB%': 7.5, 'TRB%': 6.1, 'AST%': 18.1, 'STL%': 2.7, 'BLK%': 0.0, 'TOV%': 7.6, 'USG%': 7.9, 'OWS': 0.3, 'DWS': 0.1, 'WS': 0.4, 'WS/48': 0.243, 'OBPM': 2.3, 'DBPM': 1.2, 'BPM': 3.5, 'VORP': 0.1}
[adv] Juwan Morgan → {'PER': 8.3, 'TS%': 0.833, '3PAr': 0.667, 'FTr': 0.0, 'ORB%': 6.6, 'DRB%': 7.4, 'TRB%': 7.0, 'AST%': 4.2, 'ST

2021-22 workers: 100%|██████████| 497/497 [00:31<00:00, 15.57it/s]

[adv] Devin Cannady → {'PER': 8.6, 'TS%': 0.531, '3PAr': 0.841, 'FTr': 0.159, 'ORB%': 0.7, 'DRB%': 3.7, 'TRB%': 2.2, 'AST%': 10.0, 'STL%': 1.7, 'BLK%': 1.9, 'TOV%': 9.6, 'USG%': 15.5, 'OWS': 0.0, 'DWS': 0.1, 'WS': 0.1, 'WS/48': 0.033, 'OBPM': -2.8, 'DBPM': -0.4, 'BPM': -3.2, 'VORP': 0.0}
[adv] Xavier Sneed → {'PER': 0.4, 'TS%': 0.25, '3PAr': 0.8, 'FTr': 0.0, 'ORB%': 5.5, 'DRB%': 10.9, 'TRB%': 8.3, 'AST%': 3.3, 'STL%': 0.0, 'BLK%': 0.0, 'TOV%': 0.0, 'USG%': 11.0, 'OWS': -0.1, 'DWS': 0.0, 'WS': 0.0, 'WS/48': -0.056, 'OBPM': -6.9, 'DBPM': -3.7, 'BPM': -10.6, 'VORP': -0.1}
[adv] Ish Wainright → {'PER': 9.4, 'TS%': 0.494, '3PAr': 0.567, 'FTr': 0.115, 'ORB%': 8.3, 'DRB%': 7.9, 'TRB%': 8.1, 'AST%': 5.3, 'STL%': 2.7, 'BLK%': 1.5, 'TOV%': 10.6, 'USG%': 14.7, 'OWS': 0.0, 'DWS': 0.6, 'WS': 0.6, 'WS/48': 0.079, 'OBPM': -3.0, 'DBPM': 1.0, 'BPM': -2.0, 'VORP': 0.0}
[dbg] 2021-22 processed players: 468
[dbg] 2021-22 before standings merge: 468
[dbg] 2021-22 after standings merge: 468
[dbg] 2021-22 be


Seasons:  67%|██████▋   | 6/9 [02:51<01:52, 37.53s/it]

[dbg] 2021-22 after injury merge: 464
Percentage calculations completed
[dbg] 2021-22 final merged: 464
[fetch] https://hoopshype.com/salaries/2022-23/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://hoopshype.com/salaries/2022-2023/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://www.espn.com/nba/salaries/_/type/team/year/2023 (attempt 1)
  -> ESPN team salary fallback added 40 rows
                        Team  Team_Salary   Season
0          Stephen Curry, PG     48070014  2022-23
1           LeBron James, SF     44474988  2022-23
2           Kevin Durant, PF     44119845  2022-23
3           Bradley Beal, SG     43279250  2022-23
4  Giannis Antetokounmpo, PF     42492492  2022-23
[fetch_season_players] 574 players for 2022-23
[adv] fetching https://www.basketball-reference.com/leagues/NBA_2023_advanced.html





[adv] Lebron James → {'PER': 23.9, 'TS%': 0.583, '3PAr': 0.309, 'FTr': 0.268, 'ORB%': 3.7, 'DRB%': 20.8, 'TRB%': 12.5, 'AST%': 33.5, 'STL%': 1.2, 'BLK%': 1.4, 'TOV%': 11.6, 'USG%': 33.3, 'OWS': 3.2, 'DWS': 2.4, 'WS': 5.6, 'WS/48': 0.138, 'OBPM': 5.5, 'DBPM': 0.6, 'BPM': 6.1, 'VORP': 4.0}
[adv] Stephen Curry → {'PER': 24.1, 'TS%': 0.656, '3PAr': 0.564, 'FTr': 0.248, 'ORB%': 2.3, 'DRB%': 16.8, 'TRB%': 9.7, 'AST%': 30.0, 'STL%': 1.3, 'BLK%': 0.9, 'TOV%': 12.5, 'USG%': 31.0, 'OWS': 5.8, 'DWS': 2.0, 'WS': 7.8, 'WS/48': 0.192, 'OBPM': 7.5, 'DBPM': 0.1, 'BPM': 7.5, 'VORP': 4.7}
[adv] Kevin Durant → {'PER': 25.9, 'TS%': 0.677, '3PAr': 0.267, 'FTr': 0.387, 'ORB%': 1.2, 'DRB%': 19.5, 'TRB%': 10.5, 'AST%': 24.5, 'STL%': 1.0, 'BLK%': 3.4, 'TOV%': 13.4, 'USG%': 30.7, 'OWS': 4.7, 'DWS': 2.1, 'WS': 6.8, 'WS/48': 0.194, 'OBPM': 6.0, 'DBPM': 1.2, 'BPM': 7.1, 'VORP': 3.9}
[adv] Bradley Beal → {'PER': 19.7, 'TS%': 0.593, '3PAr': 0.249, 'FTr': 0.26, 'ORB%': 2.8, 'DRB%': 9.9, 'TRB%': 6.5, 'AST%': 26.6, 'ST

2022-23 workers:  16%|█▌        | 84/529 [00:00<00:01, 422.99it/s][A

[adv] Markelle Fultz → {'PER': 16.6, 'TS%': 0.564, '3PAr': 0.128, 'FTr': 0.211, 'ORB%': 4.4, 'DRB%': 10.5, 'TRB%': 7.4, 'AST%': 29.8, 'STL%': 2.4, 'BLK%': 1.4, 'TOV%': 15.8, 'USG%': 21.3, 'OWS': 1.7, 'DWS': 2.0, 'WS': 3.7, 'WS/48': 0.1, 'OBPM': -0.1, 'DBPM': 0.7, 'BPM': 0.5, 'VORP': 1.2}
[adv] Davis Bertans → {'PER': 11.9, 'TS%': 0.618, '3PAr': 0.881, 'FTr': 0.094, 'ORB%': 2.3, 'DRB%': 10.3, 'TRB%': 6.3, 'AST%': 6.2, 'STL%': 1.1, 'BLK%': 1.5, 'TOV%': 5.7, 'USG%': 16.2, 'OWS': 0.6, 'DWS': 0.3, 'WS': 0.9, 'WS/48': 0.09, 'OBPM': 0.0, 'DBPM': -1.3, 'BPM': -1.2, 'VORP': 0.1}
[adv] Lauri Markkanen → {'PER': 22.1, 'TS%': 0.64, '3PAr': 0.446, 'FTr': 0.348, 'ORB%': 6.3, 'DRB%': 20.7, 'TRB%': 13.6, 'AST%': 8.6, 'STL%': 0.9, 'BLK%': 1.4, 'TOV%': 8.8, 'USG%': 26.6, 'OWS': 6.3, 'DWS': 1.9, 'WS': 8.2, 'WS/48': 0.173, 'OBPM': 4.9, 'DBPM': -1.0, 'BPM': 3.8, 'VORP': 3.3}
[adv] no advanced stats for Marcus Morris Sr. in 2022-23
[adv] no advanced stats for Jusuf Nurkic in 2022-23
[adv] Malik Beasley → {'




[adv] Jalen Suggs → {'PER': 12.5, 'TS%': 0.528, '3PAr': 0.455, 'FTr': 0.268, 'ORB%': 5.0, 'DRB%': 9.6, 'TRB%': 7.3, 'AST%': 18.0, 'STL%': 2.6, 'BLK%': 2.1, 'TOV%': 15.9, 'USG%': 20.3, 'OWS': -0.2, 'DWS': 1.5, 'WS': 1.4, 'WS/48': 0.053, 'OBPM': -1.8, 'DBPM': 0.8, 'BPM': -1.0, 'VORP': 0.3}
[adv] Khem Birch → {'PER': 9.4, 'TS%': 0.629, '3PAr': 0.063, 'FTr': 0.156, 'ORB%': 5.2, 'DRB%': 12.8, 'TRB%': 8.7, 'AST%': 5.8, 'STL%': 1.5, 'BLK%': 3.0, 'TOV%': 19.0, 'USG%': 11.1, 'OWS': 0.1, 'DWS': 0.2, 'WS': 0.2, 'WS/48': 0.072, 'OBPM': -4.6, 'DBPM': 1.1, 'BPM': -3.5, 'VORP': -0.1}
[adv] Jaxson Hayes → {'PER': 13.7, 'TS%': 0.605, '3PAr': 0.186, 'FTr': 0.532, 'ORB%': 7.3, 'DRB%': 17.3, 'TRB%': 12.3, 'AST%': 7.7, 'STL%': 1.6, 'BLK%': 3.0, 'TOV%': 13.9, 'USG%': 15.7, 'OWS': 0.6, 'DWS': 0.9, 'WS': 1.5, 'WS/48': 0.118, 'OBPM': -2.2, 'DBPM': 1.0, 'BPM': -1.2, 'VORP': 0.1}
[adv] Bennedict Mathurin → {'PER': 13.1, 'TS%': 0.566, '3PAr': 0.326, 'FTr': 0.477, 'ORB%': 4.4, 'DRB%': 11.5, 'TRB%': 7.9, 'AST%': 7.

2022-23 workers:  51%|█████     | 270/529 [00:00<00:00, 458.12it/s][A


[adv] Kevin Porter Jr. → {'PER': 16.2, 'TS%': 0.565, '3PAr': 0.436, 'FTr': 0.299, 'ORB%': 4.1, 'DRB%': 13.1, 'TRB%': 8.5, 'AST%': 25.7, 'STL%': 2.0, 'BLK%': 0.8, 'TOV%': 15.8, 'USG%': 24.3, 'OWS': 1.9, 'DWS': 1.0, 'WS': 2.9, 'WS/48': 0.068, 'OBPM': 1.4, 'DBPM': -0.8, 'BPM': 0.6, 'VORP': 1.3}
[adv] Trey Murphy Iii → {'PER': 15.2, 'TS%': 0.65, '3PAr': 0.621, 'FTr': 0.238, 'ORB%': 2.7, 'DRB%': 10.3, 'TRB%': 6.5, 'AST%': 6.4, 'STL%': 1.8, 'BLK%': 1.7, 'TOV%': 6.5, 'USG%': 16.6, 'OWS': 4.8, 'DWS': 2.8, 'WS': 7.6, 'WS/48': 0.15, 'OBPM': 1.4, 'DBPM': 0.4, 'BPM': 1.8, 'VORP': 2.3}
[adv] Aleksej Pokusevski → {'PER': 13.4, 'TS%': 0.522, '3PAr': 0.418, 'FTr': 0.141, 'ORB%': 6.8, 'DRB%': 17.8, 'TRB%': 12.2, 'AST%': 12.4, 'STL%': 1.4, 'BLK%': 5.7, 'TOV%': 14.0, 'USG%': 18.3, 'OWS': 0.0, 'DWS': 1.0, 'WS': 1.0, 'WS/48': 0.066, 'OBPM': -1.0, 'DBPM': 1.0, 'BPM': 0.0, 'VORP': 0.3}
[adv] Simone Fontecchio → {'PER': 8.0, 'TS%': 0.495, '3PAr': 0.612, 'FTr': 0.141, 'ORB%': 4.4, 'DRB%': 7.7, 'TRB%': 6.1, 'AS

2022-23 workers:  69%|██████▉   | 364/529 [00:00<00:00, 462.92it/s][A

[adv] Oshae Brissett → {'PER': 10.6, 'TS%': 0.52, '3PAr': 0.486, 'FTr': 0.455, 'ORB%': 6.4, 'DRB%': 16.0, 'TRB%': 11.2, 'AST%': 5.4, 'STL%': 1.4, 'BLK%': 1.0, 'TOV%': 8.2, 'USG%': 16.1, 'OWS': 0.4, 'DWS': 0.7, 'WS': 1.1, 'WS/48': 0.05, 'OBPM': -2.0, 'DBPM': -1.2, 'BPM': -3.2, 'VORP': -0.3}
[adv] Bismack Biyombo → {'PER': 14.4, 'TS%': 0.555, '3PAr': 0.0, 'FTr': 0.34, 'ORB%': 11.0, 'DRB%': 22.1, 'TRB%': 16.5, 'AST%': 8.7, 'STL%': 1.0, 'BLK%': 9.0, 'TOV%': 17.7, 'USG%': 14.0, 'OWS': 0.2, 'DWS': 1.6, 'WS': 1.9, 'WS/48': 0.102, 'OBPM': -3.4, 'DBPM': 2.5, 'BPM': -0.9, 'VORP': 0.2}
[adv] Anthony Gill → {'PER': 11.1, 'TS%': 0.604, '3PAr': 0.22, 'FTr': 0.508, 'ORB%': 6.9, 'DRB%': 10.6, 'TRB%': 8.8, 'AST%': 7.2, 'STL%': 0.5, 'BLK%': 1.6, 'TOV%': 8.5, 'USG%': 12.3, 'OWS': 1.0, 'DWS': 0.4, 'WS': 1.3, 'WS/48': 0.101, 'OBPM': -2.4, 'DBPM': -0.9, 'BPM': -3.3, 'VORP': -0.2}
[adv] no advanced stats for Juancho Hernangomez in 2022-23
[adv] Isaiah Joe → {'PER': 14.5, 'TS%': 0.626, '3PAr': 0.776, 'FTr': 0



[adv] Tyty Washington Jr. → {'PER': 7.5, 'TS%': 0.432, '3PAr': 0.5, 'FTr': 0.113, 'ORB%': 0.5, 'DRB%': 11.0, 'TRB%': 5.7, 'AST%': 15.3, 'STL%': 1.7, 'BLK%': 0.5, 'TOV%': 7.7, 'USG%': 17.4, 'OWS': -0.3, 'DWS': 0.1, 'WS': -0.1, 'WS/48': -0.016, 'OBPM': -5.0, 'DBPM': -1.3, 'BPM': -6.3, 'VORP': -0.5}
[adv] Jabari Walker → {'PER': 9.6, 'TS%': 0.496, '3PAr': 0.283, 'FTr': 0.227, 'ORB%': 8.8, 'DRB%': 15.5, 'TRB%': 12.2, 'AST%': 7.8, 'STL%': 0.8, 'BLK%': 1.9, 'TOV%': 11.8, 'USG%': 17.3, 'OWS': -0.1, 'DWS': 0.3, 'WS': 0.2, 'WS/48': 0.015, 'OBPM': -3.9, 'DBPM': -2.1, 'BPM': -6.1, 'VORP': -0.6}
[adv] Josh Minott → {'PER': 17.9, 'TS%': 0.582, '3PAr': 0.167, 'FTr': 0.222, 'ORB%': 5.9, 'DRB%': 22.9, 'TRB%': 14.6, 'AST%': 7.5, 'STL%': 2.0, 'BLK%': 5.4, 'TOV%': 7.1, 'USG%': 18.9, 'OWS': 0.1, 'DWS': 0.2, 'WS': 0.3, 'WS/48': 0.146, 'OBPM': -1.9, 'DBPM': 0.4, 'BPM': -1.6, 'VORP': 0.0}
[adv] Tyrese Martin → {'PER': 7.7, 'TS%': 0.44, '3PAr': 0.304, 'FTr': 0.087, 'ORB%': 8.2, 'DRB%': 11.7, 'TRB%': 10.0, 'AS

2022-23 workers: 100%|██████████| 529/529 [00:16<00:00, 32.25it/s]

[adv] Xavier Cooks → {'PER': 14.3, 'TS%': 0.586, '3PAr': 0.036, 'FTr': 0.357, 'ORB%': 14.7, 'DRB%': 18.7, 'TRB%': 16.8, 'AST%': 6.4, 'STL%': 2.3, 'BLK%': 2.7, 'TOV%': 19.8, 'USG%': 13.9, 'OWS': 0.1, 'DWS': 0.2, 'WS': 0.2, 'WS/48': 0.094, 'OBPM': -3.2, 'DBPM': -0.4, 'BPM': -3.6, 'VORP': -0.1}
[adv] David Duke Jr. → {'PER': 8.3, 'TS%': 0.497, '3PAr': 0.154, 'FTr': 0.218, 'ORB%': 5.5, 'DRB%': 8.7, 'TRB%': 7.1, 'AST%': 13.1, 'STL%': 2.1, 'BLK%': 0.4, 'TOV%': 17.4, 'USG%': 20.1, 'OWS': -0.3, 'DWS': 0.2, 'WS': -0.1, 'WS/48': -0.011, 'OBPM': -6.3, 'DBPM': -1.0, 'BPM': -7.3, 'VORP': -0.3}
[adv] Carlik Jones → {'PER': 11.1, 'TS%': 0.54, '3PAr': 0.4, 'FTr': 0.533, 'ORB%': 2.1, 'DRB%': 8.0, 'TRB%': 5.1, 'AST%': 13.9, 'STL%': 1.7, 'BLK%': 0.0, 'TOV%': 9.7, 'USG%': 16.2, 'OWS': 0.0, 'DWS': 0.1, 'WS': 0.1, 'WS/48': 0.078, 'OBPM': -2.8, 'DBPM': 0.4, 'BPM': -2.5, 'VORP': 0.0}
[dbg] 2022-23 processed players: 473
[dbg] 2022-23 before standings merge: 473
[dbg] 2022-23 after standings merge: 473
[dbg] 2


Seasons:  78%|███████▊  | 7/9 [03:35<01:19, 39.64s/it]

[dbg] 2022-23 after injury merge: 472
Percentage calculations completed
[dbg] 2022-23 final merged: 472
[fetch] https://hoopshype.com/salaries/2023-24/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://hoopshype.com/salaries/2023-2024/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://www.espn.com/nba/salaries/_/type/team/year/2024 (attempt 1)
  -> ESPN team salary fallback added 40 rows
                Team  Team_Salary   Season
0  Stephen Curry, PG     51915615  2023-24
1   Kevin Durant, PF     47649433  2023-24
2   LeBron James, SF     47607350  2023-24
3    Nikola Jokic, C     47607350  2023-24
4     Joel Embiid, C     46900000  2023-24




[adv] Kevin Durant → {'PER': 21.2, 'TS%': 0.626, '3PAr': 0.283, 'FTr': 0.295, 'ORB%': 1.7, 'DRB%': 17.5, 'TRB%': 10.0, 'AST%': 22.2, 'STL%': 1.2, 'BLK%': 2.9, 'TOV%': 13.1, 'USG%': 29.0, 'OWS': 5.1, 'DWS': 3.2, 'WS': 8.3, 'WS/48': 0.142, 'OBPM': 4.0, 'DBPM': 0.1, 'BPM': 4.0, 'VORP': 4.3}
[adv] no advanced stats for Nikola Jokic in 2023-24
[adv] Stephen Curry → {'PER': 20.6, 'TS%': 0.616, '3PAr': 0.606, 'FTr': 0.224, 'ORB%': 1.7, 'DRB%': 12.9, 'TRB%': 7.4, 'AST%': 24.7, 'STL%': 1.1, 'BLK%': 1.0, 'TOV%': 11.7, 'USG%': 31.3, 'OWS': 5.2, 'DWS': 2.0, 'WS': 7.2, 'WS/48': 0.142, 'OBPM': 6.3, 'DBPM': -1.1, 'BPM': 5.2, 'VORP': 4.4}
[adv] Lebron James → {'PER': 23.7, 'TS%': 0.63, '3PAr': 0.286, 'FTr': 0.318, 'ORB%': 2.8, 'DRB%': 19.3, 'TRB%': 11.5, 'AST%': 37.5, 'STL%': 1.7, 'BLK%': 1.3, 'TOV%': 14.5, 'USG%': 29.2, 'OWS': 5.7, 'DWS': 2.8, 'WS': 8.5, 'WS/48': 0.164, 'OBPM': 5.5, 'DBPM': 0.9, 'BPM': 6.5, 'VORP': 5.4}
[adv] Bradley Beal → {'PER': 16.3, 'TS%': 0.607, '3PAr': 0.32, 'FTr': 0.182, 'ORB



[adv] Myles Turner → {'PER': 19.3, 'TS%': 0.626, '3PAr': 0.356, 'FTr': 0.349, 'ORB%': 6.2, 'DRB%': 22.9, 'TRB%': 14.5, 'AST%': 6.4, 'STL%': 0.9, 'BLK%': 5.5, 'TOV%': 9.4, 'USG%': 23.4, 'OWS': 3.6, 'DWS': 2.2, 'WS': 5.8, 'WS/48': 0.134, 'OBPM': 1.0, 'DBPM': -0.1, 'BPM': 0.9, 'VORP': 1.5}
[adv] Clint Capela → {'PER': 20.7, 'TS%': 0.59, '3PAr': 0.002, 'FTr': 0.333, 'ORB%': 18.7, 'DRB%': 26.5, 'TRB%': 22.4, 'AST%': 6.9, 'STL%': 1.1, 'BLK%': 5.1, 'TOV%': 9.5, 'USG%': 17.3, 'OWS': 4.4, 'DWS': 1.9, 'WS': 6.3, 'WS/48': 0.161, 'OBPM': 1.0, 'DBPM': -1.0, 'BPM': 0.1, 'VORP': 1.0}
[adv] De'Andre Hunter → {'PER': 12.7, 'TS%': 0.595, '3PAr': 0.459, 'FTr': 0.296, 'ORB%': 1.9, 'DRB%': 12.9, 'TRB%': 7.2, 'AST%': 7.3, 'STL%': 1.1, 'BLK%': 0.9, 'TOV%': 10.0, 'USG%': 20.6, 'OWS': 1.6, 'DWS': 0.6, 'WS': 2.2, 'WS/48': 0.062, 'OBPM': -0.9, 'DBPM': -1.5, 'BPM': -2.4, 'VORP': -0.2}
[adv] Keldon Johnson → {'PER': 14.2, 'TS%': 0.565, '3PAr': 0.426, 'FTr': 0.256, 'ORB%': 5.0, 'DRB%': 15.1, 'TRB%': 10.0, 'AST%': 1



[adv] Cade Cunningham → {'PER': 17.1, 'TS%': 0.546, '3PAr': 0.287, 'FTr': 0.235, 'ORB%': 1.7, 'DRB%': 12.9, 'TRB%': 7.2, 'AST%': 37.5, 'STL%': 1.3, 'BLK%': 0.9, 'TOV%': 14.1, 'USG%': 30.8, 'OWS': 0.7, 'DWS': 1.0, 'WS': 1.8, 'WS/48': 0.041, 'OBPM': 2.0, 'DBPM': -1.6, 'BPM': 0.3, 'VORP': 1.2}
[adv] P.J. Tucker → {'PER': 5.9, 'TS%': 0.507, '3PAr': 0.7, 'FTr': 0.06, 'ORB%': 6.7, 'DRB%': 12.9, 'TRB%': 9.8, 'AST%': 3.9, 'STL%': 1.6, 'BLK%': 1.3, 'TOV%': 13.5, 'USG%': 5.3, 'OWS': 0.2, 'DWS': 0.5, 'WS': 0.7, 'WS/48': 0.068, 'OBPM': -4.0, 'DBPM': 0.4, 'BPM': -3.6, 'VORP': -0.2}
[adv] Joe Ingles → {'PER': 10.6, 'TS%': 0.612, '3PAr': 0.709, 'FTr': 0.15, 'ORB%': 1.8, 'DRB%': 12.6, 'TRB%': 7.1, 'AST%': 23.0, 'STL%': 1.8, 'BLK%': 0.4, 'TOV%': 21.7, 'USG%': 11.6, 'OWS': 1.1, 'DWS': 1.5, 'WS': 2.6, 'WS/48': 0.108, 'OBPM': -1.7, 'DBPM': 1.2, 'BPM': -0.5, 'VORP': 0.5}
[adv] Maxi Kleber → {'PER': 8.8, 'TS%': 0.573, '3PAr': 0.63, 'FTr': 0.322, 'ORB%': 3.8, 'DRB%': 14.1, 'TRB%': 9.0, 'AST%': 9.5, 'STL%': 0



[adv] Tyrese Haliburton → {'PER': 23.3, 'TS%': 0.605, '3PAr': 0.51, 'FTr': 0.217, 'ORB%': 1.8, 'DRB%': 12.0, 'TRB%': 6.9, 'AST%': 44.9, 'STL%': 1.8, 'BLK%': 1.7, 'TOV%': 12.2, 'USG%': 24.6, 'OWS': 7.6, 'DWS': 1.5, 'WS': 9.0, 'WS/48': 0.195, 'OBPM': 7.3, 'DBPM': -0.4, 'BPM': 6.9, 'VORP': 5.0}
[adv] Kira Lewis Jr. → {'PER': 8.6, 'TS%': 0.441, '3PAr': 0.359, 'FTr': 0.217, 'ORB%': 3.8, 'DRB%': 7.0, 'TRB%': 5.4, 'AST%': 18.8, 'STL%': 1.7, 'BLK%': 0.7, 'TOV%': 12.2, 'USG%': 18.5, 'OWS': -0.2, 'DWS': 0.2, 'WS': 0.0, 'WS/48': -0.005, 'OBPM': -4.4, 'DBPM': -1.4, 'BPM': -5.8, 'VORP': -0.3}
[adv] Dean Wade → {'PER': 10.2, 'TS%': 0.598, '3PAr': 0.871, 'FTr': 0.112, 'ORB%': 3.8, 'DRB%': 18.2, 'TRB%': 11.1, 'AST%': 5.0, 'STL%': 1.8, 'BLK%': 2.0, 'TOV%': 7.6, 'USG%': 10.5, 'OWS': 0.9, 'DWS': 1.6, 'WS': 2.5, 'WS/48': 0.11, 'OBPM': -1.7, 'DBPM': 1.8, 'BPM': 0.1, 'VORP': 0.6}
[adv] Aaron Nesmith → {'PER': 12.8, 'TS%': 0.631, '3PAr': 0.526, 'FTr': 0.216, 'ORB%': 3.6, 'DRB%': 12.0, 'TRB%': 7.8, 'AST%': 6.



[adv] Dante Exum → {'PER': 13.7, 'TS%': 0.645, '3PAr': 0.358, 'FTr': 0.225, 'ORB%': 2.5, 'DRB%': 12.4, 'TRB%': 7.5, 'AST%': 19.3, 'STL%': 1.0, 'BLK%': 0.4, 'TOV%': 12.9, 'USG%': 15.0, 'OWS': 2.1, 'DWS': 0.8, 'WS': 2.9, 'WS/48': 0.127, 'OBPM': -0.3, 'DBPM': -0.3, 'BPM': -0.6, 'VORP': 0.4}
[adv] Julian Champagnie → {'PER': 10.6, 'TS%': 0.57, '3PAr': 0.689, 'FTr': 0.229, 'ORB%': 3.1, 'DRB%': 12.2, 'TRB%': 7.6, 'AST%': 9.3, 'STL%': 1.6, 'BLK%': 2.4, 'TOV%': 11.4, 'USG%': 14.4, 'OWS': 0.6, 'DWS': 1.3, 'WS': 1.8, 'WS/48': 0.059, 'OBPM': -1.8, 'DBPM': -0.1, 'BPM': -1.9, 'VORP': 0.0}
[adv] Dariq Whitehead → {'PER': 4.9, 'TS%': 0.255, '3PAr': 0.6, 'FTr': 0.4, 'ORB%': 0.0, 'DRB%': 18.8, 'TRB%': 9.1, 'AST%': 15.6, 'STL%': 0.0, 'BLK%': 3.8, 'TOV%': 0.0, 'USG%': 10.6, 'OWS': 0.0, 'DWS': 0.0, 'WS': 0.0, 'WS/48': -0.03, 'OBPM': -5.5, 'DBPM': -2.4, 'BPM': -7.9, 'VORP': 0.0}
[adv] Christian Braun → {'PER': 12.1, 'TS%': 0.554, '3PAr': 0.34, 'FTr': 0.277, 'ORB%': 5.4, 'DRB%': 14.8, 'TRB%': 10.2, 'AST%': 




[adv] Mo Bamba → {'PER': 15.4, 'TS%': 0.575, '3PAr': 0.323, 'FTr': 0.253, 'ORB%': 11.3, 'DRB%': 24.5, 'TRB%': 17.8, 'AST%': 7.6, 'STL%': 1.5, 'BLK%': 7.4, 'TOV%': 15.1, 'USG%': 15.1, 'OWS': 0.6, 'DWS': 1.3, 'WS': 2.0, 'WS/48': 0.128, 'OBPM': -2.3, 'DBPM': 1.4, 'BPM': -1.0, 'VORP': 0.2}
[adv] Kelly Oubre Jr. → {'PER': 13.5, 'TS%': 0.535, '3PAr': 0.37, 'FTr': 0.264, 'ORB%': 5.0, 'DRB%': 13.5, 'TRB%': 9.2, 'AST%': 7.3, 'STL%': 1.8, 'BLK%': 2.1, 'TOV%': 8.2, 'USG%': 22.4, 'OWS': 0.6, 'DWS': 2.4, 'WS': 3.0, 'WS/48': 0.07, 'OBPM': -1.5, 'DBPM': -0.7, 'BPM': -2.3, 'VORP': -0.1}
[adv] Trendon Watford → {'PER': 15.7, 'TS%': 0.611, '3PAr': 0.219, 'FTr': 0.312, 'ORB%': 6.5, 'DRB%': 18.7, 'TRB%': 12.4, 'AST%': 14.6, 'STL%': 1.3, 'BLK%': 2.0, 'TOV%': 15.1, 'USG%': 21.1, 'OWS': 0.9, 'DWS': 0.9, 'WS': 1.8, 'WS/48': 0.1, 'OBPM': -0.9, 'DBPM': 0.4, 'BPM': -0.5, 'VORP': 0.3}
[adv] Dennis Smith Jr. → {'PER': 13.4, 'TS%': 0.508, '3PAr': 0.322, 'FTr': 0.172, 'ORB%': 5.0, 'DRB%': 12.1, 'TRB%': 8.5, 'AST%': 

2023-24 workers: 100%|██████████| 474/474 [00:01<00:00, 370.69it/s][A


[adv] James Johnson → {'PER': 5.0, 'TS%': 0.368, '3PAr': 0.2, 'FTr': 0.2, 'ORB%': 2.4, 'DRB%': 7.2, 'TRB%': 4.8, 'AST%': 18.6, 'STL%': 5.0, 'BLK%': 1.7, 'TOV%': 31.5, 'USG%': 14.2, 'OWS': -0.1, 'DWS': 0.1, 'WS': 0.0, 'WS/48': -0.05, 'OBPM': -8.0, 'DBPM': 2.2, 'BPM': -5.8, 'VORP': 0.0}
[adv] Bismack Biyombo → {'PER': 9.9, 'TS%': 0.562, '3PAr': 0.0, 'FTr': 0.412, 'ORB%': 8.3, 'DRB%': 21.1, 'TRB%': 14.5, 'AST%': 9.5, 'STL%': 0.7, 'BLK%': 4.4, 'TOV%': 21.3, 'USG%': 10.7, 'OWS': 0.1, 'DWS': 1.0, 'WS': 1.1, 'WS/48': 0.066, 'OBPM': -4.8, 'DBPM': 1.4, 'BPM': -3.4, 'VORP': -0.3}
[adv] Lindy Waters Iii → {'PER': 15.3, 'TS%': 0.653, '3PAr': 0.817, 'FTr': 0.019, 'ORB%': 2.9, 'DRB%': 12.6, 'TRB%': 8.0, 'AST%': 11.5, 'STL%': 0.9, 'BLK%': 2.3, 'TOV%': 6.3, 'USG%': 17.3, 'OWS': 0.6, 'DWS': 0.3, 'WS': 0.9, 'WS/48': 0.15, 'OBPM': 2.4, 'DBPM': -0.5, 'BPM': 1.9, 'VORP': 0.3}
[adv] Keon Ellis → {'PER': 12.0, 'TS%': 0.627, '3PAr': 0.724, 'FTr': 0.151, 'ORB%': 3.6, 'DRB%': 10.8, 'TRB%': 7.1, 'AST%': 11.1, 'S

Seasons:  89%|████████▉ | 8/9 [03:40<00:28, 28.62s/it]

[dbg] 2023-24 after injury merge: 451
Percentage calculations completed
[dbg] 2023-24 final merged: 451
↻  2023-24 differs – re-scraping
[fetch] https://hoopshype.com/salaries/2024-25/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://hoopshype.com/salaries/2024-2025/ (attempt 1)
  -> HTTP 404, skipping.
[fetch] https://www.espn.com/nba/salaries/_/type/team/year/2025 (attempt 1)
  -> ESPN team salary fallback added 40 rows
                Team  Team_Salary   Season
0  Stephen Curry, PG     55761216  2024-25
1     Joel Embiid, C     51415938  2024-25
2    Nikola Jokic, C     51415938  2024-25
3   Kevin Durant, PF     51179021  2024-25
4   Bradley Beal, SG     50203930  2024-25
[fetch_season_players] 578 players for 2024-25
[adv] fetching https://www.basketball-reference.com/leagues/NBA_2025_advanced.html





[adv] Joel Embiid → {'PER': 23.4, 'TS%': 0.58, '3PAr': 0.244, 'FTr': 0.537, 'ORB%': 6.8, 'DRB%': 24.5, 'TRB%': 15.3, 'AST%': 25.6, 'STL%': 1.2, 'BLK%': 3.1, 'TOV%': 13.7, 'USG%': 34.2, 'OWS': 0.9, 'DWS': 0.5, 'WS': 1.4, 'WS/48': 0.118, 'OBPM': 3.7, 'DBPM': -0.3, 'BPM': 3.5, 'VORP': 0.8}
[adv] Kevin Durant → {'PER': 21.2, 'TS%': 0.642, '3PAr': 0.331, 'FTr': 0.321, 'ORB%': 1.2, 'DRB%': 16.9, 'TRB%': 9.2, 'AST%': 19.6, 'STL%': 1.1, 'BLK%': 3.2, 'TOV%': 12.9, 'USG%': 28.7, 'OWS': 4.0, 'DWS': 1.2, 'WS': 5.2, 'WS/48': 0.111, 'OBPM': 3.9, 'DBPM': -0.7, 'BPM': 3.2, 'VORP': 3.0}
[adv] Bradley Beal → {'PER': 15.2, 'TS%': 0.598, '3PAr': 0.38, 'FTr': 0.197, 'ORB%': 2.3, 'DRB%': 9.1, 'TRB%': 5.8, 'AST%': 17.6, 'STL%': 1.7, 'BLK%': 1.5, 'TOV%': 11.6, 'USG%': 22.1, 'OWS': 1.9, 'DWS': 0.6, 'WS': 2.4, 'WS/48': 0.068, 'OBPM': -0.1, 'DBPM': -1.0, 'BPM': -1.1, 'VORP': 0.4}
[adv] Stephen Curry → {'PER': 21.5, 'TS%': 0.618, '3PAr': 0.623, 'FTr': 0.238, 'ORB%': 1.9, 'DRB%': 13.3, 'TRB%': 7.4, 'AST%': 31.3, '

2024-25 workers:  14%|█▍        | 68/491 [00:00<00:01, 331.03it/s][A


[adv] Demar Derozan → {'PER': 17.7, 'TS%': 0.569, '3PAr': 0.196, 'FTr': 0.337, 'ORB%': 2.0, 'DRB%': 10.1, 'TRB%': 6.0, 'AST%': 18.7, 'STL%': 1.1, 'BLK%': 1.1, 'TOV%': 6.5, 'USG%': 25.0, 'OWS': 5.7, 'DWS': 1.4, 'WS': 7.1, 'WS/48': 0.123, 'OBPM': 1.2, 'DBPM': -1.2, 'BPM': -0.1, 'VORP': 1.3}
[adv] Bruce Brown → {'PER': 10.9, 'TS%': 0.504, '3PAr': 0.262, 'FTr': 0.21, 'ORB%': 4.5, 'DRB%': 14.9, 'TRB%': 9.6, 'AST%': 12.8, 'STL%': 1.8, 'BLK%': 1.0, 'TOV%': 10.4, 'USG%': 17.2, 'OWS': 0.1, 'DWS': 0.6, 'WS': 0.7, 'WS/48': 0.035, 'OBPM': -3.2, 'DBPM': -0.6, 'BPM': -3.8, 'VORP': -0.4}
[adv] Brook Lopez → {'PER': 14.8, 'TS%': 0.624, '3PAr': 0.482, 'FTr': 0.178, 'ORB%': 5.0, 'DRB%': 11.8, 'TRB%': 8.6, 'AST%': 7.8, 'STL%': 0.9, 'BLK%': 5.4, 'TOV%': 9.1, 'USG%': 15.8, 'OWS': 3.6, 'DWS': 2.9, 'WS': 6.6, 'WS/48': 0.124, 'OBPM': 0.4, 'DBPM': 0.4, 'BPM': 0.8, 'VORP': 1.8}
[adv] Aaron Gordon → {'PER': 17.0, 'TS%': 0.65, '3PAr': 0.346, 'FTr': 0.36, 'ORB%': 6.4, 'DRB%': 12.1, 'TRB%': 9.4, 'AST%': 15.0, 'STL%

2024-25 workers:  29%|██▉       | 143/491 [00:00<00:00, 355.40it/s][A

[adv] Ivica Zubac → {'PER': 22.3, 'TS%': 0.641, '3PAr': 0.0, 'FTr': 0.251, 'ORB%': 13.1, 'DRB%': 30.2, 'TRB%': 21.8, 'AST%': 12.8, 'STL%': 1.0, 'BLK%': 3.4, 'TOV%': 10.8, 'USG%': 19.5, 'OWS': 7.0, 'DWS': 4.7, 'WS': 11.7, 'WS/48': 0.215, 'OBPM': 2.3, 'DBPM': 0.8, 'BPM': 3.1, 'VORP': 3.4}
[adv] Brandon Miller → {'PER': 14.0, 'TS%': 0.54, '3PAr': 0.597, 'FTr': 0.161, 'ORB%': 2.8, 'DRB%': 12.5, 'TRB%': 7.6, 'AST%': 18.3, 'STL%': 1.5, 'BLK%': 2.1, 'TOV%': 12.6, 'USG%': 27.7, 'OWS': -0.2, 'DWS': 0.7, 'WS': 0.4, 'WS/48': 0.023, 'OBPM': 0.8, 'DBPM': -0.8, 'BPM': 0.0, 'VORP': 0.4}
[adv] Terance Mann → {'PER': 12.0, 'TS%': 0.584, '3PAr': 0.395, 'FTr': 0.165, 'ORB%': 5.3, 'DRB%': 10.4, 'TRB%': 7.8, 'AST%': 11.7, 'STL%': 1.6, 'BLK%': 1.0, 'TOV%': 9.6, 'USG%': 14.7, 'OWS': 1.7, 'DWS': 1.3, 'WS': 3.0, 'WS/48': 0.101, 'OBPM': -1.4, 'DBPM': 0.0, 'BPM': -1.3, 'VORP': 0.2}
[adv] Evan Mobley → {'PER': 22.3, 'TS%': 0.633, '3PAr': 0.254, 'FTr': 0.337, 'ORB%': 8.4, 'DRB%': 24.0, 'TRB%': 16.5, 'AST%': 15.1, 




[adv] Davion Mitchell → {'PER': 10.2, 'TS%': 0.573, '3PAr': 0.443, 'FTr': 0.176, 'ORB%': 1.9, 'DRB%': 7.1, 'TRB%': 4.5, 'AST%': 24.2, 'STL%': 1.7, 'BLK%': 0.8, 'TOV%': 20.0, 'USG%': 13.6, 'OWS': 1.3, 'DWS': 1.6, 'WS': 3.0, 'WS/48': 0.07, 'OBPM': -2.8, 'DBPM': 0.2, 'BPM': -2.6, 'VORP': -0.3}
[adv] Jarace Walker → {'PER': 11.9, 'TS%': 0.59, '3PAr': 0.514, 'FTr': 0.192, 'ORB%': 2.4, 'DRB%': 19.0, 'TRB%': 10.8, 'AST%': 12.2, 'STL%': 2.2, 'BLK%': 1.9, 'TOV%': 16.7, 'USG%': 17.1, 'OWS': 0.1, 'DWS': 1.5, 'WS': 1.6, 'WS/48': 0.065, 'OBPM': -1.8, 'DBPM': 0.8, 'BPM': -0.9, 'VORP': 0.3}
[adv] Dean Wade → {'PER': 10.1, 'TS%': 0.563, '3PAr': 0.819, 'FTr': 0.111, 'ORB%': 4.5, 'DRB%': 16.6, 'TRB%': 10.8, 'AST%': 7.3, 'STL%': 1.6, 'BLK%': 1.4, 'TOV%': 7.2, 'USG%': 10.4, 'OWS': 1.3, 'DWS': 1.7, 'WS': 3.0, 'WS/48': 0.113, 'OBPM': -1.0, 'DBPM': 1.2, 'BPM': 0.3, 'VORP': 0.7}
[adv] John Konchar → {'PER': 12.1, 'TS%': 0.592, '3PAr': 0.681, 'FTr': 0.088, 'ORB%': 7.1, 'DRB%': 21.4, 'TRB%': 14.3, 'AST%': 8.8, 

2024-25 workers:  61%|██████    | 299/491 [00:00<00:00, 376.14it/s][A


[adv] Yves Missi → {'PER': 15.4, 'TS%': 0.572, '3PAr': 0.002, 'FTr': 0.415, 'ORB%': 13.7, 'DRB%': 19.7, 'TRB%': 16.6, 'AST%': 7.2, 'STL%': 0.9, 'BLK%': 4.8, 'TOV%': 12.4, 'USG%': 14.3, 'OWS': 2.5, 'DWS': 1.1, 'WS': 3.6, 'WS/48': 0.089, 'OBPM': -1.6, 'DBPM': -1.4, 'BPM': -3.0, 'VORP': -0.5}
[adv] Christian Braun → {'PER': 16.0, 'TS%': 0.665, '3PAr': 0.273, 'FTr': 0.261, 'ORB%': 4.2, 'DRB%': 12.2, 'TRB%': 8.4, 'AST%': 10.1, 'STL%': 1.5, 'BLK%': 1.2, 'TOV%': 8.1, 'USG%': 15.8, 'OWS': 6.2, 'DWS': 1.9, 'WS': 8.0, 'WS/48': 0.144, 'OBPM': 0.8, 'DBPM': -0.5, 'BPM': 0.3, 'VORP': 1.6}
[adv] Johnny Juzang → {'PER': 11.8, 'TS%': 0.573, '3PAr': 0.661, 'FTr': 0.112, 'ORB%': 3.6, 'DRB%': 12.0, 'TRB%': 7.8, 'AST%': 7.9, 'STL%': 1.5, 'BLK%': 0.6, 'TOV%': 6.7, 'USG%': 17.6, 'OWS': 1.3, 'DWS': 0.3, 'WS': 1.6, 'WS/48': 0.06, 'OBPM': -0.9, 'DBPM': -1.4, 'BPM': -2.3, 'VORP': -0.1}
[adv] Julian Champagnie → {'PER': 12.2, 'TS%': 0.577, '3PAr': 0.722, 'FTr': 0.141, 'ORB%': 3.7, 'DRB%': 13.9, 'TRB%': 8.8, 'AST%

2024-25 workers:  77%|███████▋  | 378/491 [00:01<00:00, 381.09it/s][A


[adv] Garrett Temple → {'PER': 8.5, 'TS%': 0.406, '3PAr': 0.467, 'FTr': 0.2, 'ORB%': 4.2, 'DRB%': 9.7, 'TRB%': 6.8, 'AST%': 17.4, 'STL%': 3.6, 'BLK%': 0.8, 'TOV%': 13.3, 'USG%': 13.9, 'OWS': -0.1, 'DWS': 0.3, 'WS': 0.2, 'WS/48': 0.033, 'OBPM': -5.7, 'DBPM': 2.1, 'BPM': -3.6, 'VORP': -0.1}
[adv] Daniel Theis → {'PER': 12.1, 'TS%': 0.558, '3PAr': 0.287, 'FTr': 0.287, 'ORB%': 7.4, 'DRB%': 21.5, 'TRB%': 14.2, 'AST%': 13.0, 'STL%': 1.4, 'BLK%': 3.1, 'TOV%': 14.7, 'USG%': 11.6, 'OWS': 0.6, 'DWS': 0.4, 'WS': 1.0, 'WS/48': 0.077, 'OBPM': -3.3, 'DBPM': 0.0, 'BPM': -3.3, 'VORP': -0.2}
[adv] Charles Bassey → {'PER': 19.0, 'TS%': 0.597, '3PAr': 0.0, 'FTr': 0.282, 'ORB%': 15.9, 'DRB%': 27.4, 'TRB%': 21.7, 'AST%': 6.6, 'STL%': 1.8, 'BLK%': 7.2, 'TOV%': 14.9, 'USG%': 17.5, 'OWS': 0.5, 'DWS': 0.6, 'WS': 1.1, 'WS/48': 0.136, 'OBPM': -1.7, 'DBPM': 0.6, 'BPM': -1.1, 'VORP': 0.1}
[adv] Reggie Jackson → {'PER': 9.8, 'TS%': 0.496, '3PAr': 0.556, 'FTr': 0.068, 'ORB%': 2.3, 'DRB%': 10.4, 'TRB%': 6.2, 'AST%': 

2024-25 workers: 100%|██████████| 491/491 [00:01<00:00, 368.71it/s][A


[adv] Antonio Reeves → {'PER': 11.7, 'TS%': 0.579, '3PAr': 0.52, 'FTr': 0.141, 'ORB%': 2.9, 'DRB%': 7.6, 'TRB%': 5.1, 'AST%': 8.8, 'STL%': 1.5, 'BLK%': 0.6, 'TOV%': 10.2, 'USG%': 18.8, 'OWS': 0.4, 'DWS': 0.0, 'WS': 0.4, 'WS/48': 0.029, 'OBPM': -1.2, 'DBPM': -2.0, 'BPM': -3.2, 'VORP': -0.2}
[adv] Pelle Larsson → {'PER': 10.2, 'TS%': 0.546, '3PAr': 0.468, 'FTr': 0.3, 'ORB%': 4.0, 'DRB%': 9.0, 'TRB%': 6.5, 'AST%': 11.6, 'STL%': 2.0, 'BLK%': 0.9, 'TOV%': 9.5, 'USG%': 14.4, 'OWS': 0.6, 'DWS': 0.8, 'WS': 1.4, 'WS/48': 0.086, 'OBPM': -3.8, 'DBPM': 0.7, 'BPM': -3.1, 'VORP': -0.2}
[adv] Ariel Hukporti → {'PER': 7.6, 'TS%': 0.654, '3PAr': 0.0, 'FTr': 0.419, 'ORB%': 7.5, 'DRB%': 19.4, 'TRB%': 13.5, 'AST%': 6.4, 'STL%': 0.2, 'BLK%': 6.4, 'TOV%': 36.4, 'USG%': 11.6, 'OWS': -0.2, 'DWS': 0.3, 'WS': 0.1, 'WS/48': 0.02, 'OBPM': -6.5, 'DBPM': 0.9, 'BPM': -5.6, 'VORP': -0.2}
[adv] Jamison Battle → {'PER': 10.7, 'TS%': 0.59, '3PAr': 0.746, 'FTr': 0.052, 'ORB%': 3.7, 'DRB%': 12.8, 'TRB%': 8.1, 'AST%': 7.1,

Seasons: 100%|██████████| 9/9 [03:48<00:00, 25.38s/it]

[dbg] 2024-25 after injury merge: 466
Percentage calculations completed
[dbg] 2024-25 final merged: 466
↻  2024-25 differs – re-scraping
✔ Completed pull: 4,057 rows added
[salary-cap] loading local file: C:\docker_projects\coach_analysis\data\new_processed\salary_cap_history_inflated.csv
[salary-cap] rows=52, cols=['Season', 'Salary Cap', 'Luxury Tax', '1st Apron', '2nd Apron', 'BAE', 'Non-Taxpayer MLE', 'Taxpayer MLE', 'Team Room MLE']
[merge_salary_cap_data] computing Salary_Cap_Inflated
[persist] loaded 451 rows from existing master
[persist] Keys added=3606, removed=0
[persist] Detected differences:
  Row count differs: old=451, new=4057





[persist] Master CSV updated → C:\docker_projects\coach_analysis\data\new_processed\nba_player_data_final_inflated.csv
Process finished in 327.7 s — log: C:\docker_projects\coach_analysis\data\stat_pull_output\stat_pull_log_2025-07-23_11-57-46.txt
[check_existing_data] found 13 seasons in C:\docker_projects\coach_analysis\data\new_processed
[load_parquet_data] loading 1 files from C:\docker_projects\coach_analysis\data\new_processed


# Tests: