In [6]:
%load_ext autoreload
%autoreload 2

from dave_ledger import pipeline

# Run the truncated pipeline
df = pipeline.run_dave()

# INSPECTION: Check if your bespoke IDP and Tiered Kicking rules worked
# Updated inspection columns
cols = ['full_name', 'position', 'season', 'week', 'points', 
        'sacks_suffered', 'def_sacks', 'passing_interceptions', 'def_interceptions']

display(df[cols].sort_values('points', ascending=False).head(5))

# 2. Check an IDP Star (e.g., TJ Watt) to ensure he got points
print("\n--- TJ Watt Sample ---")
watt = df[df['full_name'] == 'T.J. Watt'].sort_values('season', ascending=False).head(5)
display(watt[cols])

2025-12-31 19:11:08,981 - INFO - ‚úÖ Configuration Loaded.
2025-12-31 19:11:08,982 - INFO - 1. [TRANSFORM] Loading & Merging History...
2025-12-31 19:11:09,086 - INFO - üîß Renaming 'pfr_player_id' to 'player_id' in snaps
2025-12-31 19:11:09,097 - INFO - üîß Renaming 'gsis_id' to 'player_id' in rosters
2025-12-31 19:11:09,102 - INFO - üí° Renaming Weekly 'position' to 'fantasy_group' (Generic)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


2025-12-31 19:11:09,392 - INFO -    -> Loaded 89426 rows of history.
2025-12-31 19:11:09,393 - INFO - 2. [SCORING] Applying League Rules...
2025-12-31 19:11:09,429 - INFO - ‚úÖ Pipeline Complete (Scoring Only).


Unnamed: 0,full_name,position,season,week,points,sacks_suffered,def_sacks,passing_interceptions,def_interceptions
2187,Khalil Mack,LB,2023,4,79.35,0,6.0,0,0
83026,Chandler Jones,DL,2021,1,69.4,0,5.0,0,0
8401,Myles Garrett,DL,2025,8,65.1,0,5.0,0,0
9811,Trey Hendrickson,DL,2024,9,62.9,0,4.0,0,0
42608,Tuli Tuipulotu,LB,2025,4,60.0,0,4.0,0,0



--- TJ Watt Sample ---


Unnamed: 0,full_name,position,season,week,points,sacks_suffered,def_sacks,passing_interceptions,def_interceptions
9117,T.J. Watt,LB,2025,11,14.3,0,0.0,0,0
9120,T.J. Watt,LB,2025,14,10.2,0,0.0,0,0
9119,T.J. Watt,LB,2025,13,0.0,0,0.0,0,0
9118,T.J. Watt,LB,2025,12,17.6,0,1.0,0,0
9116,T.J. Watt,LB,2025,10,20.9,0,1.0,0,0


In [8]:
from dave_ledger import pipeline

df = pipeline.run_dave()

# Group by player and sum points
total_points = df.groupby(['full_name', 'position'])['points'].sum().sort_values(ascending=False)

# CHECK 1: Do the top scorers make sense?
print("--- üèÜ TOP 5 SCORERS (5-YR TOTAL) ---")
print(total_points.head(5))
# Expect: Josh Allen, Patrick Mahomes, or high-scoring IDPs like TJ Watt

# CHECK 2: Who are the "Ghosts"? (Players with > 50 games but 0 points)
# If this returns big names, your stats aren't merging to your roster IDs.
games_played = df.groupby('full_name').size()
ghosts = total_points[(total_points == 0) & (games_played > 50)]

print(f"\n--- üëª GHOSTS (Active but 0 points): {len(ghosts)} found ---")
if len(ghosts) > 0:
    print(ghosts.head(10))

2025-12-31 19:17:42,366 - INFO - ‚úÖ Configuration Loaded.
2025-12-31 19:17:42,367 - INFO - 1. [TRANSFORM] Loading & Merging History...
2025-12-31 19:17:42,474 - INFO - üîß Renaming 'pfr_player_id' to 'player_id' in snaps
2025-12-31 19:17:42,482 - INFO - üîß Renaming 'gsis_id' to 'player_id' in rosters
2025-12-31 19:17:42,489 - INFO - üí° Renaming Weekly 'position' to 'fantasy_group' (Generic)
2025-12-31 19:17:42,652 - INFO -    -> Loaded 89426 rows of history.
2025-12-31 19:17:42,653 - INFO - 2. [SCORING] Applying League Rules...
2025-12-31 19:17:42,689 - INFO - ‚úÖ Pipeline Complete (Scoring Only).


--- üèÜ TOP 5 SCORERS (5-YR TOTAL) ---
full_name        position
Josh Allen       QB          2338.65
Patrick Mahomes  QB          2010.18
Jalen Hurts      QB          1950.40
Justin Herbert   QB          1799.66
Jared Goff       QB          1757.40
Name: points, dtype: float64

--- üëª GHOSTS (Active but 0 points): 1 found ---
full_name        position
Corliss Waitman  P           0.0
Name: points, dtype: float64


In [9]:
# Look for outliers
max_score = df['points'].max()
min_score = df['points'].min()

print(f"üìà MAX Weekly Score: {max_score:.2f}")
print(f"üìâ MIN Weekly Score: {min_score:.2f}")

# Show the outliers
print("\n--- üö® EXTREME PERFORMANCES ---")
display(df[df['points'] > 60].sort_values('points', ascending=False)[['full_name', 'week', 'season', 'points', 'passing_tds', 'def_sacks']])

üìà MAX Weekly Score: 79.35
üìâ MIN Weekly Score: -3.40

--- üö® EXTREME PERFORMANCES ---


Unnamed: 0,full_name,week,season,points,passing_tds,def_sacks
2187,Khalil Mack,4,2023,79.35,0,6.0
83026,Chandler Jones,1,2021,69.4,0,5.0
8401,Myles Garrett,8,2025,65.1,0,5.0
9811,Trey Hendrickson,9,2024,62.9,0,4.0


In [14]:
from dave_ledger import pipeline
import pandas as pd

# 1. Run Pipeline (Fast Mode)
df = pipeline.run_dave()

# 2. Define the "Sanity Check" Logic
def get_position_records(df):
    records = []
    # Get every unique position in the dataset (QB, RB, OLB, FS, etc.)
    positions = df['position'].unique()
    
    for pos in positions:
        # Filter for that position
        pos_df = df[df['position'] == pos]
        if len(pos_df) == 0: continue
            
        # Find the max score row
        max_idx = pos_df['points'].idxmin()
        record = pos_df.loc[max_idx]
        
        records.append({
            'Pos': pos,
            'Player': record['full_name'],
            'Points': record['points'],
            'Season': record['season'],
            'Week': record['week'],
            'Stats': f"{record.get('passing_yards',0)}/{record.get('passing_tds',0)} or {record.get('rushing_yards',0)}/{record.get('rushing_tds',0)}"
        })
    
    # Return as a nice dataframe sorted by position grouping
    return pd.DataFrame(records).sort_values('Points', ascending=False)

# 3. Run and Display
print("üèÜ ALL-TIME SINGLE GAME RECORDS (BY POSITION)")
records = get_position_records(df)
display(records)

# 4. Automatic Warnings
print("\n--- ‚ö†Ô∏è AUTOMATIC SANITY CHECKS ---")
for _, row in records.iterrows():
    pos = row['Pos']
    pts = row['Points']
    
    # Rough thresholds for "Standard-ish" High Scoring IDP
    if pos == 'QB' and pts > 70:
        print(f"üö® WARNING: QB {row['Player']} scored {pts:.1f}. Check Passing TD/Yard multipliers.")
    elif pos in ['WR', 'RB'] and pts > 65:
        print(f"üö® WARNING: {pos} {row['Player']} scored {pts:.1f}. Check TD/PPR multipliers.")
    elif pos == 'K' and pts > 35:
        print(f"üö® WARNING: K {row['Player']} scored {pts:.1f}. Check FG multipliers.")
    elif pos in ['DE', 'LB', 'CB', 'S', 'OLB', 'DT'] and pts > 50:
        print(f"üëÄ NOTE: IDP {pos} {row['Player']} scored {pts:.1f}. This is high, but possible in your system.")

2025-12-31 19:25:24,705 - INFO - ‚úÖ Configuration Loaded.
2025-12-31 19:25:24,706 - INFO - 1. [TRANSFORM] Loading & Merging History...
2025-12-31 19:25:24,811 - INFO - üîß Renaming 'pfr_player_id' to 'player_id' in snaps
2025-12-31 19:25:24,818 - INFO - üîß Renaming 'gsis_id' to 'player_id' in rosters
2025-12-31 19:25:24,824 - INFO - üí° Renaming Weekly 'position' to 'fantasy_group' (Generic)
2025-12-31 19:25:24,993 - INFO -    -> Loaded 89426 rows of history.
2025-12-31 19:25:24,994 - INFO - 2. [SCORING] Applying League Rules...
2025-12-31 19:25:25,029 - INFO - ‚úÖ Pipeline Complete (Scoring Only).


üèÜ ALL-TIME SINGLE GAME RECORDS (BY POSITION)


Unnamed: 0,Pos,Player,Points,Season,Week,Stats
2,DL,Calais Campbell,0.0,2025,5,0/0 or 0/0
5,LB,Von Miller,0.0,2024,9,0/0 or 0/0
6,LS,Andrew DePaola,0.0,2025,1,0/0 or 0/0
4,OL,Robert Hainsey,-0.3,2023,9,0/0 or 0/0
10,DB,Matthias Farley,-0.3,2022,8,0/0 or -3/0
7,RB,Zamir White,-0.7,2022,5,0/0 or -7/0
9,TE,Feleipe Franks,-1.0,2021,11,0/0 or 0/0
8,WR,Marvin Mims Jr.,-1.1,2023,7,0/0 or -11/0
0,QB,Mike White,-1.4,2023,6,0/0 or -4/0
1,K,Nick Folk,-2.0,2022,16,0/0 or 0/0



--- ‚ö†Ô∏è AUTOMATIC SANITY CHECKS ---


In [2]:
from dave_ledger import pipeline

# Run the full simulation
board = pipeline.run_dave()

# Sort by points to see the stars, not the benchwarmers
display(board.sort_values('fantasy_points', ascending=False).head(20))

# Or check the summary stats
print(board['fantasy_points'].describe())

2026-01-01 02:48:04,900 - INFO - ‚úÖ Configuration Loaded.
2026-01-01 02:48:04,901 - INFO - 1. [TRANSFORM] Loading & Merging History...
2026-01-01 02:48:04,980 - INFO - üîß Renaming 'pfr_player_id' to 'player_id' in snaps
2026-01-01 02:48:04,984 - INFO - üîß Renaming 'gsis_id' to 'player_id' in rosters
2026-01-01 02:48:05,088 - INFO -    -> Loaded 89515 rows of history.
2026-01-01 02:48:05,088 - INFO - 2. [SCORING] Applying League Rules...
2026-01-01 02:48:05,111 - INFO - 3. [BASELINES] Calculating League Replacement Levels...
2026-01-01 02:48:05,123 - INFO - üìâ QB Baseline: 2.0 starts -> Rank 48 (Dillon Gabriel) = 8.61 PPG
2026-01-01 02:48:05,124 - INFO - üìâ RB Baseline: 2.5 starts -> Rank 75 (Zavier Scott) = 4.24 PPG
2026-01-01 02:48:05,125 - INFO - üìâ WR Baseline: 3.5 starts -> Rank 92 (Pat Bryant) = 5.97 PPG
2026-01-01 02:48:05,125 - INFO - üìâ TE Baseline: 1.0 starts -> Rank 18 (Colston Loveland) = 9.42 PPG
2026-01-01 02:48:05,126 - INFO - üìâ K Baseline: 1.0 starts -> R

Unnamed: 0,player_id,player_name,player_display_name,fantasy_group,position_group,headshot_url,season,week,season_type,team,...,current_team,birth_year,current_age,points,availability_score,talent_ppg,risk_cv,dcf_value,replacement_value,vorp
236,00-0034857,J.Allen,Josh Allen,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,1,REG,BUF,...,BUF,1996,30,42.76,0.179048,0.0,0.351063,0,334.118031,-334.118031
1166,00-0033106,J.Goff,Jared Goff,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,2,REG,DET,...,DET,1994,32,43.96,0.179048,0.0,0.444602,0,334.118031,-334.118031
494,00-0036945,J.Fields,Justin Fields,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,1,REG,NYJ,...,NYJ,1999,27,31.52,0.179048,0.0,0.623884,0,334.118031,-334.118031
332,00-0035710,D.Jones,Daniel Jones,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,1,REG,IND,...,IND,1997,29,31.48,0.179048,0.0,0.481774,0,334.118031,-334.118031
217,00-0034796,L.Jackson,Lamar Jackson,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,1,REG,BAL,...,BAL,1997,29,33.36,0.179048,0.0,0.463495,0,334.118031,-334.118031
403,00-0036355,J.Herbert,Justin Herbert,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,1,REG,LAC,...,LAC,1998,28,33.92,0.179048,0.0,0.411488,0,334.118031,-334.118031
132,00-0033873,P.Mahomes,Patrick Mahomes,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,1,REG,KC,...,KC,1995,31,28.02,0.179048,0.0,0.362106,0,334.118031,-334.118031
0,00-0023459,A.Rodgers,Aaron Rodgers,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,1,REG,PIT,...,PIT,1983,43,33.66,0.179048,0.0,0.435586,0,334.118031,-334.118031
413,00-0036389,J.Hurts,Jalen Hurts,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,1,REG,PHI,...,PHI,1998,28,24.28,0.179048,0.0,0.348366,0,334.118031,-334.118031
937,00-0039918,C.Williams,Caleb Williams,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,2025,1,REG,CHI,...,CHI,2001,25,26.2,0.348148,0.0,0.496986,0,334.118031,-334.118031


count    3673.000000
mean        0.927672
std         3.133224
min        -2.600000
25%         0.000000
50%         0.000000
75%         0.000000
max        38.760000
Name: fantasy_points, dtype: float64


In [4]:
def debug_data():
    import pandas as pd
    from dave_ledger import paths, config
    
    # HARDCODED FIX: Match the filename found on your disk
    suffix = "2021_2025.parquet" 
    
    raw_dir = paths.find_repo_root() / "data" / "raw"
    print(f"--- LOADING: weekly_{suffix} ---")

    # Load data
    weekly = pd.read_parquet(raw_dir / f"weekly_{suffix}")
    rosters = pd.read_parquet(raw_dir / f"rosters_{suffix}")
    
    # 1. Check ID Columns (to fix the merge)
    print("\n--- ID CHECK ---")
    for col in ['player_id', 'id', 'gsis_id', 'pfr_player_id']:
        if col in weekly.columns:
            print(f"Weekly ['{col}'] sample: {weekly[col].dropna().iloc[0]}")
        if col in rosters.columns:
            print(f"Roster ['{col}'] sample: {rosters[col].dropna().iloc[0]}")

    # 2. Check Stats (to see if we need to calculate fantasy points)
    print("\n--- STATS CHECK ---")
    if 'fantasy_points' in weekly.columns:
        print(f"Fantasy Points exist! Max: {weekly['fantasy_points'].max()}")
    else:
        print("‚ö†Ô∏è Fantasy Points column is MISSING. You must calculate it in transform.py.")

debug_data()

--- LOADING: weekly_2021_2025.parquet ---

--- ID CHECK ---
Weekly ['player_id'] sample: 00-0023459
Roster ['gsis_id'] sample: 

--- STATS CHECK ---
Fantasy Points exist! Max: 51.88
