# FantasyPros Projections Analysis

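This notebook loads the most recent scraped FantasyPros projections from `data/projections/`, applies the league's custom scoring from `config/league-config.yaml`, ranks players overall and by position, and saves the top 300 to a dated CSV under `data/`.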

In [8]:
import pandas as pd
import numpy as np
import yaml
import os
from datetime import datetime
import glob

# Notebook-wide pandas rendering limits: cap displayed columns and console width
pd.options.display.max_columns = 20
pd.options.display.width = 150

In [9]:
# Load the most recent projection files
def load_latest_projections(position='all_positions'):
    """Load the most recent projection CSV for a position.

    Files are looked up as ``data/projections/projections_{position}_*.csv``
    relative to the current working directory, and the lexicographically
    greatest file name is taken as the newest one.

    Args:
        position: Position tag used in the file name (default
            ``'all_positions'``).

    Returns:
        A DataFrame with a single ``PLAYER`` name column (placed last) and
        every column other than ``PLAYER``, ``POSITION`` and ``SCRAPE_DATE``
        coerced to numeric, with unparseable or missing values set to 0.
        Returns ``None`` (after printing a hint) when no file matches.
    """
    pattern = f'data/projections/projections_{position}_*.csv'
    files = glob.glob(pattern)

    if not files:
        print(f"No projection files found for {position}")
        print("Run 'python scrape_projections.py' first")
        return None

    # Assumes the file-name suffix is a sortable stamp (e.g. YYYYMMDD), so the
    # lexicographic maximum is the latest scrape.
    latest_file = max(files)
    print(f"Loading: {latest_file}")

    df = pd.read_csv(latest_file)

    # Player names can be split across two columns: some rows carry the name
    # in the "unnamed" header column, others in the plain PLAYER column.
    # Coalesce them (unnamed column wins where both are set) rather than
    # discarding PLAYER outright, which would blank the name on rows that
    # only have it there.
    unnamed_col = 'UNNAMED:_0_LEVEL_0_PLAYER'
    if unnamed_col in df.columns:
        player_names = df[unnamed_col]
        if 'PLAYER' in df.columns:
            player_names = player_names.combine_first(df['PLAYER'])
            df = df.drop(columns=['PLAYER'])
        df = df.drop(columns=[unnamed_col])
        # Re-add as the final column under the canonical name
        df['PLAYER'] = player_names

    # Convert all stat columns to numeric and fill NaN with 0
    stat_columns = [col for col in df.columns if col not in ['PLAYER', 'POSITION', 'SCRAPE_DATE']]
    for col in stat_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    return df

# Pull in the newest combined file covering every position
projections = load_latest_projections(position='all_positions')

# The loader returns None when no file was found, so only summarise real data
if projections is not None:
    print(f"\nLoaded {len(projections)} player projections")
    print(f"Positions: {projections['POSITION'].value_counts().to_dict()}")
    # Preview the first five rows
    display(projections.head(5))

Loading: data/projections/projections_all_positions_20250814.csv

Loaded 747 player projections
Positions: {'WR': 231, 'RB': 188, 'TE': 157, 'QB': 102, 'K': 37, 'DST': 32}


Unnamed: 0,PASSING_ATT,PASSING_CMP,PASSING_YDS,PASSING_TDS,PASSING_INTS,RUSHING_ATT,RUSHING_YDS,RUSHING_TDS,MISC_FL,MISC_FPTS,...,FPTS,SACK,INT,FR,FF,TD,SAFETY,PA,YDS_AGN,PLAYER
0,522.4,334.7,3914.7,29.0,10.5,115.7,571.8,10.7,3.4,376.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Josh Allen BUF
1,480.4,321.0,3923.2,32.5,8.3,134.7,827.9,3.9,4.5,375.6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Lamar Jackson BAL
2,461.5,311.7,3536.8,23.7,8.9,147.2,633.0,12.6,3.9,358.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Jalen Hurts PHI
3,536.2,364.4,3899.3,26.9,10.7,136.5,746.8,5.6,2.5,355.8,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Jayden Daniels WAS
4,626.7,434.1,4635.9,35.9,10.5,48.2,180.1,2.1,3.8,341.6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Joe Burrow CIN


In [10]:
# Load league configuration
# The encoding is passed explicitly so the file is decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open('config/league-config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Scoring weights, grouped by section, used by the cells that follow
scoring = config['scoring']
print("League Scoring System:")
print(f"Pass TD: {scoring['passing']['touchdown']} pts")
# NOTE(review): the "Rush/Rec" label is filled from the rushing section only
print(f"Rush/Rec TD: {scoring['rushing']['touchdown']} pts")
print(f"PPR: {scoring['receiving'].get('reception', 0)} pts")

League Scoring System:
Pass TD: 4 pts
Rush/Rec TD: 6 pts
PPR: 0 pts


In [11]:
# Ordered (stat column, config section, config key) scoring rules. The order
# matches the accumulation order: passing, rushing, receiving, fumbles,
# kicking, defense.
# NOTE(review): only extra points (XPT) are scored for kickers, and the
# FF / PA / YDS_AGN columns are not scored at all - confirm this matches
# the league rules.
_SCORING_RULES = (
    ('PASSING_YDS', 'passing', 'yards'),
    ('PASSING_TDS', 'passing', 'touchdown'),
    ('PASSING_INTS', 'passing', 'interception'),
    ('RUSHING_YDS', 'rushing', 'yards'),
    ('RUSHING_TDS', 'rushing', 'touchdown'),
    ('RECEIVING_REC', 'receiving', 'reception'),
    ('RECEIVING_YDS', 'receiving', 'yards'),
    ('RECEIVING_TDS', 'receiving', 'touchdown'),
    ('MISC_FL', 'miscellaneous', 'fumble_lost'),
    ('XPT', 'kicking', 'pat'),
    ('SACK', 'defense', 'sack'),
    ('INT', 'defense', 'interception'),
    ('FR', 'defense', 'fumble_recovery'),
    ('SAFETY', 'defense', 'safety'),
    ('TD', 'defense', 'return_touchdowns'),
)


def calculate_fantasy_points_vectorized(df, scoring_config):
    """Calculate fantasy points using vectorized operations for all players at once.

    Each stat column present in ``df`` is multiplied by its weight from
    ``scoring_config[section][key]`` and the products are summed per row.

    Args:
        df: DataFrame of projected stats. Columns that are absent are skipped.
        scoring_config: Mapping of section name (``'passing'``, ``'rushing'``,
            ``'receiving'``, ``'miscellaneous'``, ``'kicking'``, ``'defense'``)
            to a mapping of weights. A missing or empty section, or a missing
            key, counts as a weight of 0 instead of raising.

    Returns:
        A float Series aligned to ``df.index`` with totals rounded to two
        decimals. NaN stats are not filled here and propagate to the total.
    """
    points = pd.Series(0.0, index=df.index)

    for column, section, key in _SCORING_RULES:
        if column not in df.columns:
            continue
        # `or {}` also covers a section that is present but empty (None)
        section_weights = scoring_config.get(section) or {}
        points += df[column] * section_weights.get(key, 0)

    return points.round(2)

# Score every player under the league rules, then keep the 300 best
if projections is not None:
    # New column is written onto the loaded frame itself
    projections['FANTASY_PTS'] = calculate_fantasy_points_vectorized(projections, scoring)

    # Highest projected total first; the leading 300 rows become the working set
    projections = projections.sort_values(by='FANTASY_PTS', ascending=False)
    top_300 = projections.iloc[:300].copy()

    print(f"Filtered from {len(projections)} to top 300 players by fantasy points")
    print("\nTop 20 Players by Projected Fantasy Points:")
    # MISC_FPTS is shown next to the recomputed FANTASY_PTS for comparison
    display(top_300.loc[:, ['PLAYER', 'POSITION', 'MISC_FPTS', 'FANTASY_PTS']].iloc[:20])

Filtered from 747 to top 300 players by fantasy points

Top 20 Players by Projected Fantasy Points:


Unnamed: 0,PLAYER,POSITION,MISC_FPTS,FANTASY_PTS
1,Lamar Jackson BAL,QB,375.6,367.52
0,Josh Allen BUF,QB,376.7,366.17
2,Jalen Hurts PHI,QB,358.7,349.57
3,Jayden Daniels WAS,QB,355.8,345.45
4,Joe Burrow CIN,QB,341.6,331.05
5,Patrick Mahomes II KC,QB,328.8,317.03
7,Kyler Murray ARI,QB,319.1,307.99
6,Baker Mayfield TB,QB,321.6,307.24
8,Brock Purdy SF,QB,312.1,299.12
9,Bo Nix DEN,QB,308.8,296.48


In [12]:
# Rank players within each position, restricted to the overall top 300
if projections is not None:
    for position in ('QB', 'RB', 'WR', 'TE', 'K', 'DST'):
        pos_df = top_300.loc[top_300['POSITION'] == position].copy()
        # Nothing to show for a position with no player in the top 300
        if len(pos_df) == 0:
            continue

        # top_300 is already ordered by FANTASY_PTS, so row order is the rank
        pos_df['POS_RANK'] = range(1, pos_df.shape[0] + 1)

        print(f"\nTop 10 {position}s (from top 300 overall):")
        display(pos_df.loc[:, ['POS_RANK', 'PLAYER', 'MISC_FPTS', 'FANTASY_PTS']].iloc[:10])
        print(f"Total {position}s in top 300: {len(pos_df)}")


Top 10 QBs (from top 300 overall):


Unnamed: 0,POS_RANK,PLAYER,MISC_FPTS,FANTASY_PTS
1,1,Lamar Jackson BAL,375.6,367.52
0,2,Josh Allen BUF,376.7,366.17
2,3,Jalen Hurts PHI,358.7,349.57
3,4,Jayden Daniels WAS,355.8,345.45
4,5,Joe Burrow CIN,341.6,331.05
5,6,Patrick Mahomes II KC,328.8,317.03
7,7,Kyler Murray ARI,319.1,307.99
6,8,Baker Mayfield TB,321.6,307.24
8,9,Brock Purdy SF,312.1,299.12
9,10,Bo Nix DEN,308.8,296.48


Total QBs in top 300: 37

Top 10 RBs (from top 300 overall):


Unnamed: 0,POS_RANK,PLAYER,MISC_FPTS,FANTASY_PTS
102,1,Saquon Barkley PHI,273.1,273.17
103,2,Bijan Robinson ATL,263.2,262.99
104,3,Jahmyr Gibbs DET,258.9,259.23
105,4,Derrick Henry BAL,248.4,248.56
106,5,Christian McCaffrey SF,228.4,228.67
107,6,Josh Jacobs GB,226.1,226.58
108,7,De'Von Achane MIA,223.1,223.46
109,8,Ashton Jeanty LV,217.4,217.63
110,9,Jonathan Taylor IND,216.5,216.36
111,10,Kyren Williams LAR,213.0,213.42


Total RBs in top 300: 72

Top 10 WRs (from top 300 overall):


Unnamed: 0,POS_RANK,PLAYER,MISC_FPTS,FANTASY_PTS
290,1,Ja'Marr Chase CIN,231.2,231.04
291,2,Justin Jefferson MIN,205.1,205.11
292,3,CeeDee Lamb DAL,193.3,193.18
293,4,Puka Nacua LAR,187.7,187.96
294,5,Nico Collins HOU,183.5,183.9
295,6,Malik Nabers NYG,183.3,183.07
296,7,Brian Thomas Jr. JAC,182.1,182.18
297,8,Amon-Ra St. Brown DET,180.9,181.43
298,9,A.J. Brown PHI,172.8,173.07
299,10,Drake London ATL,169.8,169.97


Total WRs in top 300: 109

Top 10 TEs (from top 300 overall):


Unnamed: 0,POS_RANK,PLAYER,MISC_FPTS,FANTASY_PTS
521,1,Brock Bowers LV,149.7,148.75
522,2,George Kittle SF,147.6,147.71
523,3,Trey McBride ARI,139.4,139.13
524,4,Mark Andrews BAL,117.3,117.3
525,5,Sam LaPorta DET,115.6,115.79
526,6,Travis Kelce KC,106.9,106.9
527,7,T.J. Hockenson MIN,106.6,106.52
528,8,David Njoku CLE,100.4,100.35
529,9,Tucker Kraft GB,92.5,92.49
530,10,Tyler Warren IND,88.6,88.35


Total TEs in top 300: 40

Top 10 Ks (from top 300 overall):


Unnamed: 0,POS_RANK,PLAYER,MISC_FPTS,FANTASY_PTS
683,1,,0.0,51.1
685,2,,0.0,48.4
686,3,,0.0,47.5
679,4,,0.0,47.3
687,5,,0.0,47.2
681,6,,0.0,46.2
680,7,,0.0,46.1
690,8,,0.0,44.3
689,9,,0.0,42.3
691,10,,0.0,42.2


Total Ks in top 300: 10

Top 10 DSTs (from top 300 overall):


Unnamed: 0,POS_RANK,PLAYER,MISC_FPTS,FANTASY_PTS
715,1,,0.0,116.2
716,2,,0.0,116.2
717,3,,0.0,115.5
718,4,,0.0,115.4
719,5,,0.0,114.4
720,6,,0.0,112.1
721,7,,0.0,110.7
722,8,,0.0,108.7
723,9,,0.0,108.4
724,10,,0.0,108.2


Total DSTs in top 300: 32


In [13]:
# Summarise, sanity-check and export the top-300 rankings
if projections is not None:
    # Rows are already in descending FANTASY_PTS order, so position in the frame is the rank
    top_300['OVERALL_RANK'] = range(1, top_300.shape[0] + 1)

    # How many players each position contributes to the top 300
    pos_dist = top_300['POSITION'].value_counts()
    print("\nPosition distribution in top 300:")
    for pos, count in pos_dist.to_dict().items():
        print(f"{pos}: {count} players")

    # Quick checks: row count, score range, number of numeric columns
    print(f"\nValidation:")
    print(f"- Total players: {len(top_300)}")
    print(f"- Fantasy points range: {top_300['FANTASY_PTS'].min():.2f} - {top_300['FANTASY_PTS'].max():.2f}")
    print(f"- All stat columns numeric: {top_300.select_dtypes(include=[np.number]).shape[1]} numeric columns")

    # File name carries today's date; the index is not written
    output_file = f'data/rankings_top300_{datetime.now().strftime("%Y%m%d")}.csv'
    top_300.to_csv(path_or_buf=output_file, index=False)
    print(f"\nTop 300 rankings saved to: {output_file}")


Position distribution in top 300:
WR: 109 players
RB: 72 players
TE: 40 players
QB: 37 players
DST: 32 players
K: 10 players

Validation:
- Total players: 300
- Fantasy points range: 41.79 - 367.52
- All stat columns numeric: 27 numeric columns

Top 300 rankings saved to: data/rankings_top300_20250814.csv
