In [1]:
# =============================================================================
#  STEP 1: SETUP AND DATA LOADING
# =============================================================================
import pandas as pd
import numpy as np
import re
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import GridSearchCV
import warnings

# Silence the two warning categories that would otherwise clutter the report
# output; FutureWarning is registered first, then UserWarning.
for _ignored_category in (FutureWarning, UserWarning):
    warnings.filterwarnings('ignore', category=_ignored_category)

print("--- Step 1: Loading and Preparing Player Data ---")

def clean_col_names(df):
    """Normalize the column labels of ``df`` to lowercase snake_case, in place.

    Each label is lowercased, every run of characters other than ASCII
    letters, digits and underscores is collapsed into a single underscore,
    and leading/trailing underscores are removed.

    Args:
        df: DataFrame whose ``columns`` are rewritten. Labels that are not
            strings (for example integer headers) are converted with ``str``
            first instead of raising ``AttributeError``.

    Returns:
        The same ``df`` object, so the call can be used in an assignment.
    """
    df.columns = [
        re.sub(r'[^a-zA-Z0-9_]+', '_', str(col).lower()).strip('_')
        for col in df.columns
    ]
    return df

def standardize_player_name(name):
    """Return a normalized key used to match a player across seasons.

    The name is lowercased, periods and quote characters are removed, and a
    trailing generational suffix (jr, sr, ii, iii, iv, v) is dropped.

    Args:
        name: Raw player name. Any non-string value (e.g. NaN) yields ``''``.

    Returns:
        The normalized name with no surrounding whitespace.
    """
    if not isinstance(name, str):
        return ''
    # Trim *before* looking for the suffix: the suffix pattern is anchored at
    # end-of-string, so trailing whitespace (present in the input or left
    # behind by punctuation removal) would otherwise hide "jr"/"iii" and the
    # same player would get two different keys.
    name = re.sub(r'[.\'"]', '', name.lower()).strip()
    name = re.sub(r'\s+(jr|sr|ii|iii|iv|v)$', '', name)
    return name.strip()

# Load both seasons. A missing file is reported and both frames fall back to
# empty so the merge below is skipped.
# NOTE(review): the later steps read df_2023/df_2024/df_merged unconditionally,
# so a failed load still stops the script further down.
try:
    df_2024 = pd.read_csv('FantasyData2024_clean.csv')
    df_2023 = pd.read_csv('FantasyData2023_clean.csv')
    print("Successfully loaded player CSVs.")
except FileNotFoundError as e:
    print(f"Error loading CSVs: {e}.")
    df_2024, df_2023 = pd.DataFrame(), pd.DataFrame()

if not df_2024.empty and not df_2023.empty:
    df_2024, df_2023 = clean_col_names(df_2024), clean_col_names(df_2023)
    for df in [df_2023, df_2024]:
        df['player_standardized'] = df['player'].apply(standardize_player_name)
        # Optional columns: default to 0 so later aggregation can rely on them.
        if 'player_touches' not in df.columns: df['player_touches'] = 0
        if 'total_td' not in df.columns: df['total_td'] = 0

    # Outer join keeps players who appear in only one of the two seasons;
    # every overlapping column is suffixed with its season.
    df_merged = pd.merge(df_2023, df_2024, on='player_standardized', how='outer', suffixes=('_2023', '_2024'))
    df_merged['player'] = df_merged['player_2024'].fillna(df_merged['player_2023'])

    # Prefer the 2024 value, fall back to 2023. A player missing from 2024 has
    # NaN (not 0) in the *_2024 columns at this point, so both must be tested.
    has_2024_team = df_merged['tm_2024'].notna() & (df_merged['tm_2024'] != 0)
    df_merged['tm'] = np.where(has_2024_team, df_merged['tm_2024'], df_merged['tm_2023'])
    df_merged['tm'] = df_merged['tm'].fillna('FA')

    # Same rule for position: testing only `!= 0` let NaN through, which left
    # 2023-only players without a position once NaN was filled with 0 below.
    has_2024_pos = df_merged['fantpos_2024'].notna() & (df_merged['fantpos_2024'] != 0)
    df_merged['fantpos'] = np.where(has_2024_pos, df_merged['fantpos_2024'], df_merged['fantpos_2023'])

    # Players absent from 2024 are assumed one year older than their 2023
    # listing; 0 remains the marker for "age unknown in both seasons".
    df_merged['age'] = df_merged['age_2024'].fillna(df_merged['age_2023'] + 1).fillna(0).astype(int)

    df_merged.fillna(0, inplace=True)
    print("Player data merged using standardized names.")

# =============================================================================
#  STEP 2: TEAM-LEVEL ANALYSIS
# =============================================================================
print("\n--- Step 2: Analyzing Team Offensive Environments ---")

def aggregate_team_stats(df, year_suffix):
    """Roll player rows up to one row of offensive totals per team.

    Rows whose ``tm`` equals 0 are ignored. For every remaining team the
    passing/rushing attempts and yards, PPR points and total touchdowns are
    summed, then play count, total yards, pass rate, yards per play and an
    ``offense_score`` (mean of the percentile ranks of yards per play and of
    total touchdowns) are derived.

    Args:
        df: Player-level frame with ``tm``, ``pass_att``, ``rush_att``,
            ``pass_yds``, ``rush_yds``, ``ppr`` and ``total_td`` columns.
        year_suffix: Text appended to every output column except ``tm``.

    Returns:
        A new DataFrame with ``tm`` first, followed by the suffixed columns.
    """
    summed_sources = {
        'pass_att_team': 'pass_att',
        'rush_att_team': 'rush_att',
        'pass_yds_team': 'pass_yds',
        'rush_yds_team': 'rush_yds',
        'ppr_team': 'ppr',
        'total_td_team': 'total_td',
    }
    rostered = df.loc[df['tm'] != 0]
    team_totals = rostered.groupby('tm').agg(
        **{target: (source, 'sum') for target, source in summed_sources.items()}
    ).reset_index()

    plays = team_totals['pass_att_team'] + team_totals['rush_att_team']
    yards = team_totals['pass_yds_team'] + team_totals['rush_yds_team']
    team_totals['total_plays_team'] = plays
    team_totals['total_yards_team'] = yards

    # A team with zero plays produces NaN ratios; those are reported as 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        pass_rate = team_totals['pass_att_team'] / plays
        yards_per_play = yards / plays
    team_totals['pass_rate_team'] = pass_rate.fillna(0)
    team_totals['yards_per_play_team'] = yards_per_play.fillna(0)

    efficiency_pct = team_totals['yards_per_play_team'].rank(pct=True)
    scoring_pct = team_totals['total_td_team'].rank(pct=True)
    team_totals['offense_score'] = (efficiency_pct + scoring_pct) / 2

    return team_totals.rename(
        columns=lambda label: label if label == 'tm' else f"{label}{year_suffix}"
    )

# Team totals are computed from each season's raw player frame.
team_stats_2023 = aggregate_team_stats(df_2023, '_2023')
team_stats_2024 = aggregate_team_stats(df_2024, '_2024')

print("\n--- 2024 Team-Level Offensive Stats (Gut Check) ---")
print(team_stats_2024.sort_values(by='offense_score_2024', ascending=False).to_string(index=False))

# Attach each season's team environment to the player rows. The stats key is
# renamed to the matching season column before merging: joining with
# right_on='tm' collides with the existing df_merged['tm'] (pandas emits
# tm_x/tm_y), and dropping those replaced the player's resolved team -- with
# its 2023 / 'FA' fallback -- by the bare 2024 merge key.
df_merged = pd.merge(df_merged, team_stats_2023.rename(columns={'tm': 'tm_2023'}), on='tm_2023', how='left')
df_merged = pd.merge(df_merged, team_stats_2024.rename(columns={'tm': 'tm_2024'}), on='tm_2024', how='left')

# Percentile rank of the year-over-year change in team offense score. Players
# lacking a score in either season get NaN here and 0 after the fill below.
df_merged['offensive_momentum'] = (df_merged['offense_score_2024'] - df_merged['offense_score_2023']).rank(pct=True)
df_merged.fillna(0, inplace=True)
print("\nTeam-level features calculated and merged.")

# =============================================================================
#  STEP 3: POSITION-AWARE MACHINE LEARNING
# =============================================================================
# For each position a Ridge regression is fitted that maps a player's 2023
# stat line to his 2024 PPR total; the fitted model is then fed the 2024 stat
# line to produce the 2025 projection. Players without a model prediction
# fall back to a weighted average of their last two seasons.
print("\n--- Step 3: Predicting 2025 PPR with Position-Aware Models ---")

# Define position-specific feature sets
# Values are base column names; the season suffix (_2023 / _2024) is appended
# inside the loop below.
feature_sets = {
    'QB': ['age', 'g', 'gs', 'pass_att', 'pass_yds', 'pass_td', 'rush_att', 'rush_yds', 'rush_td', 'offense_score'],
    'RB': ['age', 'g', 'gs', 'rush_att', 'rush_yds', 'rush_td', 'tgt', 'rec', 'rec_yds', 'receiving_td', 'offense_score'],
    'WR': ['age', 'g', 'gs', 'tgt', 'rec', 'rec_yds', 'receiving_td', 'rush_att', 'rush_yds', 'offense_score'],
    'TE': ['age', 'g', 'gs', 'tgt', 'rec', 'rec_yds', 'receiving_td', 'offense_score']
}

# 0.0 doubles as the "no model prediction" marker checked after the loop.
df_merged['predicted_ppr_2025'] = 0.0

for pos, base_features in feature_sets.items():
    # Work on a copy; results are written back to df_merged by index label.
    pos_df = df_merged[df_merged['fantpos'] == pos].copy()
    if pos_df.empty: continue

    features_2023 = [f"{feat}_2023" for feat in base_features]
    features_2024 = [f"{feat}_2024" for feat in base_features]
    
    # Any feature column absent from the data is treated as all zeros.
    for col in features_2023 + features_2024:
        if col not in pos_df.columns: pos_df[col] = 0

    # Training needs more than 10 PPR points in both seasons; prediction only
    # needs more than 10 in 2024.
    trainable_players = pos_df[(pos_df['ppr_2023'] > 10) & (pos_df['ppr_2024'] > 10)]
    predictable_players = pos_df[pos_df['ppr_2024'] > 10]

    # The 5-row minimum matches the 5-fold cross-validation used below.
    if len(trainable_players) < 5 or predictable_players.empty:
        print(f"Skipping model for {pos}: Not enough reliable data.")
        continue

    # Inputs: 2023 features -> target 2024 PPR; the 2024 features are then
    # used as inputs for the 2025 projection.
    X_train_base = trainable_players[features_2023].replace([np.inf, -np.inf], 0).fillna(0)
    y_train = trainable_players['ppr_2024']
    X_predict_base = predictable_players[features_2024].replace([np.inf, -np.inf], 0).fillna(0)
    
    # The scaler is fitted on the 2023 feature distribution only and reused
    # unchanged for the 2024 features.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_base)
    X_predict_scaled = scaler.transform(X_predict_base)

    # An extra "momentum" column is appended: a constant 0.5 for every
    # training row, the real offensive_momentum value for prediction rows.
    # NOTE(review): a column that is constant during training carries no
    # signal, so the fitted weight for it is presumably ~0 and momentum would
    # not influence predictions -- TODO confirm this is intended.
    X_train_final = np.c_[X_train_scaled, np.full(len(X_train_scaled), 0.5)]
    X_predict_final = np.c_[X_predict_scaled, predictable_players['offensive_momentum'].values]
    
    # Choose the regularization strength by 5-fold CV on (negated) MSE.
    param_grid = {'alpha': [0.1, 1.0, 10.0, 15.0, 20.0, 25.0, 30.0, 50.0]}
    grid_search = GridSearchCV(Ridge(), param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(X_train_final, y_train)
    
    best_model = grid_search.best_estimator_
    print(f"Best Parameters for {pos}: {grid_search.best_params_}")
    
    predictions = best_model.predict(X_predict_final)
    
    # predictable_players keeps df_merged's index labels, so this assigns each
    # prediction to the matching player row.
    df_merged.loc[predictable_players.index, 'predicted_ppr_2025'] = predictions
    print(f"Successfully built robust model for {pos}.")

# Fallback for rows still at exactly 0 (no model, or not predictable):
# 60% of 2024 PPR plus 40% of 2023 PPR.
weighted_avg_score = (df_merged['ppr_2024'] * 0.6) + (df_merged['ppr_2023'] * 0.4)
df_merged['predicted_ppr_2025'] = np.where(df_merged['predicted_ppr_2025'] == 0, weighted_avg_score, df_merged['predicted_ppr_2025'])

# Keep projections inside [0, 500]; negative model outputs become 0.
df_merged['predicted_ppr_2025'] = df_merged['predicted_ppr_2025'].clip(lower=0, upper=500)

# =============================================================================
#  STEP 4: TIERING AND FINAL RANKING
# =============================================================================
# Each position is ranked by projected PPR. The top 50 are grouped into at
# most `num_tiers` k-means clusters on (projection, age); clusters are then
# numbered 1..k by descending mean projection. Everyone outside the top 50
# is placed in tier `num_tiers + 1`.
print("\n--- Step 4: Generating Tiers and Positional Rankings ---")
final_rankings = []
positions_to_rank = ['QB', 'WR', 'RB', 'TE']
num_tiers = 5

for pos in positions_to_rank:
    pos_df = df_merged[(df_merged['fantpos'] == pos) & (df_merged['predicted_ppr_2025'] > 0)].copy()
    if pos_df.empty:
        continue

    # After this reset the index is the 0-based rank order, which is what the
    # tier assignment below aligns on.
    pos_df = pos_df.sort_values(by='predicted_ppr_2025', ascending=False).reset_index(drop=True)

    # pos_df is non-empty here, so the tiering slice always has >= 1 row.
    tiering_df = pos_df.head(50)
    k = min(num_tiers, len(tiering_df))
    if k <= 1:
        top_tiers = pd.Series(1, index=tiering_df.index)
    else:
        X_cluster = tiering_df[['predicted_ppr_2025', 'age']].values
        kmeans = KMeans(n_clusters=k, random_state=42, n_init=25)
        cluster_labels = pd.Series(kmeans.fit_predict(X_cluster), index=tiering_df.index)

        # Cluster ids are arbitrary; renumber so tier 1 has the highest mean.
        cluster_means = (
            tiering_df['predicted_ppr_2025']
            .groupby(cluster_labels)
            .mean()
            .sort_values(ascending=False)
        )
        label_to_tier = {label: tier for tier, label in enumerate(cluster_means.index, start=1)}
        top_tiers = cluster_labels.map(label_to_tier)

    # Align by row index rather than merging on player_standardized: a name
    # that is not unique within the position would multiply rows in a merge
    # and corrupt pos_rank.
    pos_df['tier'] = top_tiers.reindex(pos_df.index).fillna(num_tiers + 1).astype(int)
    pos_df['pos_rank'] = range(1, len(pos_df) + 1)
    final_rankings.append(pos_df)

if final_rankings:
    final_df = pd.concat(final_rankings, ignore_index=True)
else:
    final_df = pd.DataFrame()

# =============================================================================
#  STEP 5: DISPLAYING THE RESULTS
# =============================================================================
print("\n--- Step 5: Final Rankings and Tiers ---")

if final_df.empty:
    print("Could not generate final rankings due to data issues.")
else:
    # Report columns, in the order they are printed.
    display_cols = [
        'player', 'tm', 'fantpos', 'age', 'pos_rank', 'tier',
        'predicted_ppr_2025', 'ppr_2024', 'ppr_2023', 'offensive_momentum'
    ]

    # Any report column the data lacks is shown as a column of zeros so the
    # selection below cannot fail.
    absent_cols = [name for name in display_cols if name not in final_df.columns]
    for absent_col in absent_cols:
        final_df[absent_col] = 0

    final_df = final_df[display_cols].copy()
    for rounded_col, decimals in (('predicted_ppr_2025', 1), ('offensive_momentum', 2)):
        final_df[rounded_col] = final_df[rounded_col].round(decimals)

    # One table per position: the first 20 rows in their existing order.
    for pos in positions_to_rank:
        print(f"\n====================== TOP 20 {pos} RANKINGS FOR 2025 ======================")
        pos_rows = final_df.loc[final_df['fantpos'] == pos]
        if not pos_rows.empty:
            print(pos_rows.head(20).to_string(index=False))
        else:
            print(f"No players found for position {pos}")


  from pandas.core.computation.check import NUMEXPR_INSTALLED


--- Step 1: Loading and Preparing Player Data ---
Successfully loaded player CSVs.
Player data merged using standardized names.

--- Step 2: Analyzing Team Offensive Environments ---

--- 2024 Team-Level Offensive Stats (Gut Check) ---
 tm  pass_att_team_2024  rush_att_team_2024  pass_yds_team_2024  rush_yds_team_2024  ppr_team_2024  total_td_team_2024  total_plays_team_2024  total_yards_team_2024  pass_rate_team_2024  yards_per_play_team_2024  offense_score_2024
BAL                 477               554.0                4189              3189.0         1743.8                  63                 1031.0                 7378.0             0.462658                  7.156159            0.984375
DET                 550               533.0                4701              2487.0         1841.1                  68                 1083.0                 7188.0             0.507849                  6.637119            0.953125
TAM                 570               477.0                4500     

Best Parameters for TE: {'alpha': 10.0}
Successfully built robust model for TE.

--- Step 4: Generating Tiers and Positional Rankings ---

--- Step 5: Final Rankings and Tiers ---

            player  tm fantpos  age  pos_rank  tier  predicted_ppr_2025  ppr_2024  ppr_2023  offensive_momentum
     Lamar Jackson BAL      QB   27         1     1               357.1     430.4     331.2                0.63
    Jayden Daniels WAS      QB   24         2     1               312.3     355.8       0.0                0.00
    Baker Mayfield TAM      QB   29         3     1               302.1     365.8     274.1                0.97
        Joe Burrow CIN      QB   28         4     1               289.4     372.8     147.2                0.82
        Josh Allen BUF      QB   28         5     1               284.8     379.0     392.6                0.45
       Jalen Hurts PHI      QB   26         6     1               278.7     315.1     356.8                0.54
        Jared Goff DET      QB   30