# 01 Player Performance Notebook

This notebook implements the player performance analysis (stage 01) of the Fantasy Football Analysis project.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

# Make the parent of the working directory importable; the `src.*` imports
# used by the following cell are resolved relative to it.
sys.path.append('..')

# Display options: show up to 50 columns and use a 1000-character output width
for option_key, option_value in (('display.max_columns', 50), ('display.width', 1000)):
    pd.set_option(option_key, option_value)

In [None]:
# Import project modules
from src.data.data_loader import load_config, load_all_data
from src.data.data_processor import (
    standardize_team_names, 
    filter_season_data, 
    create_master_player_dataset,
    calculate_half_ppr_points,
    save_processed_data
)
from src.analysis.performance import (
    calculate_performance_metrics,
    analyze_expectation_vs_performance
)
from src.visualization.performance_vis import (
    plot_top_performers,
    plot_position_distributions,
    plot_expectation_vs_performance
)
from src.utils.validation import validate_analysis_output

# Pandas display options (same values as in the first cell, so this cell also
# works when run on its own): up to 50 columns, 1000-character output width.
pd.options.display.max_columns = 50
pd.options.display.width = 1000

In [None]:
# 1. Load configuration and data
# `load_config` is called without arguments; the resulting `config` is reused
# later in the notebook for the season filter (config['analysis']['season'])
# and the output folder (config['data_paths']['processed_data']).
config = load_config()
# Load every input in a single call driven by that configuration.
# NOTE(review): `data_dict` is presumably a mapping of dataset name -> DataFrame;
# confirm against src.data.data_loader.
data_dict = load_all_data(config)

In [None]:
# 2. Process data
# Standardize team names first, then keep only the season named in the
# configuration (arguments are evaluated left to right, so the order of the
# two steps is unchanged).
data_dict = filter_season_data(
    standardize_team_names(data_dict),
    config['analysis']['season'],
)

# Build the master player table and add Half-PPR scoring on top of it.
master_df = calculate_half_ppr_points(
    create_master_player_dataset(data_dict)
)

In [None]:
# 3. Calculate performance metrics
performance_df = calculate_performance_metrics(master_df)

# Sanity check: report the table size, then preview the highest scorers
print(f"Performance dataframe shape: {performance_df.shape}")
print("\nTop 5 players by Half-PPR points:")
preview_columns = ['Player', 'FantPos', 'Team', 'Half_PPR', 'Half_PPR_PPG', 'G']
ranked_by_points = performance_df.sort_values('Half_PPR', ascending=False)
display(ranked_by_points[preview_columns].head())

In [None]:
# 4. Analyze performance by position
# For each fantasy position, list its ten highest Half-PPR scorers.
positions = ['QB', 'RB', 'WR', 'TE']
position_columns = ['Player', 'Team', 'Half_PPR', 'Half_PPR_PPG', 'G']
for pos in positions:
    at_position = performance_df['FantPos'] == pos
    pos_df = performance_df[at_position]
    print(f"\nTop 10 {pos}s by Half-PPR points:")
    top_ten = pos_df.sort_values('Half_PPR', ascending=False).head(10)
    display(top_ten[position_columns])

In [None]:
# 5. Analyze expectation vs. performance
# Derive the expectation-vs-performance table from the performance metrics.
# Later cells read the columns 'ADP', 'Overall_Rank' and
# 'ADP_vs_Actual_Rank_Delta' from this result.
expectation_df = analyze_expectation_vs_performance(performance_df)

In [None]:
# Show the players at both extremes of the ADP-vs-actual rank delta
delta_column = 'ADP_vs_Actual_Rank_Delta'
report_columns = ['Player', 'FantPos', 'Team', 'ADP', 'Overall_Rank', delta_column, 'Half_PPR']

# Largest deltas first
print("\nTop 10 Overperformers (based on ADP):")
largest_delta_first = expectation_df.sort_values(delta_column, ascending=False)
display(largest_delta_first[report_columns].head(10))

# Smallest deltas first
print("\nTop 10 Underperformers (based on ADP):")
smallest_delta_first = expectation_df.sort_values(delta_column)
display(smallest_delta_first[report_columns].head(10))

In [None]:
# 6. Create visualizations
# All figures are written to the same folder. `plt.savefig` does not create
# missing directories, so make sure the folder exists before the first save
# (this is a no-op when it is already there).
figures_dir = '../outputs/figures'
os.makedirs(figures_dir, exist_ok=True)

# Top performers by position
plt.figure(figsize=(12, 8))
plot_top_performers(performance_df, n=10)
plt.tight_layout()
plt.savefig(f'{figures_dir}/top_performers_by_position.png')
plt.show()

# Position point distributions
plt.figure(figsize=(12, 8))
plot_position_distributions(performance_df)
plt.tight_layout()
plt.savefig(f'{figures_dir}/position_point_distributions.png')
plt.show()

# Expectation vs. performance
plt.figure(figsize=(12, 10))
plot_expectation_vs_performance(expectation_df)
plt.tight_layout()
plt.savefig(f'{figures_dir}/expectation_vs_performance.png')
plt.show()

In [None]:
# 7. Save processed data
# Both result tables go to the processed-data folder named in the config.
processed_dir = config['data_paths']['processed_data']
outputs_to_save = (
    (performance_df, 'player_performance.csv'),
    (expectation_df, 'player_expectation_vs_performance.csv'),
)
for frame, file_name in outputs_to_save:
    save_processed_data(frame, file_name, processed_dir)

print("Player performance analysis completed!")