# 06 Advanced Metrics Notebook

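This notebook runs stage 06 (advanced metrics) of the Fantasy Football Analysis project. It analyzes how efficiency metrics and PFF grades correlate with fantasy points, plots those results, and saves the resulting tables to the processed-data directory.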

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

# Add the parent directory to the import path so the `src.*` project modules
# imported later in this notebook can be found.
# The membership check keeps this cell idempotent: re-running it does not
# accumulate duplicate '..' entries on sys.path.
PROJECT_ROOT = '..'
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Set display options: show up to 50 columns and allow output up to
# 1000 characters wide before pandas wraps it.
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 1000)

In [2]:
# TODO: Implement 06 advanced metrics analysis

In [3]:
# Import project modules
from src.data.data_loader import load_config, load_all_data
from src.data.data_loader import load_csv_data
from src.analysis.advanced_metrics import (
    analyze_efficiency_metrics,
    analyze_pff_grade_correlations
)
from src.visualization.advanced_metrics_vis import (
    plot_efficiency_correlations,
    plot_efficiency_outliers,
    plot_pff_correlations
)
from src.utils.validation import validate_analysis_output

# Pandas display settings: show at most 50 columns and allow output up to
# 1000 characters wide.
pd.options.display.max_columns = 50
pd.options.display.width = 1000


ImportError: cannot import name 'plot_efficiency_outliers' from 'src.visualization.advanced_metrics_vis' (/Users/henrymarsh/Documents/fantasy_football_analysis/src/visualization/advanced_metrics_vis.py)

In [4]:
# 1. Load configuration and processed data
config = load_config()
opportunity_path = os.path.join(config['data_paths']['processed_data'], 'opportunity_share.csv')

# Fail fast with an actionable message when the input exists but is a
# zero-byte file; otherwise the only hint is pandas' terse
# "No columns to parse from file". The exception type matches what the CSV
# parser raises for an empty file, so the failure mode itself is unchanged.
if os.path.isfile(opportunity_path) and os.path.getsize(opportunity_path) == 0:
    raise pd.errors.EmptyDataError(
        f"{opportunity_path} is empty (0 bytes). Re-run the stage that produces "
        "opportunity_share.csv before running this notebook."
    )
opportunity_df = load_csv_data(opportunity_path)

# Load additional position-specific data
data_dict = load_all_data(config)
passing_df = data_dict['passing_data']
receiving_df = data_dict['receiving_data']
rushing_df = data_dict['rushing_data']

# Working copy of the opportunity data; the analyses below operate on this frame.
# NOTE(review): the position-specific frames loaded above are not merged in
# here - confirm whether a merge of advanced metrics is still intended.
player_df = opportunity_df.copy()

2025-03-28 21:14:02,185 - INFO - Configuration loaded from config/config.yaml
2025-03-28 21:14:02,186 - INFO - Loading data from data/processed/opportunity_share.csv
2025-03-28 21:14:02,188 - ERROR - Error loading data from data/processed/opportunity_share.csv: No columns to parse from file


EmptyDataError: No columns to parse from file

In [None]:
# 2. Analyze efficiency metrics
# The project helper returns a mapping of named result tables; each table is
# shown only when its key is present.
efficiency_results = analyze_efficiency_metrics(player_df)

# Correlation table, ordered by position and then strongest correlation first
if 'efficiency_correlations' in efficiency_results:
    print("Efficiency Metric Correlations with Fantasy Points:")
    eff_corr_table = efficiency_results['efficiency_correlations']
    display(eff_corr_table.sort_values(by=['Position', 'Correlation'], ascending=[True, False]))

# Outlier table, ordered by position and then highest efficiency score first
if 'efficiency_outliers' in efficiency_results:
    print("\nEfficiency Outliers (Regression/Improvement Candidates):")
    eff_outlier_table = efficiency_results['efficiency_outliers']
    display(eff_outlier_table.sort_values(by=['Position', 'Efficiency_Score'], ascending=[True, False]))

# Per-position top-10 leaderboard by composite efficiency score
if 'composite_efficiency_scores' in efficiency_results:
    print("\nTop Players by Composite Efficiency Score (by Position):")
    eff_composite_table = efficiency_results['composite_efficiency_scores']
    eff_leaderboard_cols = ['Player', 'Team_std', 'Half_PPR', 'Composite_Efficiency']
    for position in ['QB', 'RB', 'WR', 'TE']:
        position_mask = eff_composite_table['Position'] == position
        position_rows = eff_composite_table[position_mask]
        # Positions with no rows are skipped silently
        if position_rows.empty:
            continue
        print(f"\nTop 10 {position}s by Efficiency:")
        ranked_rows = position_rows.sort_values('Composite_Efficiency', ascending=False)
        display(ranked_rows[eff_leaderboard_cols].head(10))

In [None]:
# 3. Analyze PFF grade correlations
# The project helper returns a mapping of named result tables; each table is
# shown only when its key is present.
pff_results = analyze_pff_grade_correlations(player_df)

# Grade correlation table, ordered by position and then strongest correlation first
if 'pff_grade_correlations' in pff_results:
    print("\nPFF Grade Correlations with Fantasy Points:")
    pff_corr_table = pff_results['pff_grade_correlations']
    display(pff_corr_table.sort_values(by=['Position', 'Correlation'], ascending=[True, False]))

# Per-position top-10 leaderboard by composite PFF score
if 'composite_pff_scores' in pff_results:
    print("\nTop Players by Composite PFF Score (by Position):")
    pff_composite_table = pff_results['composite_pff_scores']
    pff_leaderboard_cols = ['Player', 'Team_std', 'Half_PPR', 'Composite_PFF_Score']
    for position in ['QB', 'RB', 'WR', 'TE']:
        position_mask = pff_composite_table['Position'] == position
        position_rows = pff_composite_table[position_mask]
        # Positions with no rows are skipped silently
        if position_rows.empty:
            continue
        print(f"\nTop 10 {position}s by PFF Score:")
        ranked_rows = position_rows.sort_values('Composite_PFF_Score', ascending=False)
        display(ranked_rows[pff_leaderboard_cols].head(10))

# Composite-score correlation table, shown as returned (no re-sorting)
if 'composite_pff_correlations' in pff_results:
    print("\nComposite PFF Score Correlations with Fantasy Points:")
    display(pff_results['composite_pff_correlations'])


In [None]:
# 4. Create visualizations
# Every figure follows the same steps (new figure -> project plotting function
# -> tight layout -> save -> show), so the sequence lives in one helper.
FIGURES_DIR = '../outputs/figures'

# savefig raises FileNotFoundError when the target directory is missing,
# so create it (and any missing parents) up front; a no-op if it exists.
os.makedirs(FIGURES_DIR, exist_ok=True)


def _render_and_save(plot_func, data, filename, figsize):
    """Draw one figure with ``plot_func(data)``, save it, then show it.

    Args:
        plot_func: Plotting callable that takes ``data`` as its only argument.
        data: Result table handed to ``plot_func``.
        filename: File name of the saved image inside ``FIGURES_DIR``.
        figsize: ``(width, height)`` passed to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    plot_func(data)
    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES_DIR, filename))
    plt.show()


# Efficiency correlations
if 'efficiency_correlations' in efficiency_results:
    _render_and_save(
        plot_efficiency_correlations,
        efficiency_results['efficiency_correlations'],
        'efficiency_correlations.png',
        figsize=(12, 8),
    )

# Efficiency outliers
if 'efficiency_outliers' in efficiency_results:
    _render_and_save(
        plot_efficiency_outliers,
        efficiency_results['efficiency_outliers'],
        'efficiency_outliers.png',
        figsize=(14, 10),
    )

# PFF correlations
if 'pff_grade_correlations' in pff_results:
    _render_and_save(
        plot_pff_correlations,
        pff_results['pff_grade_correlations'],
        'pff_correlations.png',
        figsize=(12, 8),
    )

In [None]:
# 5. Save processed data
def _export_tables(tables):
    """Write each table in ``tables`` to ``<key>.csv`` in the processed-data directory.

    The directory comes from ``config['data_paths']['processed_data']``; the
    frame index is not written.
    """
    for table_name, table in tables.items():
        csv_path = os.path.join(config['data_paths']['processed_data'], f"{table_name}.csv")
        table.to_csv(csv_path, index=False)


# Efficiency tables are written first, then the PFF tables.
_export_tables(efficiency_results)
_export_tables(pff_results)

print("Advanced metrics analysis completed!")