In [None]:
# Allow output from every line: set IPython's ast_node_interactivity option to "all"
# so a notebook cell is not limited to echoing only its final expression.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
from scipy.stats import linregress

In [4]:
# Input locations: every CSV consumed below lives in this one folder
base_path = r"C:\Users\Christopher\OneDrive - Syracuse University\PythonSportAnalytics\Section_8\Final_Project\CSV_Files"

# One roster export per season
seasons = (2022, 2023, 2024)
roster_files = [os.path.join(base_path, f"cfbstats_team_rosters_{year}.csv") for year in seasons]

# Team-level statistics files spanning the 2022-2024 seasons
win_stats_file = os.path.join(
    base_path, "combined_fbs_stats_2022_2024.csv"
)
passing_defense_file = os.path.join(
    base_path, "cfbstats_team_passing_stats_defense_2022_2024.csv"
)
rushing_defense_file = os.path.join(
    base_path, "cfbstats_team_rushing_stats_defense_2022_2024.csv"
)
per_game_stats_file = os.path.join(
    base_path, "fbs_total_team_stats_per_game_average_by_season_2022_2024.csv"
)

# Output folder for the generated PDF charts
visualizations_path = r"C:\Users\Christopher\OneDrive - Syracuse University\PythonSportAnalytics\Section_8\Final_Project\Visualizations\Position_Analysis"

# Roster position codes (matched against the "Pos" column) that make up each
# analysis category. The category names matter: later steps branch on whether
# the name contains "Rushing" or "Passing".
position_groups = {
    "Offensive_Rushing": ["OC", "OG", "OL", "OT"],
    "Defensive_Rushing": ["DE", "DL", "NG", "DT", "EDGE"],
    "Offensive_Passing": ["WR", "TE"],
    "Defensive_Passing": ["CB", "DB", "S", "SAF"],
}

# Function to convert height from ft-in format to inches
def convert_height(height):
    """Turn a feet-and-inches height string such as ``6'2"`` into total inches.

    Args:
        height: Raw roster value. Only strings containing a single-quote
            feet mark are parsed.

    Returns:
        The height in inches as an int (feet * 12 + inches). A missing inches
        part counts as 0. ``np.nan`` is returned for non-string input, for
        strings without a feet mark, and when either part is not an integer.
    """
    # Guard clause: anything that is not a feet-mark string is treated as missing
    if not isinstance(height, str) or "'" not in height:
        return np.nan

    pieces = height.split("'")
    feet_text = pieces[0].strip()
    inch_text = pieces[1].strip()  # always present because "'" is in the string

    try:
        feet = int(feet_text)
        # An optional trailing inch mark (") is removed before parsing
        inches = int(inch_text.replace('"', '')) if inch_text else 0
    except ValueError:
        return np.nan

    return feet * 12 + inches

# Load and process roster data.
# Each existing season file is read, its "Ht" column is converted to inches with
# convert_height, and its "Wt" column is coerced to numeric (unparseable -> NaN).
all_rosters = []
missing_roster_files = []
for file in roster_files:
    if not os.path.exists(file):
        # Remember the gap: a silently skipped season would change the
        # per-team averages computed from roster_data without any notice.
        missing_roster_files.append(file)
        continue
    df = pd.read_csv(file)
    df["Ht"] = df["Ht"].apply(convert_height)
    df["Wt"] = pd.to_numeric(df["Wt"], errors='coerce')
    all_rosters.append(df)

if missing_roster_files:
    print("Warning: roster file(s) not found and skipped:")
    for file in missing_roster_files:
        print(f"  {file}")

# pd.concat fails on an empty list with a generic message; fail with the
# actual cause instead.
if not all_rosters:
    raise FileNotFoundError(
        "No roster CSV files were found; expected: " + ", ".join(roster_files)
    )

# Stack all seasons into one frame with a fresh 0..n-1 index
roster_data = pd.concat(all_rosters, ignore_index=True)

# Composite size score: height (inches) plus weight, NaN when either is missing.
# NOTE(review): despite the column name this is a plain sum, not a body-mass
# index -- confirm that is the intended metric.
roster_data["BMI_Composite_Score"] = roster_data["Ht"].add(roster_data["Wt"])

# Per-team mean height, weight and composite score for each position group
size_columns = ["Ht", "Wt", "BMI_Composite_Score"]
aggregated_data = {}
for category, positions in position_groups.items():
    in_group = roster_data["Pos"].isin(positions)
    filtered_data = roster_data.loc[in_group]
    team_means = filtered_data.groupby("Team")[size_columns].mean()
    aggregated_data[category] = team_means.reset_index()

# Win/loss record: keep only the team name and the three record columns
win_columns = ["Pct", "W", "L"]
df_win_stats = pd.read_csv(win_stats_file)[["Team"] + win_columns]

# Collapse multiple rows per team into one row of per-team means
df_win_stats_combined = (
    df_win_stats
    .groupby("Team")[win_columns]
    .mean()
    .reset_index()
)

# Attach the record to every position-group table; the left join keeps teams
# that have roster data but no win statistics (their record columns are NaN)
for category in aggregated_data:
    aggregated_data[category] = aggregated_data[category].merge(
        df_win_stats_combined, on="Team", how="left"
    )

# Offensive production columns, read from the same CSV as the win statistics
offense_columns = ["Rushing Yds", "Rushing Y/A", "Receiving Yds_Rec", "Receiving Y/G_Rec", "Scrimmage Avg", "TD_Scrimmage"]
df_offensive_stats = pd.read_csv(win_stats_file)[["Team"] + offense_columns]

# One row of means per team
df_offensive_stats_combined = df_offensive_stats.groupby("Team").mean().reset_index()

# Categories whose name contains "Rushing" get the rushing columns; all other
# categories get the receiving columns. The choice is by name only, so both the
# offensive and the defensive group of each kind receive these offensive stats.
rushing_offense = df_offensive_stats_combined[["Team", "Rushing Yds", "Rushing Y/A"]]
receiving_offense = df_offensive_stats_combined[["Team", "Receiving Yds_Rec", "Receiving Y/G_Rec"]]
for category in aggregated_data:
    offense_stats = rushing_offense if "Rushing" in category else receiving_offense
    aggregated_data[category] = aggregated_data[category].merge(
        offense_stats, on="Team", how="left"
    )

# Defensive inputs: selected columns from the passing-defense and
# rushing-defense CSV files
passing_defense_columns = ["Team", "Yards", "Yards/Att", "TD", "Yards/G"]
rushing_defense_columns = ["Team", "Yards", "Avg.", "TD", "Yards/G"]
df_passing_defense = pd.read_csv(passing_defense_file)[passing_defense_columns]
df_rushing_defense = pd.read_csv(rushing_defense_file)[rushing_defense_columns]

# One row of means per team for each defensive table
df_passing_defense_agg = df_passing_defense.groupby("Team").mean().reset_index()
df_rushing_defense_agg = df_rushing_defense.groupby("Team").mean().reset_index()

# Categories whose name contains "Rushing" get the rushing-defense columns;
# every other category gets the passing-defense columns (left join on "Team")
for category in aggregated_data:
    defense_stats = df_rushing_defense_agg if "Rushing" in category else df_passing_defense_agg
    aggregated_data[category] = aggregated_data[category].merge(
        defense_stats, on="Team", how="left"
    )

# Function to plot scatter plots with trend lines
def plot_scatter_with_trend_line(data, x, y, category, positions):
    """Draw a scatter plot of ``y`` against ``x`` with a red seaborn trend line.

    A new 8x6 figure is created and left open as the current pyplot figure;
    nothing is saved or closed here, so the caller must do both.

    Args:
        data: Table holding the ``x`` and ``y`` columns.
        x: Column name plotted on the horizontal axis.
        y: Column name plotted on the vertical axis.
        category: Category label shown at the start of the main title.
        positions: Position codes listed in the bold heading above the title.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # Figure-level heading naming the positions, above the axes title
    fig.suptitle(f"Positions: {', '.join(positions)}", fontsize=12, fontweight="bold")
    ax.set_title(f"{category}: {x} vs {y}", fontsize=14)

    # Points first, then the fitted line only (scatter=False avoids drawing the points twice)
    sns.scatterplot(data=data, x=x, y=y, ax=ax)
    sns.regplot(data=data, x=x, y=y, ax=ax, scatter=False, color='red', line_kws={"color": "red"})

    ax.set_xlabel(x)
    ax.set_ylabel(y)

# Function to save plots as separate PDF files
def save_plots_to_individual_pdfs():
    """Write one single-page PDF per category in ``aggregated_data``.

    Each file is named ``position_analysis_<category>.pdf`` and is placed in
    ``visualizations_path``, which is created if it does not exist. Categories
    whose name contains "Rushing" are plotted against "Rushing Yds"; those
    containing "Passing" against "Receiving Yds_Rec". A category matching
    neither is skipped, so no blank page or empty file is written for it.

    NOTE(review): the Defensive_* categories are therefore plotted against
    offensive yardage columns, while the defensive "Yards" columns merged into
    the same tables are not used here -- confirm this is intended.
    """
    # Make sure the output folder exists before any PDF is opened
    os.makedirs(visualizations_path, exist_ok=True)

    for category, category_data in aggregated_data.items():
        # Pick the outcome column first so nothing is opened for a category
        # that has no plot defined
        if "Rushing" in category:
            y_column = "Rushing Yds"
        elif "Passing" in category:
            y_column = "Receiving Yds_Rec"
        else:
            continue

        file_name = f"position_analysis_{category}.pdf"
        file_path = os.path.join(visualizations_path, file_name)
        positions = position_groups.get(category, [])  # Get positions for category

        with PdfPages(file_path) as pdf:
            try:
                plot_scatter_with_trend_line(category_data, "BMI_Composite_Score", y_column, category, positions)
                # Save the current figure to its respective PDF file
                pdf.savefig()
            finally:
                # Always release the figure, even if plotting or saving failed
                plt.close()

# Generate and save each plot as an individual PDF file:
# one PDF per category in aggregated_data, written under visualizations_path.
save_plots_to_individual_pdfs()
