In [1]:
# Notebook display setting: ask IPython to show the result of every expression
# statement in a cell, rather than only the final one.
# NOTE(review): this is assigned on the InteractiveShell class itself, so it
# presumably stays in effect for the rest of the kernel session -- confirm.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

In [4]:
# Input locations: one roster CSV per season plus the combined win/loss table.
base_path = r"C:\Users\Christopher\OneDrive - Syracuse University\PythonSportAnalytics\Section_8\Final_Project\CSV_Files"

roster_files = []
for season in (2022, 2023, 2024):
    roster_files.append(os.path.join(base_path, f"cfbstats_team_rosters_{season}.csv"))

win_stats_file = os.path.join(base_path, "combined_fbs_stats_2022_2024.csv")

# Output folder that receives the generated PDF charts.
visualizations_path = r"C:\Users\Christopher\OneDrive - Syracuse University\PythonSportAnalytics\Section_8\Final_Project\Visualizations\Height_Weight_Winning_Percentage"

# Function to convert height from ft-in format to inches
def convert_height(height):
    """Turn a feet'inches" string such as 6'2" into a total number of inches.

    Returns np.nan when `height` is not a string, contains no apostrophe,
    or when the feet/inches parts are not valid integers. A missing inches
    part (e.g. "6'") counts as zero inches.
    """
    # Guard clause: only apostrophe-delimited strings are parseable.
    if not isinstance(height, str) or "'" not in height:
        return np.nan

    pieces = height.split("'")
    feet_text = pieces[0].strip()
    inch_text = pieces[1].strip() if len(pieces) > 1 else ""

    try:
        total_inches = int(feet_text) * 12
        if inch_text:
            # Drop the trailing double-quote inch marker before parsing.
            total_inches += int(inch_text.replace('"', ''))
    except ValueError:
        return np.nan
    return total_inches

# Function to calculate BMI (Body Mass Index)
def calculate_bmi(weight, height_in_inches):
    """Return BMI (kg / m^2) for a weight in pounds and a height in inches.

    The body is plain arithmetic, so it accepts scalars as well as anything
    that supports elementwise *, / and ** (e.g. pandas Series).
    """
    meters_per_inch = 0.0254
    kilograms_per_pound = 0.453592
    # Same evaluation order as "kg / (m ** 2)": convert each operand, then divide.
    return (weight * kilograms_per_pound) / ((height_in_inches * meters_per_inch) ** 2)

# Load and process roster data
# Each season's roster CSV is read, its Ht/Wt columns are normalised to numbers,
# and a per-player BMI column is added before the seasons are stacked together.
all_rosters = []
missing_roster_files = []
for file in roster_files:
    if not os.path.exists(file):
        # Keep going with the seasons that are present, but remember the gap so
        # it can be reported instead of silently shrinking the data set.
        missing_roster_files.append(file)
        continue

    df = pd.read_csv(file)

    # Heights arrive as feet'inches" text; anything unparseable becomes NaN.
    heights = pd.to_numeric(df["Ht"].apply(convert_height), errors='coerce')
    # A zero height would divide by zero and yield an infinite BMI, which would
    # then dominate every team/FBS mean -- treat it as a missing measurement.
    df["Ht"] = heights.where(heights > 0)
    df["Wt"] = pd.to_numeric(df["Wt"], errors='coerce')

    # calculate_bmi is plain arithmetic, so it can run on whole columns at once;
    # NaN height or weight propagates to a NaN BMI.
    df["BMI"] = calculate_bmi(df["Wt"], df["Ht"])
    all_rosters.append(df)

if missing_roster_files:
    print("Warning: roster file(s) not found and skipped:", *missing_roster_files, sep="\n  ")

if not all_rosters:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f"No roster CSV files were found under {base_path!r}")

roster_data = pd.concat(all_rosters, ignore_index=True)

# Season-level view: mean roster BMI for every (team, year) pair
season_keys = ["Team", "Year"]
team_bmi_metrics = roster_data.groupby(season_keys)["BMI"].mean().reset_index()

# Benchmark: mean BMI over every player row, all teams and seasons together
fbs_avg_bmi = roster_data["BMI"].mean()

# Win/loss table, trimmed to the columns the charts use
win_stats_raw = pd.read_csv(win_stats_file)
df_win_stats = win_stats_raw[["Team", "Year", "Pct", "W", "L"]]

# Attach each team-season's record to its BMI; the left join keeps every
# team-season that has roster data even when no win-stats row matches
df_merged = team_bmi_metrics.merge(df_win_stats, how="left", on=season_keys)

# Shared seaborn styling for all BMI charts below
sns.set_style("whitegrid")

# Create a function to save each plot as a separate PDF
def save_single_pdf(plot_func, filename):
    """Run `plot_func`, then write the current matplotlib figure to its own PDF.

    Parameters
    ----------
    plot_func : callable taking no arguments
        Called inside the PdfPages context; whatever figure is current when it
        returns is the one that gets saved.
    filename : str
        File name (not a full path) created inside `visualizations_path`.

    The output directory is created if it does not exist yet, and the current
    figure is closed even when plotting or saving raises, so a failed chart
    does not leave an open figure behind.
    """
    # PdfPages cannot create missing parent folders on its own.
    os.makedirs(visualizations_path, exist_ok=True)
    with PdfPages(os.path.join(visualizations_path, filename)) as pdf:
        try:
            plot_func()  # Call the plotting function
            pdf.savefig()  # Save the current figure to the PDF
        finally:
            plt.close()  # Always release the figure, success or failure

# All-seasons view, one row per team.
# Mean BMI over every roster row a team has across the loaded seasons
team_bmi_metrics_combined = roster_data.groupby("Team")["BMI"].mean().reset_index()

# Mean of the per-season win percentages (a simple average of the Pct values,
# not wins divided by games played)
win_percentage_combined = df_win_stats.groupby("Team")[["Pct"]].mean().reset_index()

# Left join keeps every team that has roster data, matched or not
df_merged_combined = team_bmi_metrics_combined.merge(
    win_percentage_combined, how="left", on="Team"
)

# Visualization 1: Scatter plot for combined average BMI vs win% (across all seasons)
def plot_bmi_vs_win_percentage():
    """Save a regression scatter plot of team BMI against win percentage.

    Reads the module-level `df_merged_combined` frame (one row per team) and
    writes "bmi_vs_win_percentage_all_seasons_combined.pdf" via save_single_pdf.
    """
    def draw_scatter():
        # Draw on an explicit figure/axes pair; plt.subplots also makes the new
        # figure current, which is what save_single_pdf saves and closes.
        fig, ax = plt.subplots(figsize=(12, 8))
        sns.regplot(
            x="BMI",
            y="Pct",
            data=df_merged_combined,
            scatter_kws={"color": "purple"},
            line_kws={"color": "red"},
            ax=ax,
        )
        ax.set_title("Average BMI vs Win Percentage (All Seasons Combined)")
        ax.set_xlabel("Average Team BMI")
        ax.set_ylabel("Win Percentage")
        fig.tight_layout()

    # Hand the real drawing routine to the saver instead of drawing first and
    # passing a dummy plt.plot() call as the "plotting function".
    save_single_pdf(draw_scatter, "bmi_vs_win_percentage_all_seasons_combined.pdf")

# Function to annotate bars with record and win percentage inside the bar
def annotate_bars_with_record_and_win_pct(ax, data, team_stats):
    """Write each team's W-L record and win percentage in the middle of its bar.

    Parameters
    ----------
    ax : axes-like object
        Must expose `patches` (the bars) and `text(x, y, s, **kwargs)`.
    data : pandas.DataFrame
        The frame that was plotted, in bar order; needs a "Team" column. If it
        also has a "Year" column, that season is used to pick the stats row.
    team_stats : pandas.DataFrame
        Needs "Team", "W", "L" and "Pct" columns, optionally "Year".

    When both frames carry a "Year" column the lookup is restricted to the
    plotted season; otherwise the first row for the team is used. Bars that
    have no usable height, no matching row in `data`, or no matching stats row
    are left without a label instead of raising.
    """
    match_on_year = "Year" in data.columns and "Year" in team_stats.columns

    for p in ax.patches:
        height = p.get_height()
        x = p.get_x() + p.get_width() / 2

        # Bar centres sit on integer category positions; round rather than
        # truncate so a centre like 2.9999999 still maps to bar 3.
        position = int(round(x))
        if pd.isna(height) or not 0 <= position < len(data):
            continue

        team_name = data["Team"].iloc[position]
        selector = team_stats["Team"] == team_name
        if match_on_year:
            # Without this, a multi-season stats table always yields the
            # team's first season regardless of the year being plotted.
            selector = selector & (team_stats["Year"] == data["Year"].iloc[position])

        team_data = team_stats[selector]
        if team_data.empty:
            continue

        # Get the record and win percentage for the team
        record = f"{team_data['W'].values[0]}-{team_data['L'].values[0]}"
        win_pct = f"{team_data['Pct'].values[0]:.3f}"

        # Annotate the bar with record and win percentage inside the bar
        ax.text(x, height / 2, f"{record}\n({win_pct})", ha="center", va="center", fontsize=9, color="white", fontweight='bold')

# Shared drawing routine for the per-season BMI bar charts (Visualizations 2 and 3)
def _draw_ranked_bmi_bars(season_teams, season_stats, title):
    """Draw one annotated BMI bar chart on a new figure, which becomes current."""
    fig, ax = plt.subplots(figsize=(16, 12))
    sns.barplot(x="Team", y="BMI", data=season_teams, ax=ax)
    ax.axhline(fbs_avg_bmi, color='orange', linestyle='--', label=f'FBS Average BMI: {fbs_avg_bmi:.2f}')
    ax.set_title(title)
    ax.set_xlabel("Team")
    ax.set_ylabel("Average Team BMI")
    ax.tick_params(axis="x", labelrotation=90)
    ax.legend()

    # Annotate the bars with record and win percentage
    annotate_bars_with_record_and_win_pct(ax, season_teams, season_stats)
    fig.tight_layout()


# Shared per-season loop: rank teams by BMI, chart the first 25, save one PDF per year
def _save_ranked_bmi_charts(ascending, title_prefix, file_prefix, years):
    """Save one "<file_prefix>_<year>.pdf" bar chart for each year in `years`."""
    for year in years:
        season_teams = (
            df_merged[df_merged["Year"] == year]
            .sort_values(by="BMI", ascending=ascending)
            .head(25)
        )
        if season_teams.empty:
            # E.g. a season whose roster file was missing: nothing to chart.
            print(f"No BMI data for {year}; skipping {file_prefix}_{year}.pdf")
            continue

        # Only this season's records, so the bar labels show the record for
        # the year being plotted rather than another season's.
        season_stats = df_win_stats[df_win_stats["Year"] == year]
        title = f"{title_prefix} 25 Teams by BMI (Year {year})"

        # The lambda is invoked immediately inside save_single_pdf, so it sees
        # this iteration's values.
        save_single_pdf(
            lambda: _draw_ranked_bmi_bars(season_teams, season_stats, title),
            f"{file_prefix}_{year}.pdf",
        )


# Visualization 2: Top 25 teams by BMI for each season
def plot_top_25_bmi_single_season(years=(2022, 2023, 2024)):
    """Save a bar chart of the 25 highest-BMI teams for each season in `years`.

    Output files are named "top_25_bmi_<year>.pdf". The default `years`
    reproduces the original hard-coded 2022-2024 seasons.
    """
    _save_ranked_bmi_charts(False, "Top", "top_25_bmi", years)


# Visualization 3: Bottom 25 teams by BMI for each season
def plot_bottom_25_bmi_single_season(years=(2022, 2023, 2024)):
    """Save a bar chart of the 25 lowest-BMI teams for each season in `years`.

    Output files are named "bottom_25_bmi_<year>.pdf". The default `years`
    reproduces the original hard-coded 2022-2024 seasons.
    """
    _save_ranked_bmi_charts(True, "Bottom", "bottom_25_bmi", years)


# Create the visualizations: scatter plot first, then the per-season bar charts
for render_report in (
    plot_bmi_vs_win_percentage,
    plot_top_25_bmi_single_season,
    plot_bottom_25_bmi_single_season,
):
    render_report()

print("All visualizations saved to PDF.")


All visualizations saved to PDF.
