In [None]:
# Allow output from every line
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
from scipy import stats

In [None]:
# File paths
base_path = r"C:\Users\Christopher\OneDrive - Syracuse University\PythonSportAnalytics\Section_8\Final_Project\CSV_Files"
roster_files = [
    os.path.join(base_path, f"cfbstats_team_rosters_{year}.csv") for year in [2022, 2023, 2024]
]
win_stats_file = os.path.join(base_path, "combined_fbs_stats_2022_2024.csv")
passing_defense_file = os.path.join(base_path, "cfbstats_team_passing_stats_defense_2022_2024.csv")
rushing_defense_file = os.path.join(base_path, "cfbstats_team_rushing_stats_defense_2022_2024.csv")

# Path to save visualizations
visualizations_path = r"C:\Users\Christopher\OneDrive - Syracuse University\PythonSportAnalytics\Section_8\Final_Project\Visualizations\Height_Weight_Statistics"

In [None]:
# Function to convert height from ft-in format to inches
def convert_height(height):
    if isinstance(height, str) and "'" in height:
        try:
            feet, inches = map(int, height.replace('"', "").split("'"))
            return feet * 12 + inches
        except ValueError:
            return float('nan')
    return float('nan')

In [None]:
# Function to calculate BMI (using height in inches and weight in pounds)
def calculate_bmi(height_in_inches, weight_in_pounds):
    return (weight_in_pounds * 0.453592) / ((height_in_inches * 0.0254) ** 2) if height_in_inches > 0 else float('nan')

In [None]:
# Load and process roster data
all_rosters = []
for file in roster_files:
    if os.path.exists(file):
        df = pd.read_csv(file)
        df["Ht"] = df["Ht"].apply(convert_height)
        df["Wt"] = pd.to_numeric(df["Wt"], errors='coerce')
        # Calculate BMI and add it to the dataframe
        df["BMI"] = df.apply(lambda row: calculate_bmi(row["Ht"], row["Wt"]), axis=1)
        all_rosters.append(df)

roster_data = pd.concat(all_rosters, ignore_index=True)


In [None]:
# Compute average BMI per team (across all seasons)
team_size_metrics = roster_data.groupby(["Team"])[["BMI"]].mean().reset_index()

In [None]:
# Load win statistics
df_win_stats = pd.read_csv(win_stats_file)[["Team", "Year", "Pct", "W", "L"]]

In [None]:
# Aggregate win stats across all seasons for each team
df_win_stats_combined = df_win_stats.groupby("Team")[["Pct", "W", "L"]].mean().reset_index()

In [None]:
# Merge team BMI with win stats
df_merged = pd.merge(team_size_metrics, df_win_stats_combined, on="Team", how="left")

In [None]:
# Load passing defense stats and aggregate across all seasons
df_passing_defense = pd.read_csv(passing_defense_file)[["Team", "Year", "Yards", "Yards/G", "TD", "Int"]]
df_passing_defense_combined = df_passing_defense.groupby("Team")[["Yards", "Yards/G", "TD", "Int"]].mean().reset_index()
df_passing_defense_combined.rename(columns={"Yards": "D_Passing_Yards", "Yards/G": "D_Passing_Yards/G"}, inplace=True)

In [None]:
# Load rushing defense stats and aggregate across all seasons
df_rushing_defense = pd.read_csv(rushing_defense_file)[["Team", "Year", "Yards", "Yards/G", "TD"]]
df_rushing_defense_combined = df_rushing_defense.groupby("Team")[["Yards", "Yards/G", "TD"]].mean().reset_index()
df_rushing_defense_combined.rename(columns={"Yards": "D_Rushing_Yards", "Yards/G": "D_Rushing_Yards/G"}, inplace=True)

In [None]:
# Merge passing and rushing defense into df_merged
df_merged = pd.merge(df_merged, df_passing_defense_combined, on="Team", how="left")
df_merged = pd.merge(df_merged, df_rushing_defense_combined, on="Team", how="left")

In [None]:
# Load per game stats and apply necessary renaming
df_per_game_stats = pd.read_csv(win_stats_file)

In [None]:
# Rename the relevant Offensive Stat columns
df_per_game_stats.rename(columns={
    "Rushing Yds": "O_Rushing_Yards",
    "Receiving Yds_Rec": "O_Receiving_Yards",
    "Rushing Y/G": "O_Rushing_Yards/G",
    "Receiving Y/G_Rec": "O_Receiving_Yards/G"
}, inplace=True)


In [None]:
# Ensure the proper renaming and check the columns after renaming
print(df_per_game_stats.columns)


In [None]:
# Aggregate per game stats across all seasons
df_per_game_stats_combined = df_per_game_stats.groupby("Team")[["O_Rushing_Yards", "O_Receiving_Yards", "O_Rushing_Yards/G", "O_Receiving_Yards/G"]].mean().reset_index()

In [None]:
# Merge per game stats into df_merged
df_merged = pd.merge(df_merged, df_per_game_stats_combined, on="Team", how="left")

In [None]:
# Verify columns again after merging
print(df_merged.columns)

In [None]:
# Function to save each plot as a separate PDF
def save_single_pdf(plot_func, filename):
    with PdfPages(os.path.join(visualizations_path, filename)) as pdf:
        plot_func() 
        pdf.savefig() 
        plt.close() 

In [None]:
# Calculate FBS average BMI
fbs_avg_bmi = df_merged["BMI"].mean()

In [None]:
# Create Bar Chart Visualizations
def plot_top_25_bmi_stats():
    top_25_bmi = df_merged.sort_values(by="BMI", ascending=False).head(25)
    plt.figure(figsize=(16, 12))
    ax = sns.barplot(x="Team", y="BMI", data=top_25_bmi)
    plt.axhline(fbs_avg_bmi, color='orange', linestyle='--', label=f'FBS Average BMI: {fbs_avg_bmi:.2f}')
    plt.title("Top 25 Teams by BMI")
    plt.xlabel("Team")
    plt.ylabel("Average Team BMI")
    plt.xticks(rotation=90)
    plt.legend()
    
    # Add BMI number inside the bars (using BMI value)
    for p in ax.patches:
        ax.annotate(f'{p.get_height():.2f}', 
                    (p.get_x() + p.get_width() / 2., p.get_height() / 2.), 
                    ha='center', va='center', fontsize=10, color='black')
    
    plt.tight_layout()
    save_single_pdf(lambda: plt.plot(), "Top_25_Teams_by_BMI_Stats.pdf")

In [None]:
def plot_bottom_25_bmi_stats():
    bottom_25_bmi = df_merged.sort_values(by="BMI", ascending=True).head(25)
    plt.figure(figsize=(16, 12))
    ax = sns.barplot(x="Team", y="BMI", data=bottom_25_bmi)
    plt.axhline(fbs_avg_bmi, color='orange', linestyle='--', label=f'FBS Average BMI: {fbs_avg_bmi:.2f}')
    plt.title("Bottom 25 Teams by BMI")
    plt.xlabel("Team")
    plt.ylabel("Average Team BMI")
    plt.xticks(rotation=90)
    plt.legend()
    
    # Add BMI number inside the bars (using BMI value)
    for p in ax.patches:
        ax.annotate(f'{p.get_height():.2f}', 
                    (p.get_x() + p.get_width() / 2., p.get_height() / 2.), 
                    ha='center', va='center', fontsize=10, color='black')
    
    plt.tight_layout()
    save_single_pdf(lambda: plt.plot(), "Bottom_25_Teams_by_BMI_Stats.pdf")

In [None]:
# Create Top 25 bar charts for rushing and passing stats
def plot_top_25_by_bmi(stat_column, stat_label, title, filename):
    top_25_stat = df_merged.sort_values(by=stat_column, ascending=False).head(25)
    plt.figure(figsize=(16, 12))
    ax = sns.barplot(x="Team", y=stat_column, data=top_25_stat)
    
    # Add a legend with FBS Average BMI
    plt.legend([f'FBS Average BMI: {fbs_avg_bmi:.2f}'], loc='upper right')

    plt.title(title)
    plt.xlabel("Team")
    plt.ylabel(stat_label)
    
    # Add BMI number inside the bars (using BMI value)
    for p in ax.patches:
        team_name = top_25_stat.iloc[int(p.get_x())]["Team"]  # Extract team name based on x-position
        bmi_value = top_25_stat[top_25_stat["Team"] == team_name]["BMI"].values[0]  # Get BMI for the team
        ax.annotate(f'{bmi_value:.2f}', 
                    (p.get_x() + p.get_width() / 2., p.get_height() / 2.), 
                    ha='center', va='center', fontsize=10, color='black')
    
    plt.xticks(rotation=90)
    plt.tight_layout()
    save_single_pdf(lambda: plt.plot(), filename)

In [None]:
# Create Bottom 25 bar charts
def plot_bottom_25_by_bmi(stat_column, stat_label, title, filename):
    bottom_25_stat = df_merged.sort_values(by=stat_column, ascending=True).head(25)
    plt.figure(figsize=(16, 12))
    ax = sns.barplot(x="Team", y=stat_column, data=bottom_25_stat)

    # Add a legend with FBS Average BMI
    plt.legend([f'FBS Average BMI: {fbs_avg_bmi:.2f}'], loc='upper right')

    plt.title(title)
    plt.xlabel("Team")
    plt.ylabel(stat_label)
    
    # Add BMI number inside the bars (using BMI value)
    for p in ax.patches:
        team_name = bottom_25_stat.iloc[int(p.get_x())]["Team"]  # Extract team name based on x-position
        bmi_value = bottom_25_stat[bottom_25_stat["Team"] == team_name]["BMI"].values[0]  # Get BMI for the team
        ax.annotate(f'{bmi_value:.2f}', 
                    (p.get_x() + p.get_width() / 2., p.get_height() / 2.), 
                    ha='center', va='center', fontsize=10, color='black')
    
    plt.xticks(rotation=90)
    plt.tight_layout()
    save_single_pdf(lambda: plt.plot(), filename)

In [None]:
# Create Top 25 bar charts for rushing and passing stats
plot_top_25_by_bmi('O_Rushing_Yards', 'Offensive Rushing Yards', 'Top 25 Teams by Offensive Rushing Yards', 'Top_25_Teams_by_Offensive_Rushing_Yards.pdf')
plot_top_25_by_bmi('D_Rushing_Yards', 'Defensive Rushing Yards', 'Bottom 25 Teams by Defensive Rushing Yards', 'Bottom_25_Teams_by_Defensive_Rushing_Yards.pdf')
plot_top_25_by_bmi('O_Receiving_Yards', 'Offensive Receiving Yards', 'Top 25 Teams by Offensive Receiving Yards', 'Top_25_Teams_by_Offensive_Receiving_Yards.pdf')
plot_top_25_by_bmi('D_Passing_Yards', 'Defensive Passing Yards', 'Bottom 25 Teams by Defensive Passing Yards', 'Bottom_25_Teams_by_Defensive_Passing_Yards.pdf')

In [None]:
# Create Bottom 25 bar charts for rushing and passing stats
plot_bottom_25_by_bmi('O_Rushing_Yards', 'Offensive Rushing Yards', 'Bottom 25 Teams by Offensive Rushing Yards', 'Bottom_25_Teams_by_Offensive_Rushing_Yards.pdf')
plot_bottom_25_by_bmi('D_Rushing_Yards', 'Defensive Rushing Yards', 'Top 25 Teams by Defensive Rushing Yards', 'Top_25_Teams_by_Defensive_Rushing_Yards.pdf')
plot_bottom_25_by_bmi('O_Receiving_Yards', 'Offensive Receiving Yards', 'Bottom 25 Teams by Offensive Receiving Yards', 'Bottom_25_Teams_by_Offensive_Receiving_Yards.pdf')
plot_bottom_25_by_bmi('D_Passing_Yards', 'Defensive Passing Yards', 'Top 25 Teams by Defensive Passing Yards', 'Top_25_Teams_by_Defensive_Passing_Yards.pdf')

In [None]:
# Plot the top and bottom 25 teams by BMI
plot_top_25_bmi_stats()
plot_bottom_25_bmi_stats()

In [None]:
# Function to generate scatter plots with BMI vs various metrics, including regression lines
def scatter_bmi_vs_metric_with_trendline(x_data, x_label, title, filename):
    plt.figure(figsize=(10, 8))
    sns.regplot(x=x_data, y=df_merged["BMI"], scatter_kws={'s': 100}, line_kws={'color': 'red'})
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel("Team BMI")
    plt.tight_layout()
    save_single_pdf(lambda: plt.plot(), filename)

In [None]:
# Scatter plot: BMI vs Offensive Rushing Yards with trend line
scatter_bmi_vs_metric_with_trendline(df_merged["O_Rushing_Yards"], "Offensive Rushing Yards", "BMI vs Offensive Rushing Yards", "BMI_vs_Offensive_Rushing_Yards_with_Trendline.pdf")

In [None]:
# Scatter plot: BMI vs Defensive Rushing Yards with trend line
scatter_bmi_vs_metric_with_trendline(df_merged["D_Rushing_Yards"], "Defensive Rushing Yards", "BMI vs Defensive Rushing Yards", "BMI_vs_Defensive_Rushing_Yards_with_Trendline.pdf")

In [None]:
# Scatter plot: BMI vs Offensive Receiving Yards with trend line
scatter_bmi_vs_metric_with_trendline(df_merged["O_Receiving_Yards"], "Offensive Receiving Yards", "BMI vs Offensive Receiving Yards", "BMI_vs_Offensive_Receiving_Yards_with_Trendline.pdf")

In [3]:
# Scatter plot: BMI vs Defensive Passing Yards with trend line
scatter_bmi_vs_metric_with_trendline(df_merged["D_Passing_Yards"], "Defensive Passing Yards", "BMI vs Defensive Passing Yards", "BMI_vs_Defensive_Passing_Yards_with_Trendline.pdf")

Index(['Team', 'Year', 'G', 'W', 'L', 'Pct', 'Rushing Att', 'O_Rushing_Yards',
       'Rushing Y/A', 'Rushing TD', 'O_Rushing_Yards/G', 'Total Receptions',
       'O_Receiving_Yards', 'Receiving Y/R', 'Receiving TD_Rec',
       'O_Receiving_Yards/G', 'Scrimmage Plays', 'Yds_Scrimmage',
       'Scrimmage Avg', 'TD_Scrimmage', 'Cmp', 'Att', 'Cmp%', 'Yds', 'TD',
       'TD%', 'Int', 'Int%', 'Y/A', 'AY/A', 'Y/C', 'Y/G', 'Rate'],
      dtype='object')
Index(['Team', 'BMI', 'Pct', 'W', 'L', 'D_Passing_Yards', 'D_Passing_Yards/G',
       'TD_x', 'Int', 'D_Rushing_Yards', 'D_Rushing_Yards/G', 'TD_y',
       'O_Rushing_Yards', 'O_Receiving_Yards', 'O_Rushing_Yards/G',
       'O_Receiving_Yards/G'],
      dtype='object')


In [None]:
print("All visualizations saved to PDF.")