In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# League-wide team/season table
file_path = '/Users/justin/Documents/git/NHL/analysis/assets/c/team_season_complete.csv'
df = pd.read_csv(file_path)

# Arizona-only team/season table (separate CSV from the league-wide one)
arizona_file_path = '/Users/justin/Documents/git/NHL/analysis/assets/c/az/team_season_complete_ARI.csv'
df_ari = pd.read_csv(arizona_file_path)

# Arizona rows split out by season: one frame for 2023, one for 2022
ari2023 = df_ari.loc[df_ari['season'].eq(2023)]
ari2022 = df_ari.loc[df_ari['season'].eq(2022)]

# League-wide rows bucketed by the 'rk' column
group1 = df.loc[df['rk'].eq(1.0)]                # rk == 1: Stanley Cup winners
group2 = df.loc[df['rk'].eq(2.0)]                # rk == 2: runners-up
group3 = df.loc[df['rk'].between(3.0, 31.0)]     # 3 <= rk <= 31 (inclusive): playoff teams
group32 = df.loc[df['rk'].eq(32.0)]              # rk == 32: teams that missed the playoffs

# Name -> frame lookup covering the four league buckets plus both Arizona seasons
groups = {
    'Group1': group1,
    'Group2': group2,
    'Group3': group3,
    'Group32': group32,
    'AZ 2023': ari2023,
    'AZ 2022': ari2022,
}

# Helper that strips missing values (NaNs) before plotting
def clean_data(data):
    """Return a copy of *data* with missing entries removed.

    Delegates to the object's own ``dropna()`` with default arguments, so
    for a Series the NaN entries are dropped and for a DataFrame every row
    containing at least one NaN is dropped. The input is not modified.
    """
    without_missing = data.dropna()
    return without_missing

# Function to plot specified columns across groups
def plot_columns_across_groups(columns_to_plot):
    """Show one notched box plot per column, comparing all six groups.

    For every name in *columns_to_plot* a new figure is created with six
    boxes, left to right: 'Champions', 'Runner-Up', 'Playoffs',
    'non-Playoff', 'ARI 2022' and 'ARI 2023'. The data come from the
    module-level frames ``group1``, ``group2``, ``group3``, ``group32``,
    ``ari2022`` and ``ari2023``; NaNs are removed per group with
    ``clean_data`` before plotting.

    A column that is absent from the league-wide frame ``df`` or from any
    of the plotted frames is reported with ``print`` and skipped, so one
    bad name does not abort the remaining plots.

    Args:
        columns_to_plot: iterable of column names to plot, in order.

    Returns:
        None. Figures are displayed via ``plt.show()``.
    """
    # (tick label, source frame) pairs in left-to-right plotting order.
    labelled_frames = [
        ('Champions', group1),
        ('Runner-Up', group2),
        ('Playoffs', group3),
        ('non-Playoff', group32),
        ('ARI 2022', ari2022),
        ('ARI 2023', ari2023),
    ]
    tick_names = [label for label, _ in labelled_frames]

    for col in columns_to_plot:
        if col not in df.columns:
            print(f"Column '{col}' not found in the DataFrame. Skipping...")
            continue

        # The Arizona frames are read from a different CSV than `df`, so a
        # column present in `df` is not guaranteed to exist there. Check
        # every plotted frame up front instead of failing with a KeyError.
        missing_in = [label for label, frame in labelled_frames
                      if col not in frame.columns]
        if missing_in:
            print(f"Column '{col}' not found for {', '.join(missing_in)}. Skipping...")
            continue

        # Clean data by removing NaNs before plotting
        samples = [clean_data(frame[col]) for _, frame in labelled_frames]

        plt.figure(figsize=(6.2, 4 * 0.65))
        plt.title(f'Comparison of {col} across Groups')
        plt.boxplot(samples, notch=True)
        # Boxes sit at x = 1..N by default. Labelling through xticks avoids
        # the boxplot `labels` keyword, which Matplotlib 3.9 deprecated in
        # favour of `tick_labels`; this form works on both old and new versions.
        plt.xticks(range(1, len(tick_names) + 1), tick_names)
        plt.ylabel(col)
        plt.show()

# Example usage: specify columns to plot.
# Each name appears once; a repeated name would just draw the same figure twice.
columns_to_plot = [
    'lowDangerGoalsAgainst',
    'mediumDangerGoalsAgainst',
    'highDangerGoalsAgainst',
    'lowDangerGoalsFor',
    'mediumDangerGoalsFor',
    'highDangerGoalsFor',
    'missedShotsFor',
    'missedShotsAgainst',
    'l',
    'highDangerxGoalsAgainst',
    'reboundsAgainst',
    'highDangerShotsAgainst',
    'dZoneGiveawaysFor',
    'dZoneGiveawaysAgainst',
    'reboundGoalsAgainst',
    'reboundGoalsFor',
    'xGoalsFromActualReboundsOfShotsAgainst',
    'reboundxGoalsAgainst',
    'goalsAgainst',
    'ga',
    'reboundsFor',
    'ga/gp',
    'highDangerxGoalsFor',
    'highDangerShotsFor',
    'scoreVenueAdjustedxGoalsAgainst',
    'playContinuedInZoneAgainst',
    'xGoalsAgainst',
    'flurryScoreVenueAdjustedxGoalsAgainst',
    'scoreAdjustedTotalShotCreditAgainst',
    'playStoppedFor',
    'playStoppedAgainst',
    's/o win',
    'ot',
    'p%',
    'p',
    'rw',
    'w',
    'row',
    'freezeFor',
    'shots/gp',
    'mediumDangerShotsFor',
    'fow%',
    'savedShotsOnGoalFor',
    'fenwickPercentage',
    'takeawaysFor',
    'mediumDangerxGoalsFor',
]

plot_columns_across_groups(columns_to_plot)