In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the league-wide team stats
file_path = 'assets/team_stats_complete_wip.csv'
df = pd.read_csv(file_path)

# Read the Arizona-only team stats
arizona_file_path = 'assets/team_stats_arizona.csv'
df_ari = pd.read_csv(arizona_file_path)

# Arizona rows restricted to the 2023 season
groupARI = df_ari.loc[df_ari['season'].eq(2023)]

# Remaining groups are selected by the value in the 'rk' column
group1 = df.loc[df['rk'].eq(1.0)]  # Stanley Cup winners
group2 = df.loc[df['rk'].eq(2.0)]  # Runners-up
group3 = df.loc[df['rk'].between(3.0, 16.0)]  # Playoff teams (bounds inclusive)
# NOTE(review): only rows with rk exactly 32 are selected here; if ranks
# 17-31 also occur in the data, those rows belong to no group -- confirm
# how 'rk' encodes teams that missed the playoffs.
group32 = df.loc[df['rk'].eq(32.0)]  # Teams that missed the playoffs

# Label -> DataFrame mapping for every group, including the Arizona one
groups = {
    'Group1': group1,
    'Group2': group2,
    'Group3': group3,
    'Group32': group32,
    'GroupARI': groupARI,
}

# Function to clean data by removing empty cells (NaNs)
def clean_data(data):
    """Return *data* with its missing values removed.

    Delegates to ``data.dropna()`` with default arguments, so a new object
    is returned and the input is left unmodified.
    """
    without_missing = data.dropna()
    return without_missing

# Per-group averages over every column from the 6th one onward.
# NOTE: the mean/print/plot steps below are commented out, so at present this
# loop only rebuilds `filtered_data` for each group and produces no output.
column_start_index = 5  # zero-based position of the 6th column

for group_name, group_data in groups.items():
    # Slice off the leading columns, then discard 'iceTime' when it exists
    stat_columns = group_data.iloc[:, column_start_index:]
    filtered_data = stat_columns.drop(columns=['iceTime'], errors='ignore')

    # print(f"\nAverages for {group_name}:")
    # averages = filtered_data.mean()
    # print(averages)

    # # Optional: Visualize the averages with bar plots
    # averages.plot(kind='bar', figsize=(10, 6), title=f"Average Statistics for {group_name}")
    # plt.ylabel('Average Value')
    # plt.xlabel('Statistic')
    # plt.show()

# Compare Group1 against other groups visually including GroupARI:
# one notched box plot per statistic column, one box per entry in `groups`.
group_labels = list(groups)

for col in df.columns[column_start_index:]:
    if col == 'iceTime':
        continue  # Skip the 'iceTime' column

    # The column names come from `df`, but GroupARI is loaded from a separate
    # CSV. Report and skip a column that some group lacks rather than letting
    # a KeyError abort the remaining plots.
    missing_in = [name for name, frame in groups.items() if col not in frame.columns]
    if missing_in:
        print(f"Skipping {col}: not present in {', '.join(missing_in)}")
        continue

    # Clean data by removing NaNs before plotting
    cleaned = [clean_data(frame[col]) for frame in groups.values()]

    plt.figure(figsize=(10, 6))
    plt.title(f'Comparison of {col} across Groups')
    plt.boxplot(cleaned, notch=True)
    # Label the boxes through xticks instead of boxplot's `labels=` keyword,
    # which Matplotlib 3.9 deprecated in favour of `tick_labels`; xticks works
    # on both older and newer releases. Boxes sit at positions 1..N by default.
    plt.xticks(list(range(1, len(group_labels) + 1)), group_labels)
    plt.ylabel(col)
    plt.show()