In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import random
import os

In [6]:
import warnings
warnings.simplefilter(action='ignore')

In [2]:
# Render every column of wide DataFrames instead of eliding the middle ones.
pd.set_option('display.max_columns', None)
# Put the row limit back to the pandas default. reset_option takes only the
# option name/pattern; a value argument is meaningful for set_option alone.
pd.reset_option('display.max_rows')

In [4]:
# Read the three CSV inputs from the current working directory; the order of
# the file names matches the order of the target variables.
df, df_player, df_team = (
    pd.read_csv(csv_name)
    for csv_name in (
        "b2b_updated_final4.csv",
        "player_performance_data.csv",
        "team_data_updated.csv",
    )
)


# Calculating per-game stats for the team dataset

In [9]:

# Build per-game averages of every team stat for each (team, season, b2b) group.

# Candidate stat columns: every float64/int64 column of the team frame.
numeric_columns = df_team.select_dtypes(include=['float64', 'int64']).columns.tolist()

# Identifier / schedule columns that are numeric but should not be averaged.
non_stat_columns = ['Unnamed: 0', 'game_id', 'game_played','season','3_in_4', '5_in_8', 'b2b']
team_columns_of_interest = [col for col in numeric_columns if col not in non_stat_columns]

# Single definition of the grouping keys, shared by the sum, the count and the merge.
team_group_keys = ['team', 'season', 'b2b']
team_groups = df_team.groupby(team_group_keys)

# numeric_only=True keeps the aggregation restricted to numeric columns on every
# pandas version; without it, pandas >= 2.0 also tries to "sum" object columns.
team_sum_stats = team_groups.sum(numeric_only=True).reset_index()

# Number of rows (games) in each group; used as the divisor for the averages.
game_counts = team_groups.size().reset_index(name='num_games')

team_sum_stats = team_sum_stats.merge(game_counts, on=team_group_keys)

# Compute all avg_<stat> columns in one vectorised step and append them after
# num_games, in the same order as team_columns_of_interest.
# NOTE(review): sum() skips missing values while num_games counts every row, so
# a game with a missing stat contributes 0 to that average — confirm intended.
team_avg_stats = (
    team_sum_stats[team_columns_of_interest]
    .div(team_sum_stats['num_games'], axis=0)
    .add_prefix('avg_')
)
team_sum_stats = pd.concat([team_sum_stats, team_avg_stats], axis=1)

In [10]:
# Preview the first five (team, season, b2b) game counts.
game_counts.head()

Unnamed: 0,team,season,b2b,num_games
0,Anaheim Ducks,20092010,0,65
1,Anaheim Ducks,20092010,1,17
2,Anaheim Ducks,20102011,0,67
3,Anaheim Ducks,20102011,1,15
4,Anaheim Ducks,20112012,0,69


In [11]:
# Preview the first five rows of the summed and per-game-averaged team stats.
team_sum_stats.head()

Unnamed: 0.1,team,season,b2b,Unnamed: 0,game_id,game_played,team_GF,team_GA,extra_info,team_shots,team_PIM,team_PPG,team_PPO,team_SHG,extra_info_1,opp_shots,opp_PIM,opp_PPG,opp_PPO,opp_SHG,extra_info_2,team_CF,team_CA,team_CF_percentage,team_FF,team_FA,team_FF_percentage,team_FOW,team_FOL,team_FO_win_percentage,team_oZS_percentage,team_PDO,3_in_4,5_in_8,num_games,avg_team_GF,avg_team_GA,avg_extra_info,avg_team_shots,avg_team_PIM,avg_team_PPG,avg_team_PPO,avg_team_SHG,avg_extra_info_1,avg_opp_shots,avg_opp_PIM,avg_opp_PPG,avg_opp_PPO,avg_opp_SHG,avg_extra_info_2,avg_team_CF,avg_team_CA,avg_team_CF_percentage,avg_team_FF,avg_team_FA,avg_team_FF_percentage,avg_team_FOW,avg_team_FOL,avg_team_FO_win_percentage,avg_team_oZS_percentage,avg_team_PDO
0,Anaheim Ducks,20092010,0,2663,2663,2630,198,185,0.0,2029,1000,50,0.0,8,0.0,2138,1003,48,0.0,7,0.0,2730.0,2927.0,3137.4,2171.0,2366,3119.8,1314,1371,3185.2,3290.7,6555.0,8,6,65,3.046154,2.846154,0.0,31.215385,15.384615,0.769231,0.0,0.123077,0.0,32.892308,15.430769,0.738462,0.0,0.107692,0.0,42.0,45.030769,48.267692,33.4,36.4,47.996923,20.215385,21.092308,49.003077,50.626154,100.846154
1,Anaheim Ducks,20092010,1,786,786,773,40,66,0.0,443,321,13,0.0,0,0.0,598,290,19,0.0,2,0.0,676.0,842.0,756.4,493.0,652,730.8,352,389,809.1,843.4,1678.1,9,6,17,2.352941,3.882353,0.0,26.058824,18.882353,0.764706,0.0,0.0,0.0,35.176471,17.058824,1.117647,0.0,0.117647,0.0,39.764706,49.529412,44.494118,29.0,38.352941,42.988235,20.705882,22.882353,47.594118,49.611765,98.711765
2,Anaheim Ducks,20102011,0,8600,8600,2799,196,180,0.0,1938,867,51,0.0,6,0.0,2122,852,42,0.0,5,0.0,2680.0,3281.0,3012.9,2103.0,2491,3073.4,1367,1521,3175.0,3287.0,6782.8,9,12,67,2.925373,2.686567,0.0,28.925373,12.940299,0.761194,0.0,0.089552,0.0,31.671642,12.716418,0.626866,0.0,0.074627,0.0,40.0,48.970149,44.968657,31.38806,37.179104,45.871642,20.402985,22.701493,47.38806,49.059701,101.235821
3,Anaheim Ducks,20102011,1,1901,1901,604,43,55,0.0,396,311,16,0.0,1,0.0,526,246,15,0.0,2,0.0,542.0,729.0,642.1,407.0,547,641.6,332,339,744.7,740.0,1482.8,6,5,15,2.866667,3.666667,0.0,26.4,20.733333,1.066667,0.0,0.066667,0.0,35.066667,16.4,1.0,0.0,0.133333,0.0,36.133333,48.6,42.806667,27.133333,36.466667,42.773333,22.133333,22.6,49.646667,49.333333,98.853333
4,Anaheim Ducks,20112012,0,14761,14761,2855,171,193,0.0,1905,812,37,0.0,3,0.0,2003,733,42,0.0,3,0.0,2911.0,3056.0,3311.2,2175.0,2291,3313.9,1399,1527,3260.9,3335.8,6839.7,10,10,69,2.478261,2.797101,0.0,27.608696,11.768116,0.536232,0.0,0.043478,0.0,29.028986,10.623188,0.608696,0.0,0.043478,0.0,42.188406,44.289855,47.988406,31.521739,33.202899,48.027536,20.275362,22.130435,47.25942,48.344928,99.126087


# T-tests

In [12]:
from scipy.stats import ttest_ind

# Compare the per-game averages of back-to-back (b2b == 1) groups against
# non-back-to-back (b2b == 0) groups, one independent two-sample t-test per stat.
# NOTE(review): this uses scipy's default equal-variance test with no
# multiple-comparison correction — confirm that is the intended design.

# p-value threshold below which a stat is reported as significant.
ALPHA = 0.05

# The row split does not depend on the stat being tested, so do it once.
b2b_rows = team_sum_stats[team_sum_stats['b2b'] == 1]
non_b2b_rows = team_sum_stats[team_sum_stats['b2b'] == 0]

results_team_avg = {}

for column in team_columns_of_interest:
    avg_column = f'avg_{column}'

    # Drop missing values first so the size check below counts real
    # observations and a stray NaN cannot turn the p-value into NaN.
    b2b_values = b2b_rows[avg_column].dropna()
    non_b2b_values = non_b2b_rows[avg_column].dropna()

    # A t-test needs at least two observations on each side.
    if len(b2b_values) > 1 and len(non_b2b_values) > 1:
        p_value = ttest_ind(b2b_values, non_b2b_values).pvalue
        results_team_avg[column] = p_value

# NaN p-values (e.g. from constant columns) compare False and are excluded.
significant_columns = {col: p for col, p in results_team_avg.items() if p < ALPHA}

print("All Columns:")
for col, p in results_team_avg.items():
    print(f"{col}: {p}")

print("\nSignificant Columns:")
for col, p in significant_columns.items():
    print(f"{col}: {p}")

All Columns:
team_GF: 2.4479621695939573e-06
team_GA: 4.480515397615983e-06
extra_info: nan
team_shots: 4.345792283202355e-06
team_PIM: 0.02396715890215416
team_PPG: 7.530839047784245e-07
team_PPO: 0.5318442289737795
team_SHG: 0.6124804455115467
extra_info_1: nan
opp_shots: 1.002209585684567e-10
opp_PIM: 0.5384606559153238
opp_PPG: 5.032086994035121e-05
opp_PPO: 0.6587372857789171
opp_SHG: 0.3134193453076036
extra_info_2: nan
team_CF: 2.118373517255709e-05
team_CA: 5.907132436586271e-09
team_CF_percentage: 1.5388120621396008e-11
team_FF: 8.535372247929533e-06
team_FA: 3.578675644444439e-10
team_FF_percentage: 4.052974548785665e-13
team_FOW: 0.008626710894139711
team_FOL: 5.634650643194023e-05
team_FO_win_percentage: 7.822886248875213e-07
team_oZS_percentage: 1.1063279211157258e-07
team_PDO: 0.10349040868819252

Significant Columns:
team_GF: 2.4479621695939573e-06
team_GA: 4.480515397615983e-06
team_shots: 4.345792283202355e-06
team_PIM: 0.02396715890215416
team_PPG: 7.530839047784245e-07

# Just changed this code to get other congested periods