In [None]:
# (The NHL and the NHL Shield are registered trademarks of the 
# National Hockey League. NHL and NHL team marks are the property of 
# the NHL and its teams. © NHL 2023. All Rights Reserved.)

import pandas as pd
import datetime
import os
import requests
import json
import matplotlib.pyplot as plt
import numpy as np
import hvplot.pandas
import geopandas as gpd
import seaborn as sns
import scipy.stats as st
from sqlalchemy import create_engine
from random import randint
from time import sleep
from scipy.stats import pearsonr, mannwhitneyu



In [None]:
# Get Stanely Cup Winning and Losing teams since 1927
# Code by Ron Brennan

url = "https://en.wikipedia.org/wiki/List_of_Stanley_Cup_champions"

stanleycup_champs = pd.read_html(url)

stanleycup_byyear_df = stanleycup_champs[2]
stanleycup_byyear_df.head()

In [None]:
# Find Winning Teams from 1991-2023
# Code by Ron Brennan

stanleycup_winner_cleaned_df = stanleycup_byyear_df.loc[stanleycup_byyear_df['Year'].between(1992,2023)]
stanleycup_winner_cleaned_df = stanleycup_winner_cleaned_df[["Year", "Winning team"]]

stanleycup_winner_cleaned_df.reset_index(drop=True, inplace=True)

stanleycup_winner_cleaned_df["Year"] = (stanleycup_winner_cleaned_df["Year"] -1).map(str) + (stanleycup_winner_cleaned_df["Year"]).map(str)
stanleycup_winner_cleaned_df["Winning team"] = stanleycup_winner_cleaned_df["Winning team"].str.split('(').str[0]

stanleycup_winner_cleaned_df.drop(stanleycup_winner_cleaned_df[(stanleycup_winner_cleaned_df['Year'] == "20042005")].index, inplace=True)
stanleycup_winner_cleaned_df.rename(columns={"Winning team":"Team Name"}, inplace=True)
stanleycup_winner_cleaned_df["Team Name"] = stanleycup_winner_cleaned_df["Team Name"].str.strip()

# Change names 
stanleycup_winner_cleaned_df["Team Name"] = stanleycup_winner_cleaned_df["Team Name"].str.replace("Montreal Canadiens", "Montréal Canadiens")
stanleycup_winner_cleaned_df

In [None]:
# Get Team Name and associated with its ID in the NHL API JSON request

teamID_url = "https://statsapi.web.nhl.com/api/v1/teams"
   
response = requests.get(teamID_url)

response_json = response.json()
team_info = {}

for team in response_json["teams"]:
    team_id = team["id"]
    team_name = team["name"]
    team_info[team_id] = team_name
   
team_info_df = pd.DataFrame.from_dict(team_info, orient="index", columns=["Team Name"])
team_info_df.reset_index(inplace=True)
team_info_df.columns = ["Team ID", "Team Name"]

team_info_df

In [None]:
# Merge dataframes to associate the Winning Team and Season won with the Team ID

stanleycup_winning_merge_byname_ID = pd.DataFrame.merge(stanleycup_winner_cleaned_df,team_info_df, on='Team Name', how='left')
stanleycup_winning_merge_byname_ID

In [None]:
# Get Roster for each Winning team per Season.  
# Will use this code

player_list = []
base_url = "https://statsapi.web.nhl.com/api/v1/teams/{}?expand=team.roster&season={}"
for index, row in stanleycup_winning_merge_byname_ID.iterrows():
    team_ids = row ["Team ID"]
    # print(team_ids)
    year_cl= row ["Year"]
    # print(year_cl)
    url = base_url.format(team_ids, year_cl)
    # print(url)
    response = requests.get(url)
    data = response.json()
    
    for player in data["teams"][0]["roster"]["roster"]:
        
        person = player["person"]["fullName"]
        # Create dictionary
        player_id = player["person"]["id"]
        # Printing for testing purposes
        #print("Name:", person)
        #print("ID:", player_id)
        player_list.append({
            "Name":person,
            "Player ID":player_id,
            "Team ID": team_ids,
            "Season":year_cl
        })
winning_df=pd.DataFrame(player_list)
winning_df.tail(30)
     

In [19]:
   
# Player Profile - Winning Team per Season
# Code by Ron Brennan

player_info_url = "https://statsapi.web.nhl.com/api/v1/people/{}"
player_info = []
player_stats = []

for index, row in winning_df.iterrows():
    player_nhl_id = row["Player ID"]
    winning_season = row["Season"]
    url = player_info_url.format(player_nhl_id)
    response = requests.get(url)
    data = response.json()

    for players in data["people"]:
            player_id = players["id"]
            person = players["fullName"]
            player_firstName = players["firstName"]
            player_lastName = players["lastName"]
            player_birthDate = players["birthDate"]
            player_birthCity = players["birthCity"]
            player_birthCountry = players["birthCountry"]
            try:
                player_birthStateProvince = players["birthStateProvince"]
            except Exception as e:
                player_birthStateProvince = 'N/A'     
            player_nationality = players["nationality"]
            player_height = players["height"]
            player_weight = players["weight"]
            try:
                player_shootsCatches = players["shootsCatches"]
            except Exception as e:
                player_shootsCatches = 'L'     
            player_primaryPositionCode = players["primaryPosition"]["code"]

            player_info.append({
                "player_id":player_id,
                "player_name":person,
                "player_firstName":player_firstName,
                "player_lastName":player_lastName,
                "player_birthDate":player_birthDate,
                "player_birthCity":player_birthCity,
                "player_birthCountry":player_birthCountry,
                "player_birthStateProvince": player_birthStateProvince,
                "player_nationality":player_nationality,
                "player_height":player_height,
                "player_weight":player_weight,
                "player_shootsCatches":player_shootsCatches,
                "player_primaryPositionCode":player_primaryPositionCode,
                "season": int(winning_season  )
            })
            
player_information_df = pd.DataFrame(player_info)
player_information_df = player_information_df.drop_duplicates(subset='player_id', keep="first")
player_information_sorted_df = player_information_df.sort_values(by=['player_id'])

goalie_df = player_information_sorted_df.loc[player_information_sorted_df['player_primaryPositionCode'] == 'G']

player_information_sorted_df.drop(['season'], axis=1, inplace = True)
player_information_sorted_df.to_csv("./schema/player_information_utf-8.csv", index=False, encoding = 'utf-8-sig') 
player_information_sorted_df.to_csv("./schema/player_information.csv", index=False) 
player_information_sorted_df.head()


Unnamed: 0,player_id,player_name,player_firstName,player_lastName,player_birthDate,player_birthCity,player_birthCountry,player_birthStateProvince,player_nationality,player_height,player_weight,player_shootsCatches,player_primaryPositionCode
107,8444919,Tommy Albelin,Tommy,Albelin,1964-05-21,Stockholm,SWE,,SWE,"6' 2""",195,L,D
71,8444945,Glenn Anderson,Glenn,Anderson,1960-10-02,Vancouver,CAN,BC,CAN,"6' 1""",190,L,R
403,8445000,Dave Andreychuk,Dave,Andreychuk,1963-09-29,Hamilton,CAN,ON,CAN,"6' 4""",225,R,L
33,8445275,Tom Barrasso,Tom,Barrasso,1965-03-31,Boston,USA,MA,USA,"6' 3""",210,R,G
269,8445386,Ed Belfour,Ed,Belfour,1965-04-21,Carman,CAN,MB,CAN,"6' 0""",214,L,G


In [46]:
# Player Statistics - Winning Team (Skaters) per Season
# Code by Ron Brennan

player_stats_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
player_stats = []

winning_skaters_df = player_information_df.loc[player_information_df['player_primaryPositionCode'] != 'G']


for index, row in winning_skaters_df.iterrows():
     
        player_nhl_id = row["player_id"]
        season = row ["season"]
        player_birthyear_yyyy = row["player_birthDate"]
        win_season_yyyy = str(season)
        url = player_stats_url.format(player_nhl_id, season)
        response = requests.get(url)
        data_st = response.json()
        

        for player_st in data_st["stats"][0]["splits"]:
                player_season = player_st["season"]
                try:
                    player_timeOnIce = player_st["stat"]["timeOnIce"]
                except Exception as e:
                    player_timeOnIce = '-1'     
                player_assists = player_st["stat"]["assists"]
                player_goals = player_st["stat"]["goals"]
                player_pim = player_st["stat"]["pim"]
                player_shots= player_st["stat"]["shots"]
                player_games = player_st["stat"]["games"]
                try:
                    player_hits = player_st["stat"]["hits"]
                except Exception as e:
                    player_hits = '-1'
                player_powerPlayGoals = player_st["stat"]["powerPlayGoals"]
                player_powerPlayPoints = player_st["stat"]["powerPlayPoints"]
                try:
                    player_powerPlayTimeOnIce = player_st["stat"]["powerPlayTimeOnIce"]
                except Exception as e:
                    player_powerPlayTimeOnIce = '-1'
                try:
                    player_evenTimeOnIce = player_st["stat"]["evenTimeOnIce"]
                except Exception as e:
                    player_evenTimeOnIce = '-1'
                player_penaltyMinutes = player_st["stat"]["penaltyMinutes"]
                player_shotPct = player_st["stat"]["shotPct"]
                player_gameWinningGoals = player_st["stat"]["gameWinningGoals"]
                player_overTimeGoals = player_st["stat"]["overTimeGoals"]
                player_shortHandedGoals = player_st["stat"]["shortHandedGoals"]
                player_shortHandedPoints = player_st["stat"]["shortHandedPoints"]
                try:
                    player_blocked = player_st["stat"]["blocked"]
                except Exception as e:
                    player_blocked = '-1'
                player_plusMinus = player_st["stat"]["plusMinus"]
                player_points = player_st["stat"]["points"]
                try:
                    player_shifts = player_st["stat"]["shifts"]
                except Exception as e:
                    player_shifts = '-1'
                try:
                    player_timeOnIcePerGame = player_st["stat"]["timeOnIcePerGame"]
                except Exception as e:
                    player_timeOnIcePerGame = '-1'
                try:
                    player_evenTimeOnIcePerGame = player_st["stat"]["evenTimeOnIcePerGame"]
                except Exception as e:
                    player_evenTimeOnIcePerGame = '-1'
                try:
                    player_shortHandedTimeOnIcePerGame = player_st["stat"]["shortHandedTimeOnIcePerGame"]
                except Exception as e:
                    player_shortHandedTimeOnIcePerGame = '-1'
                try:
                    player_powerPlayTimeOnIcePerGame = player_st["stat"]["powerPlayTimeOnIcePerGame"]
                except Exception as e:
                    player_powerPlayTimeOnIcePerGame = '-1'

                player_season = win_season_yyyy[4:]
                player_birth_year = player_birthyear_yyyy[:4]
                winning_age = (int(player_season)) - (int(player_birth_year))
                
                player_stats.append({
                    "player_stats_id" : '', 
                    "player_id" : player_nhl_id, 
                    "season_id" : player_season,
                    "player_stats_age" : winning_age,
                    "player_stats_timeOnIce" : player_timeOnIce,
                    "player_stats_assists" : player_assists,
                    "player_stats_goals" : player_goals,
                    "player_stats_pim" : player_pim,
                    "player_stats_shots" : player_shots,
                    "player_stats_games" : player_games,
                    "player_stats_hits" : player_hits,
                    "player_stats_powerPlayGoals" : player_powerPlayGoals,
                    "player_stats_powerPlayPoints" : player_powerPlayPoints,
                    "player_stats_powerPlayTimeOnIce" : player_powerPlayTimeOnIce,
                    "player_stats_evenTimeOnIce" : player_evenTimeOnIce,
                    "player_stats_penaltyMinutes" : player_penaltyMinutes,
                    "player_stats_shotPct" : player_shotPct,
                    "player_stats_gameWinningGoals" : player_gameWinningGoals,
                    "player_stats_overTimeGoals" : player_overTimeGoals,
                    "player_stats_shortHandedGoals" : player_shortHandedGoals,
                    "player_stats_shortHandedPoints" : player_shortHandedPoints,
                    "player_stats_blocked" : player_blocked,
                    "player_stats_plusMinus" : player_plusMinus,
                    "player_stats_points" : player_points,
                    "player_stats_shifts" : player_shifts,
                    "player_stats_timeOnIcePerGame" : player_timeOnIcePerGame,
                    "player_stats_evenTimeOnIcePerGame" : player_evenTimeOnIcePerGame,
                    "player_stats_shortHandedTimeOnIcePerGame" : player_shortHandedTimeOnIcePerGame,
                    "player_stats_powerPlayTimeOnIcePerGame" : player_powerPlayTimeOnIcePerGame  
        })               
      
player_statistics = pd.DataFrame(player_stats)
player_statistics.to_csv("./schema/player_statistics_utf-8.csv", index=False, encoding = 'utf-8-sig')
player_statistics.to_csv("./schema/player_statistics.csv", index=False)
player_statistics.head()
      

Unnamed: 0,player_stats_id,player_id,season_id,player_stats_age,player_stats_timeOnIce,player_stats_assists,player_stats_goals,player_stats_pim,player_stats_shots,player_stats_games,...,player_stats_shortHandedGoals,player_stats_shortHandedPoints,player_stats_blocked,player_stats_plusMinus,player_stats_points,player_stats_shifts,player_stats_timeOnIcePerGame,player_stats_evenTimeOnIcePerGame,player_stats_shortHandedTimeOnIcePerGame,player_stats_powerPlayTimeOnIcePerGame
0,,8445629,1992,30,-1,16,10,58,51,58,...,1,3,-1,-6,26,-1,-1,-1,-1,-1
1,,8446013,1992,32,-1,0,0,175,16,50,...,0,0,-1,-6,0,-1,-1,-1,-1,-1
2,,8446050,1992,26,-1,4,0,111,26,43,...,0,0,-1,-12,4,-1,-1,-1,-1,-1
3,,8446117,1992,31,-1,58,11,87,232,64,...,0,2,-1,1,69,-1,-1,-1,-1,-1
4,,8446305,1992,24,-1,0,0,0,0,2,...,0,0,-1,0,0,-1,-1,-1,-1,-1


In [44]:
# Player Statistics - Winning Team (Goalies) per Season
# Code by Ron Brennan

player_stats_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
goalie_stats = []

for index, row in goalie_df.iterrows():
    player_nhl_id = row["player_id"]
    season = row ["season"]
    player_birthyear_yyyy = row["player_birthDate"]
    win_season_yyyy = str(season)
    url = player_stats_url.format(player_nhl_id, season)
    response = requests.get(url)
    data_st = response.json()

    for goalie_st in data_st["stats"][0]["splits"]:
           
            player_season = goalie_st["season"]
            goalie_timeOnIce = goalie_st["stat"]["timeOnIce"]
            try:
                goalie_ot = goalie_st["stat"]["ot"]
            except Exception as e:
                 goalie_ot = '-1'
            goalie_shutouts = goalie_st["stat"]["shutouts"]
            try:
                goalie_ties = goalie_st["stat"]["ties"]
            except Exception as e:
                 goalie_ties = '-1'         
            goalie_wins = goalie_st["stat"]["wins"]
            goalie_losses = goalie_st["stat"]["losses"]
            goalie_saves = goalie_st["stat"]["saves"]
            try:
                goalie_powerPlaySaves = goalie_st["stat"]["powerPlaySaves"]
            except Exception as e:
                goalie_powerPlaySaves = '-1'
            try:
                goalie_shortHandedSaves = goalie_st["stat"]["shortHandedSaves"]
            except Exception as e:
                goalie_shortHandedSaves = '-1'
            try:
                goalie_evenSaves = goalie_st["stat"]["evenSaves"]
            except Exception as e:
                goalie_evenSaves = '-1'
            try:
                goalie_shortHandedShots = goalie_st["stat"]["shortHandedShots"]
            except Exception as e:
                goalie_shortHandedShots = '-1'
            try:
                goalie_evenShots = goalie_st["stat"]["evenShots"]
            except Exception as e:
                goalie_evenShots = '-1'
            try:
                goalie_powerPlayShots = goalie_st["stat"]["powerPlayShots"]
            except Exception as e:
                goalie_powerPlayShots = '-1'
            try:
                goalie_savePercentage = goalie_st["stat"]["savePercentage"]
            except Exception as e:
                goalie_savePercentage = '-1'
            try:
                goalie_goalAgainstAverage = goalie_st["stat"]["goalAgainstAverage"]
            except Exception as e:
                goalie_goalAgainstAverage = '-1'
            goalie_games = goalie_st["stat"]["games"]
            goalie_gamesStarted = goalie_st["stat"]["gamesStarted"]
            goalie_shotsAgainst = goalie_st["stat"]["shotsAgainst"]
            goalie_goalsAgainst = goalie_st["stat"]["goalsAgainst"]
            try:
                goalie_timeOnIcePerGame = goalie_st["stat"]["timeOnIcePerGame"]
            except Exception as e:
                goalie_timeOnIcePerGame = '-1'          
            try:
                goalie_powerPlaySavePercentage = goalie_st["stat"]["powerPlaySavePercentage"]
            except Exception as e:
                goalie_powerPlaySavePercentage = '-1'
            try:
                goalie_shortHandedSavePercentage = goalie_st["stat"]["shortHandedSavePercentage"]
            except Exception as e:
                goalie_shortHandedSavePercentage = '-1'
            try:
                goalie_evenStrengthSavePercentage = goalie_st["stat"]["evenStrengthSavePercentage"]
            except Exception as e:
                goalie_evenStrengthSavePercentage = '-1'


            player_season = win_season_yyyy[4:]
            player_birth_year = player_birthyear_yyyy[:4]
            winning_age = (int(player_season)) - (int(player_birth_year))
            goalie_stats.append({
                "goalie_stats_id" : '',
                "player_id" : player_nhl_id, 
                "goalie_stats_season" : player_season,
                "goalie_stats_age" : winning_age,
                "goalie_stats_timeOnIce" : goalie_timeOnIce,
                "goalie_stats_goalie_ot" : goalie_ot,
                "goalie_stats_goalie_shutouts" : goalie_shutouts,
                "goalie_stats_goalie_ties" : goalie_ties,
                "goalie_stats_wins" : goalie_wins,
                "goalie_stats_goalie_losses" : goalie_losses,
                "goalie_stats_saves" : goalie_saves,               
                "goalie_stats_powerPlayGoals" : goalie_powerPlaySaves,
                "goalie_stats_powerPlayPoints" : goalie_shortHandedSaves,
                "goalie_stats_powerPlayTimeOnIce" : goalie_evenSaves,
                "goalie_stats_evenTimeOnIce" : goalie_shortHandedShots,
                "goalie_stats_penaltyMinutes" : goalie_evenShots,
                "goalie_stats_shotPct" : goalie_powerPlayShots,
                "goalie_stats_gameWinningGoals" : goalie_savePercentage,
                "goalie_stats_overTimeGoals" : goalie_goalAgainstAverage,
                "goalie_stats_shortHandedGoals" : goalie_games,
                "goalie_stats_primaryPositionCode" : goalie_gamesStarted,
                "goalie_stats_shortHandedPoints" : goalie_shotsAgainst,
                "goalie_stats_blocked" : goalie_goalsAgainst,
                "goalie_stats_plusMinus" : goalie_timeOnIcePerGame,
                "goalie_stats_points" : goalie_powerPlaySavePercentage,
                "goalie_stats_shortHandedSavePercentage" : goalie_powerPlaySavePercentage,
                "goalie_stats_timeOnIcePerGame" :goalie_shortHandedSavePercentage,
                "goalie_stats_evenTimeOnIcePerGame" : goalie_shortHandedSavePercentage,
                "goalie_stats_shortHandedTimeOnIcePerGame" : goalie_evenStrengthSavePercentage,

           })
           
goalie_statistics = pd.DataFrame(goalie_stats)
goalie_statistics.to_csv("./schema/goalie_statistics_utf-8.csv", index=False, encoding = 'utf-8-sig')
goalie_statistics.to_csv("./schema/goalie_statistics.csv", index=False)
goalie_statistics.head()
      

Unnamed: 0,goalie_stats_id,player_id,goalie_stats_season,goalie_stats_age,goalie_stats_timeOnIce,goalie_stats_goalie_ot,goalie_stats_goalie_shutouts,goalie_stats_goalie_ties,goalie_stats_wins,goalie_stats_goalie_losses,...,goalie_stats_shortHandedGoals,goalie_stats_primaryPositionCode,goalie_stats_shortHandedPoints,goalie_stats_blocked,goalie_stats_plusMinus,goalie_stats_points,goalie_stats_shortHandedSavePercentage,goalie_stats_timeOnIcePerGame,goalie_stats_evenTimeOnIcePerGame,goalie_stats_shortHandedTimeOnIcePerGame
0,,8445275,1992,27,3329:29,-1,1,9,25,22,...,57,57,1702,196,58:24,-1.0,-1.0,-1.0,-1.0,-1.0
1,,8445386,1999,34,3535:49,-1,5,9,35,15,...,61,59,1373,117,57:57,86.666667,86.666667,95.3125,95.3125,92.402659
2,,8446134,1993,25,40:00,-1,0,0,0,0,...,1,1,19,1,40:00,-1.0,-1.0,-1.0,-1.0,-1.0
3,,8446831,1996,26,2106:38,-1,1,7,22,6,...,37,35,1012,103,56:56,-1.0,-1.0,-1.0,-1.0,-1.0
4,,8447687,2002,37,3871:40,-1,5,8,41,15,...,65,64,1654,140,59:33,87.767584,87.767584,91.935484,91.935484,92.490119


In [None]:
# Player Profile - Losing Team per Season
# base_url = "https://statsapi.web.nhl.com/api/v1/people/{}"
# player_ages = []
# player_nationalities = []
# for index, row in losing_df.iterrows():
#     player_id = row["Player ID"]
#     url = base_url.format(player_id)
#     response = requests.get(url)
#     data = response.json()

#     player_age = data['people'][0]['birthDate']
    
#     player_nationality = data['people'][0]['nationality']
#     player_birthyear_yyyy = player_age[:4]
#     losing_season_yyyy = row["Season"][4:]

#     losing_age = (int(losing_season_yyyy)) - (int(player_birthyear_yyyy))
    
#     player_ages.append(losing_age)
#     player_nationalities.append(player_nationality)

In [None]:
# losing_df["Age"] = player_ages
# losing_df["Nationality"] = player_nationalities
# losing_df.tail()

In [None]:
# Get averave age per season of winning seasons

# for row in winning_df:
#     avg_player_age = winning_df. groupby(by='Season')['Age'].mean().round()
# avg_player_age_winning_df = pd.DataFrame(avg_player_age)
# avg_player_age_winning_df

In [None]:
# Get averave age per season of losing seasons
# for row in losing_df:
#     avg_player_age = losing_df. groupby(by='Season')['Age'].mean().round()
    
# avg_player_age_losing_df = pd.DataFrame(avg_player_age)
# avg_player_age_losing_df

In [None]:
# Nationality Count from 19911992 season

# player_nat_1992 = winning_df.loc[winning_df['Season'] == "19911992"]["Nationality"]
# player_nat_1992.value_counts()


In [None]:
# Nationality Count from 20212022 season

# player_nat_2022 = winning_df.loc[winning_df['Season'] == "20222023"]["Nationality"]
# player_nat_2022.value_counts()


In [None]:
# Building Penalty In Minutes (PIM) for Winning DF

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}" 
pim=[]

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        pim_data = data["stats"][0]["splits"][0]['stat']['pim']
        print(pim_data)
        pim.append(pim_data)
        
    except:
        print("not found")
        pim.append(0)    
        pass

In [None]:
# Building Penalty In Minutes (PIM) for Losing DF

Lpim=[]

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        pim_data = data["stats"][0]["splits"][0]['stat']['pim']
        print(pim_data)
        Lpim.append(pim_data)
        
    except:
        print("not found")
        Lpim.append(0)    
        pass

In [None]:
# Building Power Play Goals for Winning DF

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}" 
power_play_goal=[]

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        ppg_data = data["stats"][0]["splits"][0]['stat']['powerPlayGoals']
        print(ppg_data)
        power_play_goal.append(ppg_data)
        
    except:
        print("not found")
        power_play_goal.append(0)    
        pass

In [None]:
# Building Power Play Goals for Losing DF

Lpower_play_goal=[]

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        ppg_data = data["stats"][0]["splits"][0]['stat']['powerPlayGoals']
        print(ppg_data)
        Lpower_play_goal.append(ppg_data)
        
    except:
        print("not found")
        Lpower_play_goal.append(0)    
        pass

In [None]:
# Building Over Time Goal for Winning DF

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}" 
over_time_goal=[]

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        otg_data = data["stats"][0]["splits"][0]['stat']['overTimeGoals']
        print(otg_data)
        over_time_goal.append(otg_data)
        
    except:
        print("not found")
        over_time_goal.append(0)    
        pass

In [None]:
# Building Power Play Goals for Losing DF

Lover_time_goal=[]

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        otg_data = data["stats"][0]["splits"][0]['stat']['overTimeGoals']
        print(otg_data)
        Lover_time_goal.append(otg_data)
        
    except:
        print("not found")
        Lover_time_goal.append(0)    
        pass

In [None]:
# Building Plus Minus (+/-) for Winning DF

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}" 
plus_mins=[]

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        pm_data = data["stats"][0]["splits"][0]['stat']['plusMinus']
        print(pm_data)
        plus_mins.append(pm_data)
        
    except:
        print("not found")
        plus_mins.append(0)    
        pass

In [None]:
# Building Plus Minus (+/-) for Losing DF

Lplus_mins=[]

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        pm_data = data["stats"][0]["splits"][0]['stat']['plusMinus']
        print(pm_data)
        Lplus_mins.append(pm_data)
        
    except:
        print("not found")
        Lplus_mins.append(0)    
        pass

In [None]:
# Building Points for Winning DF

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}" 
points=[]

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        points_data = data["stats"][0]["splits"][0]['stat']['points']
        print(points_data)
        points.append(points_data)
        
    except:
        print("not found")
        points.append(0)    
        pass

In [None]:
# Building Points for Losing DF

Lpoints=[]

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id,year)
    response = requests.get(url)
    data = response.json()
    try:
        points_data = data["stats"][0]["splits"][0]['stat']['points']
        print(points_data)
        Lpoints.append(points_data)
        
    except:
        print("not found")
        Lpoints.append(0)    
        pass

In [None]:
winning_df['PIM'] = pim 
winning_df['PPG'] = power_play_goal
winning_df['OTG'] = over_time_goal
winning_df['+/-'] = plus_mins
winning_df['Pts'] = points
winning_df.head()

In [None]:
losing_df['PIM'] = Lpim 
losing_df['PPG'] = Lpower_play_goal
losing_df['OTG'] = Lover_time_goal
losing_df['+/-'] = Lplus_mins
losing_df['Pts'] = Lpoints
losing_df.head()

In [None]:
# Get averave PIM of winning team over 29 seasons

for row in winning_df:
    avg_player_pim = winning_df. groupby(by='Season')['PIM'].mean().round()
avg_player_pim_winning_df = pd.DataFrame(avg_player_pim)
avg_player_pim_winning_df.mean()

In [None]:
# Get averave PIM of losing team over 29 seasons

for row in losing_df:
    avg_player_pim = losing_df. groupby(by='Season')['PIM'].mean().round()
avg_player_pim_losing_df = pd.DataFrame(avg_player_pim)
avg_player_pim_losing_df.mean()

In [None]:
# Get averave OT goals of winning team over 29 seasons

for row in winning_df:
    avg_player_otg = winning_df. groupby(by='Season')['OTG'].mean()
avg_player_otg_winning_df = pd.DataFrame(avg_player_otg)
avg_player_otg_winning_df.mean()

In [None]:
# Get averave OT goals of losing team over 29 seasons

for row in losing_df:
    avg_player_otg = losing_df. groupby(by='Season')['OTG'].mean()
avg_player_otg_losing_df = pd.DataFrame(avg_player_otg)
avg_player_otg_losing_df.mean()

In [None]:
# Get averave PP goals of winning team over 29 seasons

for row in winning_df:
    avg_player_ppg = winning_df. groupby(by='Season')['PPG'].mean().round()
avg_player_ppg_winning_df = pd.DataFrame(avg_player_ppg)
avg_player_ppg_winning_df.mean()

In [None]:
# Get averave PP goals of losing team over 29 seasons

for row in losing_df:
    avg_player_ppg = losing_df. groupby(by='Season')['PPG'].mean().round()
avg_player_ppg_losing_df = pd.DataFrame(avg_player_ppg)
avg_player_ppg_losing_df.mean()

In [None]:
# Get averave points of winning team over 29 seasons

for row in winning_df:
    avg_player_pts = winning_df. groupby(by='Season')['Pts'].mean().round()
avg_player_pts_winning_df = pd.DataFrame(avg_player_pts)
avg_player_pts_winning_df.mean()

In [None]:
# Get averave points of losing team over 29 seasons

for row in losing_df:
    avg_player_pts = losing_df. groupby(by='Season')['Pts'].mean().round()
avg_player_pts_losing_df = pd.DataFrame(avg_player_pts)
avg_player_pts_losing_df.mean()

In [None]:
# Get averave +/- of winning team over 29 seasons

for row in winning_df:
    avg_player_plusminus = winning_df. groupby(by='Season')['+/-'].mean().round()
avg_player_plusminus_winning_df = pd.DataFrame(avg_player_plusminus)
avg_player_plusminus_winning_df.mean()

In [None]:
# Get averave +/- of losing team over 29 seasons

for row in losing_df:
    avg_player_plusminus = losing_df. groupby(by='Season')['+/-'].mean().round()
avg_player_plusminus_losing_df = pd.DataFrame(avg_player_plusminus)
avg_player_plusminus_losing_df.mean()

In [None]:
# Pie chart, then and now comparison of nationality breakdown for the 1992 and 2022 winning teams
# Labels for the sections of our pie chart


# labels = ["Europe", "Canada", 'USA']

# # The values of each section of the pie chart.  Player total count value for each continent
# sizes = [6, 21, 10]

# # The colors of each section of the pie chart
# colors = ["yellow", "red", 'lightblue']

# # Tells matplotlib to separate the "Humans" section from the others
# explode = (0.1, 0, 0)

# # Labels for current the sections of our pie chart
# labels2 = ["Europe", "Canada", 'USA']

# # The values of each section of the pie chart.  Player total count value for each continent
# sizes2 = [12, 17, 10]

# # The colors of each section of the pie chart
# colors2 = ["yellow", "red", "lightblue"]

# # Tells matplotlib to separate the "Humans" section from the others
# explode2 = (0.1, 0, 0)

# # Create a figure with two subplots arranged side by side
# fig, (ax1, ax2) = plt.subplots(1, 2)

# # Plot the first pie chart in the left subplot
# ax1.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
# ax1.set_title("1992 player distribution")

# # Plot the second pie chart in the right subplot
# ax2.pie(sizes2, explode=explode2, labels=labels2, colors=colors2, autopct='%1.1f%%', startangle=90)
# ax2.set_title("2022 player distribution")

# # Adjust the spacing between subplots
# fig.tight_layout()

# # Save the figure
# plt.savefig("output/Fig1.png")

# # Display the figure with the two pie charts
# plt.show()

# print(f"There is a noticeable increase in European players when comparing the most recent winning team's roster vs that of 1991-1992. Canadian representation has had a significant reduction, and the U.S. remains constant, but most recently had the lowest representation.")

In [None]:
# line graph with trendline to identify geographical shifts in player's nationality/hockey program per country


# nationalities = ['USA', 'Canada', 'Europe']
# decades = [2002, 2012, 2022]
# data_points = {
#     'USA': [73, 67, 88],
#     'Canada': [211, 162, 163],
#     'Europe': [86, 65, 91],  
# }

# # Define colors for the lines and trendlines
# line_colors = ['blue', 'red', 'green']
# trendline_colors = ['darkblue', 'darkred', 'darkgreen']

# # Create line graph
# for i, nationality in enumerate(nationalities):
#     plt.plot(decades, data_points[nationality], label=nationality, color=line_colors[i])

# # Add trendline (linear regression)
# for i, nationality in enumerate(nationalities):
#     z = np.polyfit(decades, data_points[nationality], 1)
#     p = np.poly1d(z)
#     plt.plot(decades, p(decades), '--', color=trendline_colors[i])

# # Set labels and title
# plt.xlabel('Decades')
# plt.ylabel('Number of Players')
# plt.title('Winning Team Player Nationality Breakdown Trend')

# # Set x-axis tick positions and labels
# plt.xticks(decades, ['2002', '2012', '2022'])

# # Remove decimals from y-axis
# plt.gca().yaxis.set_major_formatter(plt.FuncFormatter(lambda value, tick_number: f'{int(value)}'))

# # Add legend
# plt.legend()

# # Save the figure
# plt.savefig("output/Fig2.png")

# # Display the plot
# plt.show()

# print("The Canadian player reduction in representation was most significantly impacted in the decade of 2003-2012 and has remained consistent since.")


In [None]:
# plus minus win

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
plus_mins_win = []

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url)
    data = response.json()
    
    try:
        pm_data_win = data["stats"][0]["splits"][0]['stat']['plusMinus']
        #print(pm_data_win)
        plus_mins_win.append(pm_data_win)  # Store each value in a separate list
        
    except:
        #print("not found")
        plus_mins_win.append(0)  # Store 0 for missing values
        
overall_mean_win = sum(plus_mins_win) / len(plus_mins_win)
overall_mean_win

In [None]:
# plus minus lose

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
plus_mins_lose = []

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url)
    data = response.json()
    
    try:
        pm_data_lose = data["stats"][0]["splits"][0]['stat']['plusMinus']
        #print(pm_data_lose)
        plus_mins_lose.append(pm_data_lose)  # Store each value in a separate list
        
    except:
        #print("not found")
        plus_mins_lose.append(0)  # Store 0 for missing values
        
overall_mean_lose = sum(plus_mins_lose) / len(plus_mins_lose)
overall_mean_lose


In [None]:

# correlation_co = st.pearsonr(winning_df["PIM"],winning_df["+/-"])
# #Print the correaltion anser

# #Calculate linear regression
# (slope, intercept,rvalue, pvalue, stderr)=st.linregress(winning_df["PIM"],winning_df["+/-"])
# regress_values=winning_df["PIM"]* slope + intercept
# line_eq= f"y = {round(slope, 2)} x + {round(intercept, 2)}"
# plt.scatter(winning_df["PIM"],winning_df["+/-"],color='tab:blue')
# plt.plot(winning_df["PIM"], regress_values, color='red')
# plt.annotate(line_eq,(160,-35), fontsize=15, color = "blue")
# plt.xlabel("PIM")
# plt.ylabel("+/-")
# plt.title("PIM vs +/-")
# # Save the figure
# plt.savefig("output/Fig3.png")
# plt.show()
# print(f"The correlation between PIM and +/- is {round(correlation_co[0],2)}.  Except for outliers, the lower a player's PIM, the more likely of a more positive +/- result.")


In [None]:
# ppg_vs_pts = plt.scatter(winning_df["PPG"],winning_df["Pts"],edgecolors='black',marker='o',alpha=0.75,s=30)

# correlation_co = st.pearsonr(winning_df["PPG"],winning_df["Pts"])

# #Calculate linear regression
# (slope, intercept,rvalue, pvalue, stderr)=st.linregress(winning_df["PPG"],winning_df["Pts"])
# regress_values=winning_df["PPG"]* slope + intercept
# line_eq= f"y = {round(slope, 2)} x + {round(intercept, 2)}"
# plt.scatter(winning_df["PPG"],winning_df["Pts"],color='tab:blue')
# plt.plot(winning_df["PPG"], regress_values, color='red')
# plt.annotate(line_eq,(15,0), fontsize=15, color = "blue")
# plt.xlabel("PPG")
# plt.ylabel("Pts")
# plt.title("PPG vs Pts")
# # Save the figure
# plt.savefig("output/Fig4.png")
# plt.show()
# #Print the correlation answer
# print(f"The correlation between PPG vs Pts is {round(correlation_co[0],2)}. This graph clearly depicts the correlation between scoring PPG and resulting in an overall increase in points")


In [None]:
#Scatter plot for losing team +/-
# Remove gridlines

# plt.grid(False)

# # Scatter plot for losing team
# plt.scatter(range(len(plus_mins_lose)), plus_mins_lose, color='red', label='Losing team')

# # Calculate correlation coefficient and p-value
# corr, p_value = pearsonr(range(len(plus_mins_lose)), plus_mins_lose)

# # Add correlation and p-value to the plot
# plt.text(0.95, 0.05, f"Correlation: {corr:.2f}\nP-value: {p_value:.2e}", transform=plt.gca().transAxes,
#          ha='right', va='bottom', bbox=dict(facecolor='white', edgecolor='black', boxstyle='round'))

# # Set plot title and labels
# plt.title('Plus/Minus Data')
# plt.xlabel('Index')
# plt.ylabel('Plus/Minus')

# # Display legend
# plt.legend()

# # Save the figure
# plt.savefig("output/Fig5.png")
# # Show the plot
# plt.show()



In [None]:
# scatter plot for winning team +/-
# Remove gridlines

# plt.grid(False)

# # Scatter plot for losing team
# plt.scatter(range(len(plus_mins_win)), plus_mins_win, color='blue', label='Winning team')

# # Calculate correlation coefficient and p-value
# corr, p_value = pearsonr(range(len(plus_mins_win)), plus_mins_win)

# # Add correlation and p-value to the plot
# plt.text(0.95, 0.05, f"Correlation: {corr:.2f}\nP-value: {p_value:.2e}", transform=plt.gca().transAxes,
#          ha='right', va='bottom', bbox=dict(facecolor='white', edgecolor='black', boxstyle='round'))

# # Set plot title and labels
# plt.title('Plus/Minus Data')
# plt.xlabel('Index')
# plt.ylabel('Plus/Minus')

# # Display legend
# plt.legend()

# # Save the figure
# plt.savefig("output/Fig6.png")

# # Show the plot
# plt.show()




In [None]:
# scatter plot comparison, winning vs losing with mannwhitneyu pvalue calculation
# Remove gridlines

# plt.grid(False)

# # Scatter plot for losing team
# plt.scatter(range(len(plus_mins_lose)), plus_mins_lose, color='red', label='Losing team')

# # Scatter plot for winning team
# plt.scatter(range(len(plus_mins_win)), plus_mins_win, color='blue', label='Winning team')

# # Calculate correlation coefficient and p-value
# corr, p_value = pearsonr(range(len(plus_mins_lose)), plus_mins_lose)

# # Calculate Mann-Whitney U test for p-value
# mwu, mwu_pvalue = mannwhitneyu(plus_mins_win, plus_mins_lose, alternative='two-sided')

# # Add correlation and p-value to the plot
# text_box_props = dict(facecolor='white', edgecolor='black', boxstyle='round')
# plt.text(0.95, 0.05, f"Correlation: {corr:.2f}\nMann-Whitney U p-value: {mwu_pvalue:.2e}", transform=plt.gca().transAxes,
#          ha='right', va='bottom', bbox=text_box_props)

# # Set plot title and labels
# plt.title('Plus/Minus Data')
# plt.xlabel('Index')
# plt.ylabel('Plus/Minus')

# # Create a legend with a box
# legend_box_props = dict(facecolor='white', edgecolor='black', boxstyle='round')
# plt.legend(prop={'size': 12}, loc='upper right', bbox_to_anchor=(1.0, 1.0), borderaxespad=0., framealpha=1.0, frameon=True, handlelength=1, handletextpad=0.5, labelspacing=0.5, borderpad=0.5, fancybox=True)

# # Add box around the legend
# legend_box = plt.gca().get_legend()
# legend_box.set_frame_on(True)
# legend_box.get_frame().set_edgecolor('black')
# legend_box.get_frame().set_facecolor('white')
# legend_box.get_frame().set_linestyle('-')

# # Show the plot
# plt.tight_layout()
# # Save the figure
# plt.savefig("output/Fig7.png")
# plt.show()

# print(f"Of the 2000+ entries, +2 to -2 is very dense.  The winning team (blue) has a slightly above 0 stonger representation with an average +/- of slightly above 4 and the losing team at 2.  When compare the two sets of values, the p-value supports that there is a very strong correlation between a more positive +/- and winning.")

In [None]:
#box plot for winning team +/-
#Create the figure and axes

# fig, ax = plt.subplots(figsize=(20, 15))

# # Create the boxplot for the winning team with blue fill color
# boxplot = ax.boxplot(plus_mins_win, labels=["Winning"], patch_artist=True)
# colors = ['blue']
# for patch, color in zip(boxplot['boxes'], colors):
#     patch.set_facecolor(color)

# # Set the color and opacity of the outliers to blue
# for flier in boxplot['fliers']:
#     flier.set(marker='o', color='blue', alpha=1.0)

# # Set the mean line color to orange and make it bold
# for median in boxplot['medians']:
#     median.set(color='orange', linewidth=2)

# # Calculate correlation coefficient and p-value
# corr, p_value = pearsonr(range(len(plus_mins_win)), plus_mins_win)

# # Add correlation and p-value to the plot with larger font size
# ax.text(0.95, 0.05, f"Correlation: {corr:.2f}\nP-value: {p_value:.2e}", transform=ax.transAxes,
#         ha='right', va='bottom', fontsize=20)

# # Set the plot title and labels with larger font size
# ax.set_title("Plus/Minus Boxplot (Winning Team)", fontsize=30)
# ax.set_xlabel("Team", fontsize=30)
# ax.set_ylabel("Plus/Minus", fontsize=30)

# # Set the font size of the tick labels
# ax.tick_params(axis='both', labelsize=30)

# # Save the figure
# plt.savefig("output/Fig8.png")
# # Display the plot
# plt.show()




In [None]:
#boxplot for losing team +/-
# Create the figure and axes

# fig, ax = plt.subplots(figsize=(20, 15))

# # Create the boxplot for the winning team with blue fill color
# boxplot = ax.boxplot(plus_mins_lose, labels=["Losing"], patch_artist=True)
# colors = ['red']
# for patch, color in zip(boxplot['boxes'], colors):
#     patch.set_facecolor(color)

# # Set the color and opacity of the outliers to blue
# for flier in boxplot['fliers']:
#     flier.set(marker='o', color='orange', alpha=1.0)

# # Set the mean line color to orange and make it bold
# for median in boxplot['medians']:
#     median.set(color='black', linewidth=2)

# # Calculate correlation coefficient and p-value
# corr, p_value = pearsonr(range(len(plus_mins_lose)), plus_mins_lose)

# # Add correlation and p-value to the plot with larger font size
# ax.text(0.95, 0.05, f"Correlation: {corr:.2f}\nP-value: {p_value:.2e}", transform=ax.transAxes,
#         ha='right', va='bottom', fontsize=20)

# # Set the plot title and labels with larger font size
# ax.set_title("Plus/Minus Boxplot (Losing Team)", fontsize=30)
# ax.set_xlabel("Team", fontsize=30)
# ax.set_ylabel("Plus/Minus", fontsize=30)

# # Set the font size of the tick labels
# ax.tick_params(axis='both', labelsize=30)
# # Save the figure
# plt.savefig("output/Fig9.png")
# # Display the plot
# plt.show()


In [None]:
# boxplot for winning vs losing team +/-, using mannwhitneyu calc for pvalue
# Create the figure and axes

# fig, ax = plt.subplots(figsize=(20, 15))

# # Create the boxplot for the winning team with blue fill color
# boxplot1 = ax.boxplot(plus_mins_win, positions=[1], labels=["Winning"], patch_artist=True)
# colors1 = ['blue']
# for patch, color in zip(boxplot1['boxes'], colors1):
#     patch.set_facecolor(color)

# # Set the color and opacity of the outliers to blue
# for flier in boxplot1['fliers']:
#     flier.set(marker='o', color='blue', alpha=1.0)

# # Set the mean line color to orange and make it bold
# for median in boxplot1['medians']:
#     median.set(color='orange', linewidth=2)

# # Create the boxplot for the losing team with orange fill color
# boxplot2 = ax.boxplot(plus_mins_lose, positions=[2], labels=["Losing"], patch_artist=True)
# colors2 = ['red']
# for patch, color in zip(boxplot2['boxes'], colors2):
#     patch.set_facecolor(color)

# # Set the color and opacity of the outliers to orange
# for flier in boxplot2['fliers']:
#     flier.set(marker='o', color='orange', alpha=1.0)

# # Set the mean line color to black and make it bold
# for median in boxplot2['medians']:
#     median.set(color='black', linewidth=2)

# # Calculate correlation coefficient and p-value using Mann-Whitney U test
# statistic, p_value = mannwhitneyu(plus_mins_win, plus_mins_lose, alternative='two-sided')

# # Add correlation and p-value to the plot with larger font size
# ax.text(0.95, 0.05, f"Mann-Whitney U test:\nCorrelation: N/A\nP-value: {p_value:.2e}",
#         transform=ax.transAxes, ha='right', va='bottom', fontsize=20)

# # Set the plot title and labels with larger font size
# ax.set_title("Plus/Minus Boxplots", fontsize=30)
# ax.set_xlabel("Team", fontsize=30)
# ax.set_ylabel("Plus/Minus", fontsize=30)

# # Set the font size of the tick labels
# ax.tick_params(axis='both', labelsize=30)

# # Set the x-axis limits
# ax.set_xlim(0.5, 2.5)
# # Save the figure
# plt.savefig("output/Fig10.png")
# # Display the plot
# plt.show()


# print(f"The results of the box plots depicts how the winning team has far more consistency amongst it's players.  This is also supported by having less outliers then the losing team at each extremety.")

In [None]:
# Comparison bar graph for defined stats, winning vs losing team

# categories = ['Age','+/-', 'PIM', 'OT Goals', 'PP Goals', 'Points']
# winning_stats = [28.1, 4.5, 34.36, 0.14, 2.0, 21.9]
# losing_stats = [27.8, 2.1, 35.03, 0.13, 2.1, 20.3]

# # Set the positions of the bars on the x-axis
# bar_width = 0.35
# bar_positions = np.arange(len(categories))

# # Create the figure and axes
# fig, ax = plt.subplots()

# # Plot the winning team's stats
# ax.bar(bar_positions, winning_stats, bar_width, label='Winning Team')

# # Plot the losing team's stats
# ax.bar(bar_positions + bar_width, losing_stats, bar_width, label='Losing Team')

# # Set labels and title
# ax.set_xlabel('Categories')
# ax.set_ylabel('Average result per statistic')
# ax.set_title('Stats Comparison: Winning Team vs Losing Team')

# # Set x-axis tick labels
# ax.set_xticks(bar_positions + bar_width / 2)
# ax.set_xticklabels(categories)

# # Add a legend
# ax.legend()

# # Save the figure
# plt.savefig("output/Fig11.png")

# # Display the plot
# plt.show()

# print(f"When comparing the identified key stats for the season between the winning and losing team, it is apparent that the one stat with the most material difference and impact is the '+/-'.")

Final Analysis/Conclusion:

- Based on our overall analysis and supporting data, there is no material difference regarding age groups and winning.  Years of experience requires additional analysis.
- There should be a heightened focus on recruiting European players.  Their hockey programs have significantly evolved, and the number of European players playing in the NHL is showing a steady increase, while the Canadian player representation is reducing.
- More PPG equates to a higher overall point value and not a substitute for regular goals.
- It is favorable and improves the likelihood of winning to have more consistency amongst contributing players vs having one or two superstars on the team.  The box plot comparing the +/- statistic supports this theor as the winning team has less outliers and more consistency when comparing the winning and losing team.  The +/- statistic is also the only key statistic analyzed that has a material difference(100% increase) in results when comparing the winning and losing team.
- Future analysis reviewing Stanley Cup Finals' data only, can provide greater variability in data points and provide additional trending, influencing additional recommendations.