In [1]:
# importing the requests library
import requests
from datetime import datetime
import json
import time 
import pandas as pd
from tqdm.notebook import tqdm_notebook
from flatten_json import flatten
import numpy as np 


import matplotlib.pyplot as plt
color_map = plt.cm.winter
from matplotlib.patches import RegularPolygon
import math
from PIL import Image
from matplotlib.colors import ListedColormap,LinearSegmentedColormap
import matplotlib.colors as mcolors
import warnings
warnings.filterwarnings('ignore')

# Base URL of the NHL stats API; every endpoint path used in this notebook is appended to it.
API_URL = 'https://statsapi.web.nhl.com/api/v1/'

In [2]:
# NHL-api-endpoints of use
# Each value is a path that gets appended to API_URL; the '{}' placeholders are
# filled in with str.format right before the request is made.
seasons_url = 'seasons'
# placeholders, in order: season id, game type
games_schedule_of_season_url = 'schedule?season={}&gameType={}'
# placeholder: game id
game_events_url = 'game/{}/feed/live'
# Game-type codes requested for every season.
# NOTE(review): presumably 'R' = regular season and 'P' = playoffs — confirm against the API docs.
game_types = ['R', 'P']


In [3]:
#SEASON DETAILS
# Ask the API for every season it knows about and keep the ids of the seasons
# whose end date falls after 2019.
# A timeout keeps the cell from hanging forever on a stalled connection.
seasons_response = requests.get(url = API_URL + seasons_url, timeout = 30)
# Surface HTTP errors here instead of as a confusing JSON/KeyError further down.
seasons_response.raise_for_status()
seasons = seasons_response.json()['seasons']
season_ids = []
for season in seasons:
    season_end_date_year = datetime.strptime(season['seasonEndDate'], '%Y-%m-%d').year
    if season_end_date_year > 2019:
        season_ids.append(season['seasonId'])
print(season_ids)

['20192020', '20202021', '20212022']


In [4]:
# Get the game ids from the schedule; they are needed to fetch the actual events of each game.
#   season_id_to_game_ids: season id -> list of game ids, e.g. {'20192020': [1, 2, 3]}
#   game_ids:              the same ids flattened into one list across all seasons
season_id_to_game_ids = {}
game_ids = []
for season_id in season_ids:
    # Register the season up front so it has an entry even when the API returns no games for it.
    season_game_ids = season_id_to_game_ids.setdefault(season_id, [])
    for game_type in game_types:
        season_games_schedule_url = API_URL + games_schedule_of_season_url.format(season_id, game_type)
        # e.g. https://statsapi.web.nhl.com/api/v1/schedule?season=20182019&gameType=R
        schedule_details_response = requests.get(url = season_games_schedule_url, timeout = 30)
        # Fail loudly on an HTTP error rather than on a missing 'dates' key.
        schedule_details_response.raise_for_status()
        schedule_details_dates = schedule_details_response.json()['dates']
        for schedule_details_date in schedule_details_dates:
            # a single date can hold several games
            scheduled_games = schedule_details_date["games"]
            for scheduled_game in scheduled_games:
                season_game_ids.append(scheduled_game['gamePk'])
                game_ids.append(scheduled_game['gamePk'])

print(len(season_id_to_game_ids['20202021']))

952


In [5]:
# Download the live feed of every game and turn each shot/goal into one flat record.
begin = time.time()
game_id_to_live_feed_storage = {}  # game id -> raw live-feed JSON (HTTP data saved for future use)
combined_games_data = []  # final array that will contain all the details (one dict per shot/goal)
player_statistics = []
top_4_player_coordinates = {}  # player full name -> records of the goals that player scored
top_4_players = ['Connor McDavid', 'Nikita Kucherov', 'Sidney Crosby', 'Nathan MacKinnon']

for game_id in tqdm_notebook(game_ids):
    game_live_feed_url = API_URL + game_events_url.format(game_id)
    # A timeout keeps one stalled connection from freezing the whole download loop.
    game_live_feed_response = requests.get(url = game_live_feed_url, timeout = 30)
    # Fail with the HTTP status instead of a KeyError on the missing 'gameData' key.
    game_live_feed_response.raise_for_status()
    game_live_feed_json_response = game_live_feed_response.json()
    game_id_to_live_feed_storage[game_id] = game_live_feed_json_response

    # teams playing the match
    teams_playing = game_live_feed_json_response['gameData']['teams']
    current_match_away_team = teams_playing['away']['name']
    current_match_home_team = teams_playing['home']['name']

    # events/live data/plays of the match
    game_live_data = game_live_feed_json_response['liveData']['plays']  # only the play data (goals etc)
    scoring_play_event_ids = game_live_data['scoringPlays']  # all the plays that resulted in a goal
    all_plays = game_live_data['allPlays']

    for play in all_plays:
        # Keep only shots and goals; shot data is needed next to goals to find efficiency.
        event_type = play['result']['event']
        is_goal = event_type == 'Goal' and play['about']['eventIdx'] in scoring_play_event_ids
        if not (is_goal or event_type == 'Shot'):
            continue

        # Skip events without a complete (x, y) coordinate pair.
        coordinates = play.get('coordinates')
        if coordinates is None or len(coordinates) != 2:
            continue
        coor_x = coordinates['x']
        coor_y = coordinates['y']

        # The coordinate system stays the same but the teams switch sides, so mirror
        # events with negative x to plot everything on the same half of the rink.
        if coor_x < 0:
            coor_x_normalized = -1 * coor_x
            coor_y_normalized = -1 * coor_y
        else:
            coor_x_normalized = coor_x
            coor_y_normalized = coor_y

        # Drop events recorded at x >= 89 within |y| <= 3.
        # NOTE(review): presumably x = 89 is the goal line and |y| <= 3 the goal mouth — confirm.
        # The previous test `y <= 3 or y >= -3` was always true and therefore
        # discarded every event with x >= 89, whatever its y value.
        if coor_x_normalized >= 89 and abs(coor_y_normalized) <= 3:
            continue

        shooting_team = play['team']['name']
        shooting_team_is_home = shooting_team == current_match_home_team
        defending_team = current_match_away_team if shooting_team_is_home else current_match_home_team
        shot_or_goal_data = {
            "coor_x": coor_x,
            "coor_y": coor_y,
            "event_type": event_type.strip(),  # either shot or a goal
            "coor_x_normalized": coor_x_normalized,
            "coor_y_normalized": coor_y_normalized,
            "game_id": game_id,
            "shooting_team": shooting_team,
            "shooting_team_home_or_away": 'Home' if shooting_team_is_home else 'Away',
            "shot_goal_defending_team": defending_team.strip(),
        }
        combined_games_data.append(shot_or_goal_data)

        # Player statistics: remember the record when the scorer is one of the tracked players.
        for involved_player in play.get('players') or []:
            if involved_player.get('playerType') != "Scorer":
                continue
            player_name = (involved_player.get('player') or {}).get('fullName')
            if player_name in top_4_players:
                top_4_player_coordinates.setdefault(player_name, []).append(shot_or_goal_data)

end = time.time()
print(f"Total runtime of the program is {end - begin}")

  0%|          | 0/3476 [00:00<?, ?it/s]

Total runtime of the program is 677.9715399742126


## Write formatted data to a json file called shots_goals_data.json

In [6]:
# Persist every collected shot/goal record so later cells can reload it from disk.
shots_goals_path = '../data/shots_goals_data.json'
with open(shots_goals_path, 'w') as shots_goals_file:
    json.dump(combined_games_data, shots_goals_file)

## Write formatted data to a json file for player analytics called playername.json and playername.csv


In [7]:
# Export each tracked player's records twice: as raw JSON and as a CSV table.
for player, player_records in top_4_player_coordinates.items():
    with open('../data/' + player + '.json', 'w') as outfile:
        json.dump(player_records, outfile)
    # Build the CSV straight from the in-memory records; re-reading the JSON file
    # that was just written would only hand back the same data.
    record_flattened = [flatten(record) for record in player_records]
    shot_goals_dataframe = pd.DataFrame(record_flattened)
    shot_goals_dataframe.to_csv('../data/' + player + '.csv')

## Create data for Visualization of shots to goals analysis

In [8]:
def write_data_for_visualization(team_a, team_b, data_path='../data/shots_goals_data.json'):
    """Build the shot/goal coordinate table for team_a shooting against team_b.

    Despite its name, this function writes nothing; it only returns the table
    and leaves saving it to the caller.

    Parameters
    ----------
    team_a : str
        Name of the shooting team; only its shots and goals are returned.
    team_b : str
        Name of the defending team.
    data_path : str, optional
        JSON file holding the list of shot/goal records. Defaults to the file
        this notebook writes.

    Returns
    -------
    pandas.DataFrame
        Columns 'Shots X coordinates', 'Shots Y coordinates',
        'Goals X coordinates' and 'Goals Y coordinates', holding the normalized
        coordinates. Shots and goals are placed side by side by row position,
        so the shorter pair of columns is padded with NaN. The frame has these
        four columns and no rows when the file holds no records.
    """
    shot_columns = {"coor_x_normalized": "Shots X coordinates", "coor_y_normalized": "Shots Y coordinates"}
    goal_columns = {"coor_x_normalized": "Goals X coordinates", "coor_y_normalized": "Goals Y coordinates"}

    # Only the read needs the file handle; release it before doing any processing.
    with open(data_path, 'r') as f:
        data = json.load(f)
    shot_goals_dataframe = pd.DataFrame([flatten(record) for record in data])

    # No records means no columns to filter on; hand back an empty, well-formed table.
    if shot_goals_dataframe.empty:
        return pd.DataFrame(columns=list(shot_columns.values()) + list(goal_columns.values()))

    # Keep only the events where team_a shoots and team_b defends.
    team_a_against_b = shot_goals_dataframe[
        (shot_goals_dataframe['shooting_team'] == team_a)
        & (shot_goals_dataframe['shot_goal_defending_team'] == team_b)
    ]

    def _coordinates_of(event_type, column_names):
        # The mask is built from the frame it is applied to, so no index realignment is involved.
        event_rows = team_a_against_b.loc[
            team_a_against_b['event_type'] == event_type,
            ['coor_x_normalized', 'coor_y_normalized'],
        ]
        # A fresh 0..n-1 index lines shots and goals up by position, which avoids
        # NaN holes caused by mismatched row labels on concat.
        return event_rows.rename(columns=column_names).reset_index(drop=True)

    shots_team_a = _coordinates_of('Shot', shot_columns)
    goals_team_a = _coordinates_of('Goal', goal_columns)
    combined_df = pd.concat([shots_team_a, goals_team_a], axis = 1)
    return combined_df

In [9]:
# Export one CSV per direction of the matchup; in each pass team_a is the shooting
# side and team_b the defending side.
#   pass 1: team A is our team attacking our next opponent -> best attacking positions
#   pass 2: roles swapped, the opposition is attacking us  -> best defensive positions
matchups = (
    ("Toronto Maple Leafs", "Vancouver Canucks"),
    ("Vancouver Canucks", "Toronto Maple Leafs"),
)
for team_a, team_b in matchups:
    visualization_data_frame = write_data_for_visualization(team_a, team_b)
    file_name = f"{team_a}_vs_{team_b}.csv"
    visualization_data_frame.to_csv(f"../data/{file_name}")