In [1]:
import json
import os

import pandas as pd
from tqdm import tqdm
# Relax pandas' display limits so wide frames with long text cells render untruncated in the notebook.
# set_option accepts several (option, value) pairs in a single call.
pd.set_option(
    "display.max_colwidth", None,  # None: never shorten the text inside a cell
    "display.max_rows", 500,
    "display.max_columns", 500,
    "display.width", 1000,
)

In [2]:
import os

# Game-type label mapped to its two-digit code; get_json_path reads the same codes from
# characters 5-6 of a game id.
game_type_map = dict(regular_season="02", playoffs="03")


class Directory:
    """Namespace for the filesystem locations the notebook reads from and writes to."""
    # Root data folder (relative path). The season JSON files and the generated pickle
    # live in a per-year sub-folder: DATA_DIR/year/.
    DATA_DIR = "../data/" # Modify the path accordingly


class APIList:
    """URL templates for the NHL stats web API."""

    # Schedule endpoint; the season identifier is meant to be appended to the query string.
    GET_ALL_MATCHES_FOR_A_GIVEN_SEASON = "https://statsapi.web.nhl.com/api/v1/schedule?season="
    # Live-feed endpoint; the "{}" placeholder is presumably filled with a game id via str.format.
    GET_ALL_DATA_FOR_A_GIVEN_MATCH = "https://statsapi.web.nhl.com/api/v1/game/{}/feed/live/"


class CustomRegex:
    """Regular-expression patterns for recognising game ids by game type."""

    # Up to four digits, the regular-season code "02", then up to four digits.
    REGULAR_GAME_ID = r"\d{0,4}02\d{0,4}"
    # Same shape with the playoffs code "03".
    PLAYOFFS_ID = r"\d{0,4}03\d{0,4}"

# Values of play["result"]["event"] that the extraction functions keep; every other play is skipped.
TYPES_OF_SHOTS = ["Goal", "Shot"]

In [3]:
def get_json_path(game_id=0, season=0):
    """
    Build the local path(s) of the season JSON file(s) stored under Directory.DATA_DIR.

    game_id takes precedence: season is only consulted when game_id is 0.
    @param game_id: game id whose first four characters are the season year and whose next two are the
                    game-type code ("02" regular season, "03" playoffs); 0 means "not given"
    @param season: season year; 0 means "not given"
    @return: a 2-tuple of paths. For a game id: (path of the file holding that game, ""). For a season:
             (regular-season file path, playoffs file path).
    @raise ValueError: when neither game_id nor season is given
    """
    if game_id != 0:
        year = str(game_id)[:4]
        type_code = str(game_id)[4:6]
        # An unrecognised code keeps the historical behaviour: an empty label, i.e. "year_.json".
        game_type = {"02": "regular_season", "03": "playoffs"}.get(type_code, "")
        # os.path.join gives the same result as the old concatenation when DATA_DIR ends with a
        # separator, and still produces a valid path when it does not.
        return os.path.join(Directory.DATA_DIR, year, year + "_" + game_type + ".json"), ""
    if season != 0:
        year = str(season)
        season_dir = os.path.join(Directory.DATA_DIR, year)
        regular_season = os.path.join(season_dir, year + "_regular_season.json")
        playoffs_season = os.path.join(season_dir, year + "_playoffs.json")
        return regular_season, playoffs_season
    # Previously this fell through and returned None, which made callers fail later with an
    # unrelated tuple-unpacking TypeError.
    raise ValueError("get_json_path needs a non-zero game_id or season")


def flatten_player_data(player_list):
    """
    Encode the players of an event as a single string of the form
    (Full Name)_(Player Type)|(Full Name)_(Player Type)|...
    @param player_list: list of player entries; each provides entry["player"]["fullName"] and
                        entry["playerType"]
    @return: the encoded string, without a trailing "|"; empty when player_list is empty
    """
    encoded_players = (
        "(" + entry["player"]["fullName"] + ")_(" + entry["playerType"] + ")"
        for entry in player_list
    )
    return "|".join(encoded_players)


def get_shooter_goalie(player_list):
    """
    Find the player who took the shot and the goalie facing it among the players of an event.
    @param player_list: list of player entries; each provides entry["playerType"] and
                        entry["player"]["fullName"]
    @return: tuple (shooter name, goalie name); a name is "" when no player has that role
    """
    shooter = ""
    goalie = ""
    for player in player_list:
        role = player["playerType"]
        # The NHL live feed labels the shooting player "Shooter" on saved shots but "Scorer" on
        # goals; matching only "Shooter" left the shooter empty for every goal.
        if role in ("Shooter", "Scorer"):
            shooter = player["player"]["fullName"]
        elif role == "Goalie":
            goalie = player["player"]["fullName"]
        # Any other role (e.g. assists) is irrelevant here.
    return shooter, goalie


def get_home_away_team(game_meta):
    """
    Collect the names and abbreviations of the two teams of a game.
    @param game_meta: game metadata; the teams are read from game_meta["gameData"]["teams"]
    @return: dictionary with the keys "home", "home_abv", "away" and "away_abv"
    """
    teams = game_meta["gameData"]["teams"]
    team_info = {}
    for side in ("home", "away"):
        team = teams[side]
        team_info[side] = team["name"]
        team_info[side + "_abv"] = team["abbreviation"]
    return team_info


def get_side(game_meta):
    """
    Report which rink side each team occupied in every period of a game.
    @param game_meta: game metadata; periods are read from game_meta["liveData"]["linescore"]["periods"]
    @return: dictionary keyed by the 1-based period position, each value being
             {"home": side, "away": side}; both sides are "Side Not Available" when the home entry
             of that period has no "rinkSide"; empty when there are no periods
    """
    sides_by_period = {}
    periods = game_meta["liveData"]["linescore"]["periods"]
    for period_number, period in enumerate(periods, start=1):
        if "rinkSide" not in period["home"]:
            sides_by_period[period_number] = {"home": "Side Not Available", "away": "Side Not Available"}
            continue
        sides_by_period[period_number] = {
            "home": period["home"]["rinkSide"],
            "away": period["away"]["rinkSide"],
        }
    return sides_by_period


def get_cordinates(coordinates_data):
    """
    Extract the location of an event.
    @param coordinates_data: coordinates mapping of a play
    @return: tuple (x, y), or None when either "x" or "y" is missing
    """
    if "x" in coordinates_data and "y" in coordinates_data:
        return coordinates_data["x"], coordinates_data["y"]
    return None


def data_parsing(data, id, event_type, period_dict, team_detail_dict):
    """
    Flatten one play of a game into a single-level record.
    @param data: one play; its "players", "result", "about", "coordinates" and "team" entries are read
    @param id: game id stored in the record as "game_id"
    @param event_type: event name of the play; the goal-only fields are filled only when it is "Goal"
    @param period_dict: rink sides per period, as produced by get_side
    @param team_detail_dict: home/away team names and abbreviations, as produced by get_home_away_team
    @return: dictionary describing the play; goal-only fields hold "NA" for any other event
    """
    players = data["players"]
    result = data["result"]
    about = data["about"]
    coordinates = data["coordinates"]
    team = data["team"]
    shooter, goalie = get_shooter_goalie(players)

    # Insertion order matters: it becomes the column order of the resulting data frame.
    record = {
        "game_id": id,
        "event_code": result["eventCode"],
        "player_info": flatten_player_data(players),
        "shooter": shooter,
        "goalie": goalie,
        "event": result["event"],
        "event_type_id": result["eventTypeId"],
        "event_description": result["description"],
        "home_team": team_detail_dict["home"],
        "home_team_abv": team_detail_dict["home_abv"],
        "away_team": team_detail_dict["away"],
        "away_team_abv": team_detail_dict["away_abv"],
        "about_event_id": about["eventId"],
        "about_period": about["period"],
        "about_period_type": about["periodType"],
        "game_time": about["periodTime"],
        "about_time_remaining": about["periodTimeRemaining"],
        "about_date_time": about["dateTime"],
        "about_goal_away": about["goals"]["away"],
        "about_goal_home": about["goals"]["home"],
        "action_team_name": team["name"],
    }

    record["event_secondary_type"] = result.get("secondaryType", "NA")
    record["coordinates"] = get_cordinates(coordinates)

    # A period without an entry in period_dict is labelled as a shootout.
    period = about["period"]
    if period in period_dict:
        sides = period_dict[period]
        record["home_team_side"] = sides["home"]
        record["away_team_side"] = sides["away"]
    else:
        record["home_team_side"] = record["away_team_side"] = "NA-Shootout"

    if event_type == "Goal":
        strength = result["strength"]
        record.update({
            "event_strength_name": strength["name"],
            "event_strength_code": strength["code"],
            "event_game_winning_goal": result["gameWinningGoal"],
            "event_empty_net": result.get("emptyNet", "Missing Data"),
        })
    else:
        goal_only_fields = ("event_strength_name", "event_strength_code",
                            "event_game_winning_goal", "event_empty_net")
        record.update(dict.fromkeys(goal_only_fields, "NA"))
    return record


def get_goal_shots_data_by_game_id(game_id: int):
    """
    Load one game from its season JSON file and turn its "Shot" and "Goal" plays into a data frame.
    @param game_id: game id whose plays should be transformed; it selects both the file (through
                    get_json_path) and the entry inside that file
    @return: data frame with one row per shot/goal play, in feed order. If a play cannot be parsed, the
             error is reported and the rows collected so far are returned.
    """
    import traceback  # imported here so the definition does not depend on another cell's imports

    json_path, _ = get_json_path(game_id=game_id)
    with open(json_path, "r") as f:
        # The file holds every game of that season and game type, keyed by game id as a string.
        games_by_id = json.load(f)
    game_data = games_by_id[str(game_id)]

    period_dict = get_side(game_meta=game_data)
    teams_type = get_home_away_team(game_meta=game_data)
    final_list = []
    for play in game_data["liveData"]["plays"]["allPlays"]:
        event_name = play["result"]["event"]
        if event_name not in TYPES_OF_SHOTS:
            continue
        try:
            final_list.append(data_parsing(data=play, id=game_id, event_type=event_name,
                                           period_dict=period_dict, team_detail_dict=teams_type))
        except Exception as e:
            print(e)
            # print_exc() writes the traceback itself and returns None; wrapping it in print()
            # only added a stray "None" line to the output.
            traceback.print_exc()
            # Stop at the first unparsable play: later plays of this game are not collected.
            break

    shots_goals_df = pd.DataFrame(final_list)
    return shots_goals_df


def _parse_shot_events(games_by_id):
    """
    Turn the "Shot" and "Goal" plays of every game in a mapping into flat records.
    @param games_by_id: mapping of game id to the game's JSON data
    @return: list of records produced by data_parsing, in iteration order
    """
    import traceback  # imported here so the helper does not depend on another cell's imports

    records = []
    for game_id, game_data in tqdm(games_by_id.items()):
        period_dict = get_side(game_meta=game_data)
        teams_type = get_home_away_team(game_meta=game_data)
        for play in game_data["liveData"]["plays"]["allPlays"]:
            event_name = play["result"]["event"]
            if event_name not in TYPES_OF_SHOTS:
                continue
            try:
                records.append(data_parsing(data=play, id=game_id, event_type=event_name,
                                            period_dict=period_dict, team_detail_dict=teams_type))
            except Exception as e:
                print(play)
                print(game_id)
                print(e)
                # print_exc() writes the traceback itself and returns None; wrapping it in
                # print() only added a stray "None" line to the output.
                traceback.print_exc()
                # Give up on the rest of this game only; the next game is still processed.
                break
    return records


def get_goal_shots_by_season(season_year: int):
    """
    Build the shots-and-goals data frame of a whole season (regular season followed by playoffs)
    and save it as adv_vis.pkl in that season's data folder.
    @param season_year: the year for which the goal/shot data is needed
    @return: data frame for the entire season
    """
    regular_data_path, playoffs_data_path = get_json_path(season=season_year)
    with open(regular_data_path, "r") as f:
        regular_game_data_dict = json.load(f)

    with open(playoffs_data_path, "r") as p:
        playoffs_game_data_dict = json.load(p)

    # Both game types go through the same parsing routine instead of two copy-pasted loops.
    total_game_list = _parse_shot_events(regular_game_data_dict)
    total_game_list += _parse_shot_events(playoffs_game_data_dict)

    shots_goals_df = pd.DataFrame(total_game_list)
    shots_goals_df.to_pickle(os.path.join(Directory.DATA_DIR, str(season_year), "adv_vis.pkl"))
    return shots_goals_df

In [4]:
# Try the single-game extraction on game 2017020001 and preview the first rows.
df = get_goal_shots_data_by_game_id(game_id=2017020001)
df.head()

Unnamed: 0,game_id,event_code,player_info,shooter,goalie,event,event_type_id,event_description,home_team,home_team_abv,away_team,away_team_abv,about_event_id,about_period,about_period_type,game_time,about_time_remaining,about_date_time,about_goal_away,about_goal_home,action_team_name,event_secondary_type,coordinates,home_team_side,away_team_side,event_strength_name,event_strength_code,event_game_winning_goal,event_empty_net
0,2017020001,WPG54,(Josh Morrissey)_(Shooter)|(Frederik Andersen)_(Goalie),Josh Morrissey,Frederik Andersen,Shot,SHOT,Josh Morrissey Wrist Shot saved by Frederik Andersen,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,54,1,REGULAR,00:38,19:22,2017-10-04T23:18:01Z,0,0,Winnipeg Jets,Wrist Shot,"(-36.0, -28.0)",right,left,,,,
1,2017020001,WPG55,(Shawn Matthias)_(Shooter)|(Frederik Andersen)_(Goalie),Shawn Matthias,Frederik Andersen,Shot,SHOT,Shawn Matthias Wrist Shot saved by Frederik Andersen,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,55,1,REGULAR,00:49,19:11,2017-10-04T23:18:12Z,0,0,Winnipeg Jets,Wrist Shot,"(-75.0, 1.0)",right,left,,,,
2,2017020001,WPG58,(Bryan Little)_(Shooter)|(Frederik Andersen)_(Goalie),Bryan Little,Frederik Andersen,Shot,SHOT,Bryan Little Backhand saved by Frederik Andersen,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,58,1,REGULAR,01:03,18:57,2017-10-04T23:19:04Z,0,0,Winnipeg Jets,Backhand,"(-73.0, 10.0)",right,left,,,,
3,2017020001,WPG62,(Eric Fehr)_(Shooter)|(Steve Mason)_(Goalie),Eric Fehr,Steve Mason,Shot,SHOT,Eric Fehr Wrist Shot saved by Steve Mason,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,62,1,REGULAR,01:46,18:14,2017-10-04T23:20:20Z,0,0,Toronto Maple Leafs,Wrist Shot,"(80.0, -3.0)",right,left,,,,
4,2017020001,WPG68,(Patrik Laine)_(Shooter)|(Frederik Andersen)_(Goalie),Patrik Laine,Frederik Andersen,Shot,SHOT,Patrik Laine Snap Shot saved by Frederik Andersen,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,68,1,REGULAR,03:42,16:18,2017-10-04T23:23:13Z,0,0,Winnipeg Jets,Snap Shot,"(-44.0, -21.0)",right,left,,,,


In [5]:
# Build the 2017 season frame (this also rewrites that season's adv_vis.pkl) and keep only a
# five-row preview; the preview is already short, so it is displayed as is.
df_temp = get_goal_shots_by_season(season_year=2017).head()
df_temp

100%|█████████████████████████████████████| 1271/1271 [00:00<00:00, 1636.04it/s]
100%|█████████████████████████████████████████| 84/84 [00:00<00:00, 1514.33it/s]


Unnamed: 0,game_id,event_code,player_info,shooter,goalie,event,event_type_id,event_description,home_team,home_team_abv,away_team,away_team_abv,about_event_id,about_period,about_period_type,game_time,about_time_remaining,about_date_time,about_goal_away,about_goal_home,action_team_name,event_secondary_type,coordinates,home_team_side,away_team_side,event_strength_name,event_strength_code,event_game_winning_goal,event_empty_net
0,2017020001,WPG54,(Josh Morrissey)_(Shooter)|(Frederik Andersen)_(Goalie),Josh Morrissey,Frederik Andersen,Shot,SHOT,Josh Morrissey Wrist Shot saved by Frederik Andersen,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,54,1,REGULAR,00:38,19:22,2017-10-04T23:18:01Z,0,0,Winnipeg Jets,Wrist Shot,"(-36.0, -28.0)",right,left,,,,
1,2017020001,WPG55,(Shawn Matthias)_(Shooter)|(Frederik Andersen)_(Goalie),Shawn Matthias,Frederik Andersen,Shot,SHOT,Shawn Matthias Wrist Shot saved by Frederik Andersen,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,55,1,REGULAR,00:49,19:11,2017-10-04T23:18:12Z,0,0,Winnipeg Jets,Wrist Shot,"(-75.0, 1.0)",right,left,,,,
2,2017020001,WPG58,(Bryan Little)_(Shooter)|(Frederik Andersen)_(Goalie),Bryan Little,Frederik Andersen,Shot,SHOT,Bryan Little Backhand saved by Frederik Andersen,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,58,1,REGULAR,01:03,18:57,2017-10-04T23:19:04Z,0,0,Winnipeg Jets,Backhand,"(-73.0, 10.0)",right,left,,,,
3,2017020001,WPG62,(Eric Fehr)_(Shooter)|(Steve Mason)_(Goalie),Eric Fehr,Steve Mason,Shot,SHOT,Eric Fehr Wrist Shot saved by Steve Mason,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,62,1,REGULAR,01:46,18:14,2017-10-04T23:20:20Z,0,0,Toronto Maple Leafs,Wrist Shot,"(80.0, -3.0)",right,left,,,,
4,2017020001,WPG68,(Patrik Laine)_(Shooter)|(Frederik Andersen)_(Goalie),Patrik Laine,Frederik Andersen,Shot,SHOT,Patrik Laine Snap Shot saved by Frederik Andersen,Winnipeg Jets,WPG,Toronto Maple Leafs,TOR,68,1,REGULAR,03:42,16:18,2017-10-04T23:23:13Z,0,0,Winnipeg Jets,Snap Shot,"(-44.0, -21.0)",right,left,,,,


In [None]:
# Parse the seasons 2016 through 2020 and stack the per-season frames into one table.
# Each season keeps its own row index, so index values repeat across seasons in final_df.
year = list(range(2016, 2021))
list_df = [get_goal_shots_by_season(season_year=y) for y in year]
final_df = pd.concat(list_df)