In [1]:
import json
import os

import pandas as pd
# Never truncate cell text when a frame is displayed (None removes the width limit),
# so long string columns such as the event description stay fully readable.
pd.options.display.max_colwidth = None

In [2]:
import os

# Human-readable game type -> two-character game-type code, i.e. the characters
# at positions 4-5 of a game id.
game_type_map = dict(regular_season="02", playoffs="03")


class Directory:
    """Filesystem locations used by the notebook."""

    # Location used when no override is configured (the original hard-coded value).
    _DEFAULT_DATA_DIR = "/Users/amandalmia/workspace/mila/datascience/project/hockey-primer-1/data/"

    # Root data folder. Set the HOCKEY_PRIMER_DATA_DIR environment variable to point
    # the notebook at another machine's data without editing the code; an unset or
    # empty variable falls back to the default above.
    # Joining with "" guarantees a trailing path separator, which matters because
    # paths are built from this value by plain string concatenation.
    DATA_DIR = os.path.join(os.environ.get("HOCKEY_PRIMER_DATA_DIR") or _DEFAULT_DATA_DIR, "")


class APIList:
    """URL templates of the NHL stats API endpoints this project reads from."""

    # NOTE(review): confirm that this host still serves these endpoints before relying on them.

    # Schedule lookup. The string ends with "season=", so the season value is
    # presumably appended directly by the caller.
    GET_ALL_MATCHES_FOR_A_GIVEN_SEASON = "https://statsapi.web.nhl.com/api/v1/schedule?season="

    # Live feed of a single game. "{}" is a placeholder for the game id
    # (presumably filled with str.format by the caller).
    GET_ALL_DATA_FOR_A_GIVEN_MATCH = "https://statsapi.web.nhl.com/api/v1/game/{}/feed/live/"


class CustomRegex:
    """Regular-expression patterns that recognise game ids by their game-type code."""

    # Regular season: the literal "02" with zero to four digits on each side.
    # NOTE(review): both digit runs may be empty and the pattern is not anchored, so a
    # search can match "02" anywhere inside a longer digit string - confirm that the
    # way callers apply it tolerates this.
    REGULAR_GAME_ID = r"\d{0,4}02\d{0,4}"

    # Playoffs: same shape, with "03" as the game-type code (same caveat as above).
    PLAYOFFS_ID = r"\d{0,4}03\d{0,4}"

# Event names (the "event" field of a play's "result" section) that are kept when
# extracting shot data; plays with any other event name are ignored.
TYPES_OF_SHOTS = [
    "Goal",
    "Shot",
]

In [3]:
def get_json_path(game_id: int):
    """
    This function takes an input game id and returns the location of the json file that
    stores the data of the season the game belongs to.

    The id is read as text: characters 0-3 are the year and characters 4-5 are the game-type
    code ("02" -> regular_season, "03" -> playoffs). Any other code gives an empty game-type
    segment, i.e. a file name of the form <year>_.json.
    @param game_id: game id for which we need to get the data
    @return: local system path <DATA_DIR>/<year>/<year>_<game_type>.json
    """
    game_id_text = str(game_id)
    year = game_id_text[:4]
    game_type_by_code = {"02": "regular_season", "03": "playoffs"}
    game_type = game_type_by_code.get(game_id_text[4:6], "")
    # os.path.join adds a separator only where one is missing, so the result is correct
    # whether or not DATA_DIR ends with a path separator.
    return os.path.join(Directory.DATA_DIR, year, year + "_" + game_type + ".json")


def flatten_player_data(player_list):
    """
    Encode the players of a play as one pipe-separated string.

    Every player contributes a token of the form (Full Name)_(Player Type) and the tokens are
    joined with "|", e.g. (A B)_(Shooter)|(C D)_(Goalie). An empty list gives an empty string.
    @param player_list: list of players data; each entry provides ["player"]["fullName"]
                        and ["playerType"]
    @return: flatten string
    """
    # The player id (entry["player"]["id"]) is left out of the token on purpose; it can be
    # added as a leading "(id)_" segment in future if required.
    tokens = [
        "(" + entry["player"]["fullName"] + ")_(" + entry["playerType"] + ")"
        for entry in player_list
    ]
    return "|".join(tokens)


def data_parsing(data, id, event_type):
    """
    Flatten one play of the live feed into a single-level record.

    Every field is read with a plain subscript, so a play that lacks any of them raises
    KeyError. The four goal-only fields (strength name and code, game-winning flag, empty-net
    flag) are copied from the play when event_type is "Goal" and set to the string "NA"
    otherwise.
    @param data: one play entry with "players", "result", "about", "coordinates" and "team" sections
    @param id: game id
    @param event_type: type of event Shot / Goal
    @return: dict with one entry per output column
    """
    players = data["players"]
    result = data["result"]
    about = data["about"]
    coordinates = data["coordinates"]
    team = data["team"]

    # Insertion order defines the column order of the resulting data frame.
    record = {"game_id": id}
    record["event_code"] = result["eventCode"]
    record["player_info"] = flatten_player_data(players)
    record["event"] = result["event"]
    record["event_type_id"] = result["eventTypeId"]
    record["event_description"] = result["description"]
    record["event_secondary_type"] = result["secondaryType"]

    record["about_event_id"] = about["eventId"]
    record["about_period"] = about["period"]
    record["about_period_type"] = about["periodType"]
    record["about_period_time"] = about["periodTime"]
    record["about_time_remaining"] = about["periodTimeRemaining"]
    record["about_date_time"] = about["dateTime"]
    score = about["goals"]
    record["about_goal_away"] = score["away"]
    record["about_goal_home"] = score["home"]

    record["coordinates"] = (coordinates["x"], coordinates["y"])
    record["team_name"] = team["name"]

    if event_type != "Goal":
        # Non-goal events carry none of the goal-only details.
        for goal_only_key in ("event_strength_name", "event_strength_code",
                              "event_game_winning_goal", "event_empty_net"):
            record[goal_only_key] = "NA"
        return record

    strength = result["strength"]
    record["event_strength_name"] = strength["name"]
    record["event_strength_code"] = strength["code"]
    record["event_game_winning_goal"] = result["gameWinningGoal"]
    record["event_empty_net"] = result["emptyNet"]
    return record


def get_goal_shots_data_by_game_id(game_id: int):
    """
    This function loads the stored season file that contains the given game and transforms
    the live data of that game into a df restricted to "Shot" and "Goal" plays, one row per play.

    Plays that cannot be parsed are skipped on a best-effort basis. Each skipped play is
    reported on standard output with the game id, its position in the play list and the
    error, so that dropped rows can be traced.
    @param game_id: game id for which the transformed data needs to be done
    @return: data frame which consists of shots and goals data (empty if no play was parsed)
    @raise OSError: if the season file cannot be opened (e.g. FileNotFoundError)
    @raise KeyError: if the file has no entry for the game, or the entry lacks
                     liveData -> plays -> allPlays
    """
    json_path = get_json_path(game_id=game_id)
    # Read as UTF-8 explicitly so the result does not depend on the platform's default
    # encoding. NOTE(review): assumes the file was written as UTF-8 (or plain ASCII, which
    # is what json.dump emits by default) - confirm against the download step.
    with open(json_path, "r", encoding="utf-8") as f:
        season_games = json.load(f)
    all_plays = season_games[str(game_id)]["liveData"]["plays"]["allPlays"]

    parsed_plays = []
    for position, play in enumerate(all_plays):
        event_name = play["result"]["event"]
        if event_name not in TYPES_OF_SHOTS:
            continue
        try:
            parsed_plays.append(data_parsing(data=play, id=game_id, event_type=event_name))
        except Exception as exc:
            # Best effort: one malformed play must not abort the whole game, but say
            # which play was dropped and why (a bare KeyError message is only the key).
            print(f"Skipping play {position} of game {game_id}: {type(exc).__name__}: {exc}")
    return pd.DataFrame(parsed_plays)

In [4]:
# Try the extraction on one game: id 2017020001 -> year "2017", game-type code "02"
# (regular season). Only the first rows are displayed.
df = get_goal_shots_data_by_game_id(game_id=2017020001)
df.head()

Unnamed: 0,game_id,event_code,player_info,event,event_type_id,event_description,event_secondary_type,about_event_id,about_period,about_period_type,...,about_time_remaining,about_date_time,about_goal_away,about_goal_home,coordinates,team_name,event_strength_name,event_strength_code,event_game_winning_goal,event_empty_net
0,2017020001,WPG54,(Josh Morrissey)_(Shooter)|(Frederik Andersen)_(Goalie),Shot,SHOT,Josh Morrissey Wrist Shot saved by Frederik Andersen,Wrist Shot,54,1,REGULAR,...,19:22,2017-10-04T23:18:01Z,0,0,"(-36.0, -28.0)",Winnipeg Jets,,,,
1,2017020001,WPG55,(Shawn Matthias)_(Shooter)|(Frederik Andersen)_(Goalie),Shot,SHOT,Shawn Matthias Wrist Shot saved by Frederik Andersen,Wrist Shot,55,1,REGULAR,...,19:11,2017-10-04T23:18:12Z,0,0,"(-75.0, 1.0)",Winnipeg Jets,,,,
2,2017020001,WPG58,(Bryan Little)_(Shooter)|(Frederik Andersen)_(Goalie),Shot,SHOT,Bryan Little Backhand saved by Frederik Andersen,Backhand,58,1,REGULAR,...,18:57,2017-10-04T23:19:04Z,0,0,"(-73.0, 10.0)",Winnipeg Jets,,,,
3,2017020001,WPG62,(Eric Fehr)_(Shooter)|(Steve Mason)_(Goalie),Shot,SHOT,Eric Fehr Wrist Shot saved by Steve Mason,Wrist Shot,62,1,REGULAR,...,18:14,2017-10-04T23:20:20Z,0,0,"(80.0, -3.0)",Toronto Maple Leafs,,,,
4,2017020001,WPG68,(Patrik Laine)_(Shooter)|(Frederik Andersen)_(Goalie),Shot,SHOT,Patrik Laine Snap Shot saved by Frederik Andersen,Snap Shot,68,1,REGULAR,...,16:18,2017-10-04T23:23:13Z,0,0,"(-44.0, -21.0)",Winnipeg Jets,,,,
