In [1]:
from ift6758.data import get_data
from ift6758.features import event_types, player as player_module
import pandas as pd

In [2]:
def extract_player_data(json_player_data) -> dict:
    players_dict = {}
    for player in json_player_data:
        players_dict[player["playerId"]] = player_module.Player(player_id=player["playerId"],
                    team_id=player["teamId"],
                    first_name=player["firstName"]["default"],
                    last_name=player["lastName"]["default"],
                    position_code=player["positionCode"])
    return players_dict
        
def import_game(game_id, game_list) -> None:
    game_data = get_data.retrieve_game_data(game_id)
    
    game_info = {
        "game id": game_data["id"],
        "date": game_data["gameDate"],
        "venue_location": game_data["venueLocation"]["default"],
        "home_team": game_data["homeTeam"]["name"]["default"],
        "away_team": game_data["awayTeam"]["name"]["default"]
    }
    print(game_info)
    players_dict = extract_player_data(game_data["rosterSpots"])
    
    events = game_data["plays"]
    for event in events:
        event_info = {
            "event_id": event["eventId"],
            "period": event["periodDescriptor"]["number"],
            "timeInPeriod": event["timeInPeriod"],
            "type": event["typeDescKey"],
        }
 
        if event_info["type"] == "goal":
            event_details = event["details"]
            # dealing with missing data
            if "goalieInNetId" not in event_details: # if None, net was empty (not an error)
                event_details["goalieInNetId"] = None
                goalie_name = None
            else:
                goalie_name = players_dict[event_details["goalieInNetId"]].first_name + " " + players_dict[event_details["goalieInNetId"]].last_name
            if "xCoord" not in event_details: # most likely an error because rare
                event_details["xCoord"] = None
            if "yCoord" not in event_details:
                event_details["yCoord"] = None # most likely an error because rare
            if "zoneCode" not in event_details:
                event_details["zoneCode"] = None # most likely an error because rare
            scoring_player_name = players_dict[event_details["scoringPlayerId"]].first_name + " " + players_dict[event_details["scoringPlayerId"]].last_name
            goal_event = event_types.GoalEvent(game_id=event_info["event_id"], 
                                               event_id=event_info["event_id"],
                                               period=event_info["period"], 
                                               time=event_info["timeInPeriod"], 
                                               shooting_player_id=event_details["scoringPlayerId"],
                                               shooting_player_name=scoring_player_name,
                                               goalie_id=event_details["goalieInNetId"],
                                               goalie_name=goalie_name,
                                               x_coord=event_details["xCoord"],
                                               y_coord=event_details["yCoord"],
                                               owner_team=event_details["eventOwnerTeamId"],
                                               shot_type=event_details["shotType"],
                                               zone_code=event_details["zoneCode"])
            event_info = {**event_info, **goal_event.to_dict()}
            game_list.append({**game_info, **event_info}) #unpacking and merging the dictionaries
        elif event_info["type"] == "shot-on-goal":
            event_details = event["details"]
            if "zoneCode" not in event_details:
                event_details["zoneCode"] = None
            if "shotType" not in event_details:
                event_details["shotType"] = None
            if "xCoord" not in event_details:
                event_details["xCoord"] = None
            if "yCoord" not in event_details:
                event_details["yCoord"] = None
            scoring_player_name = players_dict[event_details["shootingPlayerId"]].first_name + " " + players_dict[event_details["shootingPlayerId"]].last_name
            goalie_name = players_dict[event_details["goalieInNetId"]].first_name + " " + players_dict[event_details["goalieInNetId"]].last_name
            shot_on_goal_event = event_types.ShotOnGoalEvent(game_id=event_info["event_id"], 
                                                             event_id=event_info["event_id"],
                                                             period=event_info["period"], 
                                                             time=event_info["timeInPeriod"], 
                                                             shooting_player_id=event_details["shootingPlayerId"],
                                                             goalie_id=event_details["goalieInNetId"],
                                                             goalie_name=goalie_name,
                                                             shooting_player_name=scoring_player_name,
                                                             x_coord=event_details["xCoord"],
                                                             y_coord=event_details["yCoord"],
                                                             owner_team=event_details["eventOwnerTeamId"],
                                                             shot_type=event_details["shotType"],
                                                             zone_code=event_details["zoneCode"])
            event_info = {**event_info, **shot_on_goal_event.to_dict()}
            game_list.append({**game_info, **event_info}) #unpacking and merging the dictionaries


In [3]:
import_game("2017020273", []) # debug

using cached data for game id:  2017020273.json
{'game id': 2017020273, 'date': '2017-11-14', 'venue_location': 'Edmonton', 'home_team': 'Oilers', 'away_team': 'Golden Knights'}


In [4]:
game_list = []
for season in range(2017, 2018):
    regular_season_game_ids = get_data.regular_season_game_id_generator(str(season))
    for game_id in regular_season_game_ids:
        import_game(game_id, game_list)

using cached data for game id:  2017020001.json
{'game id': 2017020001, 'date': '2017-10-04', 'venue_location': 'Winnipeg', 'home_team': 'Jets', 'away_team': 'Maple Leafs'}
using cached data for game id:  2017020002.json
{'game id': 2017020002, 'date': '2017-10-04', 'venue_location': 'Pittsburgh', 'home_team': 'Penguins', 'away_team': 'Blues'}
using cached data for game id:  2017020003.json
{'game id': 2017020003, 'date': '2017-10-04', 'venue_location': 'Edmonton', 'home_team': 'Oilers', 'away_team': 'Flames'}
using cached data for game id:  2017020004.json
{'game id': 2017020004, 'date': '2017-10-04', 'venue_location': 'San Jose', 'home_team': 'Sharks', 'away_team': 'Flyers'}
using cached data for game id:  2017020005.json
{'game id': 2017020005, 'date': '2017-10-05', 'venue_location': 'Boston', 'home_team': 'Bruins', 'away_team': 'Predators'}
using cached data for game id:  2017020006.json
{'game id': 2017020006, 'date': '2017-10-05', 'venue_location': 'Buffalo', 'home_team': 'Sabres

In [5]:
df = pd.DataFrame(game_list)
df

Unnamed: 0,game id,date,venue_location,home_team,away_team,event_id,period,timeInPeriod,type,game_id,time,shooting_player_id,shooting_player_name,goalie_id,goalie_name,x_coord,y_coord,owner_team,shot_type,zone_code
0,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,54,1,00:38,shot-on-goal,54,00:38,8477504,Josh Morrissey,8475883.0,Frederik Andersen,-36.0,-28.0,52,wrist,O
1,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,55,1,00:49,shot-on-goal,55,00:49,8473574,Shawn Matthias,8475883.0,Frederik Andersen,-75.0,1.0,52,wrist,O
2,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,58,1,01:03,shot-on-goal,58,01:03,8473412,Bryan Little,8475883.0,Frederik Andersen,-73.0,10.0,52,backhand,O
3,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,62,1,01:46,shot-on-goal,62,01:46,8470611,Eric Fehr,8473461.0,Steve Mason,80.0,-3.0,10,wrist,O
4,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,68,1,03:42,shot-on-goal,68,03:42,8479339,Patrik Laine,8475883.0,Frederik Andersen,-44.0,-21.0,52,snap,O
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81868,2017021271,2018-04-07,San Jose,Sharks,Wild,634,3,18:10,shot-on-goal,634,18:10,8478414,Timo Meier,8471227.0,Devan Dubnyk,-52.0,-35.0,28,snap,O
81869,2017021271,2018-04-07,San Jose,Sharks,Wild,638,3,18:50,goal,638,18:50,8478414,Timo Meier,8471227.0,Devan Dubnyk,-29.0,10.0,28,deflected,O
81870,2017021271,2018-04-07,San Jose,Sharks,Wild,641,3,19:22,shot-on-goal,641,19:22,8470613,Brent Burns,8471227.0,Devan Dubnyk,-31.0,16.0,28,wrist,O
81871,2017021271,2018-04-07,San Jose,Sharks,Wild,642,3,19:25,shot-on-goal,642,19:25,8470794,Joe Pavelski,8471227.0,Devan Dubnyk,-80.0,8.0,28,wrist,O


In [6]:
unique_event_types = df['type'].unique()
unique_event_types

array(['shot-on-goal', 'goal'], dtype=object)

In [7]:
events_with_no_zone_code = df[df['zone_code'].isna()]
events_with_no_zone_code

Unnamed: 0,game id,date,venue_location,home_team,away_team,event_id,period,timeInPeriod,type,game_id,time,shooting_player_id,shooting_player_name,goalie_id,goalie_name,x_coord,y_coord,owner_team,shot_type,zone_code
17554,2017020273,2017-11-14,Edmonton,Oilers,Golden Knights,701,3,00:57,goal,701,00:57,8478402,Connor McDavid,8476509.0,Maxime Lagace,,,22,wrist,
24850,2017020387,2017-11-30,Edmonton,Oilers,Maple Leafs,778,3,18:55,goal,778,18:55,8466139,Patrick Marleau,8476316.0,Laurent Brossoit,,,10,wrist,
34202,2017020533,2017-12-21,Tampa,Lightning,Senators,587,5,00:00,goal,587,00:00,8478010,Brayden Point,8467950.0,Craig Anderson,,,14,wrist,
34203,2017020533,2017-12-21,Tampa,Lightning,Senators,588,5,00:00,shot-on-goal,588,00:00,8471768,Tom Pyatt,8476883.0,Andrei Vasilevskiy,,,9,wrist,
34204,2017020533,2017-12-21,Tampa,Lightning,Senators,1202,5,00:00,shot-on-goal,1202,00:00,8476453,Nikita Kucherov,8467950.0,Craig Anderson,,,14,wrist,
34205,2017020533,2017-12-21,Tampa,Lightning,Senators,591,5,00:00,shot-on-goal,591,00:00,8475168,Matt Duchene,8476883.0,Andrei Vasilevskiy,,,9,backhand,
34206,2017020533,2017-12-21,Tampa,Lightning,Senators,592,5,00:00,goal,592,00:00,8474564,Steven Stamkos,8467950.0,Craig Anderson,,,14,wrist,
41272,2017020642,2018-01-07,Brooklyn,Islanders,Devils,834,5,00:00,goal,834,00:00,8475754,Brock Nelson,8471239.0,Cory Schneider,,,2,wrist,


In [8]:
events_with_no_shot_type = df[df['shot_type'].isna()]
events_with_no_shot_type

Unnamed: 0,game id,date,venue_location,home_team,away_team,event_id,period,timeInPeriod,type,game_id,time,shooting_player_id,shooting_player_name,goalie_id,goalie_name,x_coord,y_coord,owner_team,shot_type,zone_code
32022,2017020500,2017-12-16,Philadelphia,Flyers,Stars,700,4,03:44,shot-on-goal,700,03:44,8469465,Dan Hamhuis,8470880.0,Brian Elliott,97.0,-5.0,25,,D
62273,2017020966,2018-02-26,Tampa,Lightning,Maple Leafs,681,5,00:00,shot-on-goal,681,00:00,8477939,William Nylander,8476883.0,Andrei Vasilevskiy,77.0,-7.0,10,,O
72541,2017021124,2018-03-20,Raleigh,Hurricanes,Oilers,613,3,13:37,shot-on-goal,613,13:37,8476958,Jaccob Slavin,8475660.0,Cam Talbot,-22.0,-2.0,12,,N


In [9]:
events_with_no_goalie = df[df['goalie'].isna()]
events_with_no_goalie # net empty of no goalie ?

KeyError: 'goalie'

In [10]:
# errors ?
events_with_no_x_coord = df[df['x_coord'].isna()]
events_with_no_x_coord

Unnamed: 0,game id,date,venue_location,home_team,away_team,event_id,period,timeInPeriod,type,game_id,time,shooting_player,x_coord,y_coord,owner_team,goalie,shot_type,zone_code,scoring_player
17554,2017020273,2017-11-14,Edmonton,Oilers,Golden Knights,701,3,00:57,goal,701,00:57,,,,22,8476509.0,wrist,,8478402.0
24850,2017020387,2017-11-30,Edmonton,Oilers,Maple Leafs,778,3,18:55,goal,778,18:55,,,,10,8476316.0,wrist,,8466139.0
34202,2017020533,2017-12-21,Tampa,Lightning,Senators,587,5,00:00,goal,587,00:00,,,,14,8467950.0,wrist,,8478010.0
34203,2017020533,2017-12-21,Tampa,Lightning,Senators,588,5,00:00,shot-on-goal,588,00:00,8471768.0,,,9,8476883.0,wrist,,
34204,2017020533,2017-12-21,Tampa,Lightning,Senators,1202,5,00:00,shot-on-goal,1202,00:00,8476453.0,,,14,8467950.0,wrist,,
34205,2017020533,2017-12-21,Tampa,Lightning,Senators,591,5,00:00,shot-on-goal,591,00:00,8475168.0,,,9,8476883.0,backhand,,
34206,2017020533,2017-12-21,Tampa,Lightning,Senators,592,5,00:00,goal,592,00:00,,,,14,8467950.0,wrist,,8474564.0
41272,2017020642,2018-01-07,Brooklyn,Islanders,Devils,834,5,00:00,goal,834,00:00,,,,2,8471239.0,wrist,,8475754.0


In [11]:
events_with_no_y_coord = df[df['y_coord'].isna()]
events_with_no_y_coord

Unnamed: 0,game id,date,venue_location,home_team,away_team,event_id,period,timeInPeriod,type,game_id,time,shooting_player,x_coord,y_coord,owner_team,goalie,shot_type,zone_code,scoring_player
17554,2017020273,2017-11-14,Edmonton,Oilers,Golden Knights,701,3,00:57,goal,701,00:57,,,,22,8476509.0,wrist,,8478402.0
24850,2017020387,2017-11-30,Edmonton,Oilers,Maple Leafs,778,3,18:55,goal,778,18:55,,,,10,8476316.0,wrist,,8466139.0
34202,2017020533,2017-12-21,Tampa,Lightning,Senators,587,5,00:00,goal,587,00:00,,,,14,8467950.0,wrist,,8478010.0
34203,2017020533,2017-12-21,Tampa,Lightning,Senators,588,5,00:00,shot-on-goal,588,00:00,8471768.0,,,9,8476883.0,wrist,,
34204,2017020533,2017-12-21,Tampa,Lightning,Senators,1202,5,00:00,shot-on-goal,1202,00:00,8476453.0,,,14,8467950.0,wrist,,
34205,2017020533,2017-12-21,Tampa,Lightning,Senators,591,5,00:00,shot-on-goal,591,00:00,8475168.0,,,9,8476883.0,backhand,,
34206,2017020533,2017-12-21,Tampa,Lightning,Senators,592,5,00:00,goal,592,00:00,,,,14,8467950.0,wrist,,8474564.0
41272,2017020642,2018-01-07,Brooklyn,Islanders,Devils,834,5,00:00,goal,834,00:00,,,,2,8471239.0,wrist,,8475754.0
