In [4]:
from ift6758.data import get_data
from ift6758.features import event_types
import pandas as pd

In [2]:
def import_game(game_id, game_list, verbose=True) -> None:
    """Append one row per goal of a single game to ``game_list``.

    The game payload is fetched with ``get_data.retrieve_game_data(game_id)``.
    Every play whose ``typeDescKey`` is ``"goal"`` is wrapped in an
    ``event_types.GoalEvent``; the result of its ``to_dict()`` is merged with
    the game-level fields (id, date, venue, home and away team names) and
    appended to ``game_list``.

    Args:
        game_id: Identifier handed to ``get_data.retrieve_game_data``.
        game_list: List that receives one merged dict per goal. It is mutated
            in place.
        verbose: When true (the default), print the game-level fields once per
            game. Pass ``False`` to keep bulk imports from flooding the output.

    Returns:
        None. Results are delivered through ``game_list``.

    Raises:
        KeyError: If a game-level field, or a required field of a goal play
            (event id, period, time, scoring player, owner team), is absent.
    """
    game_data = get_data.retrieve_game_data(game_id)

    game_info = {
        "game id": game_data["id"],
        "date": game_data["gameDate"],
        "venue_location": game_data["venueLocation"]["default"],
        "home_team": game_data["homeTeam"]["name"]["default"],
        "away_team": game_data["awayTeam"]["name"]["default"],
    }
    if verbose:
        print(game_info)

    for event in game_data["plays"]:
        # Only goals are exported; skip everything else before touching its fields.
        if event["typeDescKey"] != "goal":
            continue

        details = event["details"]

        # NOTE(review): the play's eventId is passed as GoalEvent's ``game_id``
        # argument, while the real game id lives under the "game id" key of
        # game_info. Kept as-is so the emitted rows do not change -- confirm
        # whether GoalEvent should receive the game id instead.
        goal_event = event_types.GoalEvent(
            game_id=event["eventId"],
            period=event["periodDescriptor"]["number"],
            time=event["timeInPeriod"],
            scoring_player=details["scoringPlayerId"],
            # Optional fields: .get() yields None when the key is absent and,
            # unlike assigning defaults, never writes into the fetched payload.
            x_coord=details.get("xCoord"),
            y_coord=details.get("yCoord"),
            owner_team=details["eventOwnerTeamId"],
            goalie=details.get("goalieInNetId"),
            shot_type=details.get("shotType"),
        )

        # Game-level fields first, then the goal fields (unpack and merge).
        game_list.append({**game_info, **goal_event.to_dict()})


In [5]:
# Collect goal rows for every regular-season game of each season in the range
# (range(2017, 2018) yields the single value 2017).
game_list = []
for season in range(2017, 2018):
    for game_id in get_data.regular_season_game_id_generator(str(season)):
        import_game(game_id, game_list)

using cached data for game id:  2017020001.json
{'game id': 2017020001, 'date': '2017-10-04', 'venue_location': 'Winnipeg', 'home_team': 'Jets', 'away_team': 'Maple Leafs'}
using cached data for game id:  2017020002.json
{'game id': 2017020002, 'date': '2017-10-04', 'venue_location': 'Pittsburgh', 'home_team': 'Penguins', 'away_team': 'Blues'}
using cached data for game id:  2017020003.json
{'game id': 2017020003, 'date': '2017-10-04', 'venue_location': 'Edmonton', 'home_team': 'Oilers', 'away_team': 'Flames'}
using cached data for game id:  2017020004.json
{'game id': 2017020004, 'date': '2017-10-04', 'venue_location': 'San Jose', 'home_team': 'Sharks', 'away_team': 'Flyers'}
using cached data for game id:  2017020005.json
{'game id': 2017020005, 'date': '2017-10-05', 'venue_location': 'Boston', 'home_team': 'Bruins', 'away_team': 'Predators'}
using cached data for game id:  2017020006.json
{'game id': 2017020006, 'date': '2017-10-05', 'venue_location': 'Buffalo', 'home_team': 'Sabres

In [6]:
# Preview only the first few rows: the list holds one dict per goal, so
# echoing it in full floods the notebook output.
game_list[:5]

[{'game id': 2017020001,
  'date': '2017-10-04',
  'venue_location': 'Winnipeg',
  'home_team': 'Jets',
  'away_team': 'Maple Leafs',
  'game_id': 212,
  'period': 1,
  'time': '15:45',
  'scoring_player': 8475172,
  'x_coord': 84,
  'y_coord': -6,
  'owner_team': 10,
  'goalie': 8473461,
  'shot_type': 'wrist'},
 {'game id': 2017020001,
  'date': '2017-10-04',
  'venue_location': 'Winnipeg',
  'home_team': 'Jets',
  'away_team': 'Maple Leafs',
  'game_id': 219,
  'period': 1,
  'time': '17:40',
  'scoring_player': 8474037,
  'x_coord': 62,
  'y_coord': 5,
  'owner_team': 10,
  'goalie': 8473461,
  'shot_type': 'wrist'},
 {'game id': 2017020001,
  'date': '2017-10-04',
  'venue_location': 'Winnipeg',
  'home_team': 'Jets',
  'away_team': 'Maple Leafs',
  'game_id': 222,
  'period': 1,
  'time': '18:23',
  'scoring_player': 8477939,
  'x_coord': 84,
  'y_coord': -22,
  'owner_team': 10,
  'goalie': 8473461,
  'shot_type': 'wrist'},
 {'game id': 2017020001,
  'date': '2017-10-04',
  'ven

In [7]:
# Build the goals table in one shot from the accumulated row dicts
# (one row per goal, columns taken from the dict keys), then display it.
df = pd.DataFrame(data=game_list)
df

Unnamed: 0,game id,date,venue_location,home_team,away_team,game_id,period,time,scoring_player,x_coord,y_coord,owner_team,goalie,shot_type
0,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,212,1,15:45,8475172,84.0,-6.0,10,8473461.0,wrist
1,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,219,1,17:40,8474037,62.0,5.0,10,8473461.0,wrist
2,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,222,1,18:23,8477939,84.0,-22.0,10,8473461.0,wrist
3,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,403,2,08:32,8466139,-82.0,2.0,10,8473461.0,backhand
4,2017020001,2017-10-04,Winnipeg,Jets,Maple Leafs,448,3,00:36,8466139,68.0,12.0,10,8473461.0,wrist
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7686,2017021271,2018-04-07,San Jose,Sharks,Wild,250,2,11:19,8475798,-63.0,1.0,30,8474889.0,wrist
7687,2017021271,2018-04-07,San Jose,Sharks,Wild,402,2,11:47,8475722,-78.0,3.0,30,8474889.0,backhand
7688,2017021271,2018-04-07,San Jose,Sharks,Wild,413,2,15:25,8475722,-72.0,18.0,30,8474889.0,wrist
7689,2017021271,2018-04-07,San Jose,Sharks,Wild,638,3,18:50,8478414,-29.0,10.0,28,8471227.0,deflected


In [11]:
# `df` only holds goal rows and has no 'type' column, so df['type'] raises
# KeyError on a fresh Restart & Run All. Read the event types straight from
# the raw play-by-play of the same seasons instead.
all_event_types = [
    event["typeDescKey"]
    for season in range(2017, 2018)
    for game_id in get_data.regular_season_game_id_generator(str(season))
    for event in get_data.retrieve_game_data(game_id)["plays"]
]
# Series.unique() keeps order of first appearance and returns an ndarray.
unique_event_types = pd.Series(all_event_types, dtype=object).unique()
unique_event_types

array(['period-start', 'faceoff', 'blocked-shot', 'shot-on-goal', 'hit',
       'stoppage', 'missed-shot', 'giveaway', 'penalty', 'goal',
       'period-end', 'takeaway', 'game-end', 'shootout-complete'],
      dtype=object)

In [8]:
# Goals recorded without a goalie id -- empty net, or no goalie recorded?
events_with_no_goalie = df.loc[df["goalie"].isna()]
events_with_no_goalie

Unnamed: 0,game id,date,venue_location,home_team,away_team,game_id,period,time,scoring_player,x_coord,y_coord,owner_team,goalie,shot_type
20,2017020003,2017-10-04,Edmonton,Oilers,Flames,809,3,19:01,8478402,68.0,1.0,22,,wrist
28,2017020004,2017-10-04,San Jose,Sharks,Flyers,851,3,19:24,8474190,63.0,13.0,4,,wrist
33,2017020005,2017-10-05,Boston,Bruins,Predators,648,3,17:07,8473419,-42.0,-19.0,6,,wrist
46,2017020007,2017-10-05,New York,Rangers,Avalanche,678,3,19:57,8476455,-55.0,29.0,21,,wrist
98,2017020014,2017-10-06,Tampa,Lightning,Panthers,657,3,19:46,8476292,80.0,27.0,14,,wrist
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7648,2017021265,2018-04-07,Nashville,Predators,Blue Jackets,633,3,19:20,8476887,-35.0,38.0,18,,wrist
7653,2017021266,2018-04-07,Denver,Avalanche,Blues,804,3,16:37,8476455,46.0,-29.0,21,,wrist
7655,2017021266,2018-04-07,Denver,Avalanche,Blues,811,3,19:51,8476442,-76.0,-10.0,21,,wrist
7658,2017021267,2018-04-07,Glendale,Coyotes,Ducks,820,3,19:56,8476854,74.0,-9.0,24,,wrist


In [9]:
# Goals with no x coordinate -- possibly errors in the source data?
events_with_no_x_coord = df.loc[df["x_coord"].isna()]
events_with_no_x_coord

Unnamed: 0,game id,date,venue_location,home_team,away_team,game_id,period,time,scoring_player,x_coord,y_coord,owner_team,goalie,shot_type
1679,2017020273,2017-11-14,Edmonton,Oilers,Golden Knights,701,3,00:57,8478402,,,22,8476509.0,wrist
2370,2017020387,2017-11-30,Edmonton,Oilers,Maple Leafs,778,3,18:55,8466139,,,10,8476316.0,wrist
3248,2017020533,2017-12-21,Tampa,Lightning,Senators,587,5,00:00,8478010,,,14,8467950.0,wrist
3249,2017020533,2017-12-21,Tampa,Lightning,Senators,592,5,00:00,8474564,,,14,8467950.0,wrist
3865,2017020642,2018-01-07,Brooklyn,Islanders,Devils,834,5,00:00,8475754,,,2,8471239.0,wrist


In [10]:
# Goals with no y coordinate.
events_with_no_y_coord = df.loc[df["y_coord"].isna()]
events_with_no_y_coord

Unnamed: 0,game id,date,venue_location,home_team,away_team,game_id,period,time,scoring_player,x_coord,y_coord,owner_team,goalie,shot_type
1679,2017020273,2017-11-14,Edmonton,Oilers,Golden Knights,701,3,00:57,8478402,,,22,8476509.0,wrist
2370,2017020387,2017-11-30,Edmonton,Oilers,Maple Leafs,778,3,18:55,8466139,,,10,8476316.0,wrist
3248,2017020533,2017-12-21,Tampa,Lightning,Senators,587,5,00:00,8478010,,,14,8467950.0,wrist
3249,2017020533,2017-12-21,Tampa,Lightning,Senators,592,5,00:00,8474564,,,14,8467950.0,wrist
3865,2017020642,2018-01-07,Brooklyn,Islanders,Devils,834,5,00:00,8475754,,,2,8471239.0,wrist
