In [1]:
import json
import os

import pandas as pd
from tqdm import tqdm
import math
import pickle

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
import os

game_type_map = {"regular_season": "02",
                 "playoffs": "03"}


class Directory:
    DATA_DIR = "../data/" # Modify the path accordingly


class APIList():
    GET_ALL_MATCHES_FOR_A_GIVEN_SEASON = "https://statsapi.web.nhl.com/api/v1/schedule?season="
    GET_ALL_DATA_FOR_A_GIVEN_MATCH = "https://statsapi.web.nhl.com/api/v1/game/{}/feed/live/"


class CustomRegex():
    REGULAR_GAME_ID = r"\d{0,4}02\d{0,4}"  # 02 for regular season
    PLAYOFFS_ID = r"\d{0,4}03\d{0,4}"  # 03 for playoffs

TYPES_OF_SHOTS = ["Goal", "Shot"]

In [3]:
final_df = pd.read_pickle("all_season.pkl")

In [4]:
final_df.head()

Unnamed: 0,game_id,event_code,player_info,shooter,goalie,event,event_type_id,event_description,home_team,home_team_abv,away_team,away_team_abv,about_event_id,about_period,about_period_type,game_time,about_time_remaining,about_date_time,about_goal_away,about_goal_home,action_team_name,event_secondary_type,coordinates,home_team_side,away_team_side,event_strength_name,event_strength_code,event_game_winning_goal,event_empty_net
0,2016020001,OTT8,(Mitchell Marner)_(Shooter)|(Craig Anderson)_(Goalie),Mitchell Marner,Craig Anderson,Shot,SHOT,Mitchell Marner Wrist Shot saved by Craig Anderson,Ottawa Senators,OTT,Toronto Maple Leafs,TOR,8,1,REGULAR,01:11,18:49,2016-10-12T23:19:59Z,0,0,Toronto Maple Leafs,Wrist Shot,"(-77.0, 5.0)",left,right,,,,
1,2016020001,OTT11,(Chris Kelly)_(Shooter)|(Frederik Andersen)_(Goalie),Chris Kelly,Frederik Andersen,Shot,SHOT,Chris Kelly Wrist Shot saved by Frederik Andersen,Ottawa Senators,OTT,Toronto Maple Leafs,TOR,11,1,REGULAR,02:53,17:07,2016-10-12T23:21:41Z,0,0,Ottawa Senators,Wrist Shot,"(86.0, 13.0)",left,right,,,,
2,2016020001,OTT15,(Cody Ceci)_(Shooter)|(Frederik Andersen)_(Goalie),Cody Ceci,Frederik Andersen,Shot,SHOT,Cody Ceci Wrist Shot saved by Frederik Andersen,Ottawa Senators,OTT,Toronto Maple Leafs,TOR,15,1,REGULAR,04:01,15:59,2016-10-12T23:23:17Z,0,0,Ottawa Senators,Wrist Shot,"(23.0, -38.0)",left,right,,,,
3,2016020001,OTT16,(Erik Karlsson)_(Shooter)|(Frederik Andersen)_(Goalie),Erik Karlsson,Frederik Andersen,Shot,SHOT,Erik Karlsson Slap Shot saved by Frederik Andersen,Ottawa Senators,OTT,Toronto Maple Leafs,TOR,16,1,REGULAR,04:46,15:14,2016-10-12T23:24:02Z,0,0,Ottawa Senators,Slap Shot,"(33.0, -15.0)",left,right,,,,
4,2016020001,OTT24,(Martin Marincin)_(Shooter)|(Craig Anderson)_(Goalie),Martin Marincin,Craig Anderson,Shot,SHOT,Martin Marincin Wrist Shot saved by Craig Anderson,Ottawa Senators,OTT,Toronto Maple Leafs,TOR,24,1,REGULAR,06:46,13:14,2016-10-12T23:27:30Z,0,0,Toronto Maple Leafs,Wrist Shot,"(-34.0, 28.0)",left,right,,,,


In [5]:
final_df.shape

(387829, 29)

In [6]:
def get_offense_corr(row):
    #print(row)
    team_side = row.loc["side"]
    corr = row.loc['coordinates']
    if corr is not None:
        if team_side == 'right':
            return (corr[1],-corr[0])
        elif team_side == 'left':
            return (-corr[1],corr[0])
    else:
        print(corr)
        return None

def transform_coordinates(df):
    new_df = df[['game_id','action_team_name','home_team','away_team','home_team_side','away_team_side',
                 'coordinates']]
    df_home = new_df[new_df["action_team_name"] == new_df["home_team"]][["action_team_name",'game_id',
                                                                         "home_team_side",'coordinates']]
    df_home = df_home.rename(columns={"home_team_side":"side"})
    
    df_away = new_df[new_df["action_team_name"] == new_df["away_team"]][["action_team_name",'game_id', 
                                                                         "away_team_side",'coordinates']]
    df_away = df_away.rename(columns={"away_team_side":"side"})
    
    df = pd.concat([df_home,df_away],axis=0)
    
    final_df = df[df["side"] != "Side Not Available"]
    final_df = final_df[final_df["side"] != 'NA-Shootout']
    
    final_df["new_corr"] = final_df.apply(get_offense_corr, axis=1)
    final_df = final_df[final_df["new_corr"].notna()]
    
    return final_df


def get_shots_per_hr(df):
    return df.shape[0] / df["game_id"].unique().size


def smooth_coordinates(df):
    df["smooth_coordinates"] = df["new_corr"].apply(lambda x: (round(x[0], -1), round(x[1], -1)) if x is not None else None)
    return df

def get_count(final_df):
    final_df["year"] = final_df["game_id"].apply(lambda x : int(x[:4]))
    count_df = final_df.groupby(["year", "action_team_name", "smooth_coordinates"])["coordinates"].agg('count').reset_index()
    return count_df

def get_default_dict_for_shots_frequency():
    empty_dict = {}
    for i in range(0, 90, 10):
        for j in range(-40, 40, 10):
            empty_dict[(i, j)] = 0
    return empty_dict

def get_default_dict_for_teams(df):
    team_list = df["action_team_name"].unique().tolist()
    empty_dict = {}
    for i in team_list:
        empty_dict[i] = get_default_dict_for_shots_frequency()
    return empty_dict


def get_all_season_shot_perhr_map(df):
    league_shot_per_hr_map = get_default_dict_for_shots_frequency()
    no_of_games = len(df["game_id"].unique().tolist()) #Equivalent to no of hrs
    df_all_season_map = df.groupby(["smooth_coordinates"])["coordinates"].agg('count').reset_index()
    
    for index, row in df_all_season_map.iterrows():
        league_shot_per_hr_map[row["smooth_coordinates"]] = row["coordinates"] / no_of_games
    
    return league_shot_per_hr_map


def get_no_of_games_per_season(team_name, season, count_df):
    return count_df[(count_df["action_team_name"] == team_name) & (count_df["year"] == season)].shape[0]


In [7]:
league_shot_perhr = get_shots_per_hr(final_df)


final_df = transform_coordinates(final_df)
final_df = smooth_coordinates(final_df)
all_season_shot_perhr_map = get_all_season_shot_perhr_map(df=final_df)
count_df = get_count(final_df)

None
None
None
None
None
None
None
None
None
None


In [8]:
major_dict = {}
for k, v in count_df.iterrows():
    try:
        no_of_games_in_season = get_no_of_games_per_season(team_name=v["action_team_name"], season=v["year"], 
                                                           count_df=count_df)
        if v["year"] not in major_dict:
            major_dict[v["year"]] = get_default_dict_for_teams(count_df)
            major_dict[v["year"]][v["action_team_name"]][v["smooth_coordinates"]] = all_season_shot_perhr_map[v["smooth_coordinates"]] - (v["coordinates"] / no_of_games_in_season)
        else:
            major_dict[v["year"]][v["action_team_name"]][v["smooth_coordinates"]] = all_season_shot_perhr_map[v["smooth_coordinates"]] - (v["coordinates"] / no_of_games_in_season)
    except Exception as e:
        print(e)
        print(v)
        print(all_season_shot_perhr_map[(0, -40)])
        break

In [9]:
major_dict

{2016: {'Anaheim Ducks': {(0, -40): 0.00598994576079314,
   (0, -30): 0,
   (0, -20): 0,
   (0, -10): 0.011850055873093679,
   (0, 0): 0.06876912887574,
   (0, 10): 0.0702658859021827,
   (0, 20): 0.047448364800242754,
   (0, 30): 0.4082908085113832,
   (10, -40): 0,
   (10, -30): 0.003035820050708845,
   (10, -20): 0,
   (10, -10): 0,
   (10, 0): 0.015176182598521916,
   (10, 10): 0.01621632661397382,
   (10, 20): 0.03276526590049046,
   (10, 30): 0.22011081253774717,
   (20, -40): 0,
   (20, -30): 0,
   (20, -20): 0,
   (20, -10): -0.0007469196857101991,
   (20, 0): 0.026194706790258537,
   (20, 10): 0.04216011507231408,
   (20, 20): 0.013933261559019791,
   (20, 30): 0.37128910751850525,
   (30, -40): 0,
   (30, -30): 0.014843569925979093,
   (30, -20): 0,
   (30, -10): 0,
   (30, 0): -0.02240321408877257,
   (30, 10): 0.06032981172372141,
   (30, 20): 0.043203176742788285,
   (30, 30): 0.36917234879982264,
   (40, -40): 0.011850055873093679,
   (40, -30): 0,
   (40, -20): 0,
   (40

In [10]:
with open('major_dict.p', 'wb') as fp:
    pickle.dump(major_dict, fp, protocol=pickle.HIGHEST_PROTOCOL)

In [11]:
with open('major_dict.p', 'rb') as rp:
    dict_master = pickle.load(rp)