In [1]:
import shots_data_retriever
from shots_data_retriever import ShotsDataRetriever
import importlib
import warnings
import pandas as pd

# Filter dtype warnings. Column 10 can contain str, int, and float types 
# which spams a warning in the output
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)
importlib.reload(shots_data_retriever)

shotsDataRetriever = ShotsDataRetriever()

In [10]:
# Average shot rate
def get_league_avg_shot_rate_by_coordinate(year: str):
    df = shotsDataRetriever.get_season_shots(year)

    single_game_time = 1 # in hours, assumption from google doc
    total_games = df['game_id'].nunique()
    total_game_time = single_game_time * total_games

    shot_location = df.groupby(['x_coord', 'y_coord']).size().reset_index(name='shot_count')
    shot_location['shot_rate'] = shot_location['shot_count'] / total_game_time

    return shot_location

In [11]:
# Shot rate by team
def get_team_avg_shot_rate_by_coordinate(year: str, team_id: int):
    df = shotsDataRetriever.get_season_shots_for_team(year, team_id)

    single_game_time = 1 # in hours, assumption from google doc
    total_games = df['game_id'].nunique()
    total_game_time = single_game_time * total_games

    shot_location = df.groupby(['x_coord', 'y_coord']).size().reset_index(name='shot_count')
    shot_location['shot_rate'] = shot_location['shot_count'] / total_game_time

    return shot_location

In [30]:
def get_team_excess_shot_rate(year: str, team_id: int):
    team_df = get_team_avg_shot_rate_by_coordinate(year, team_id)
    league_df = get_league_avg_shot_rate_by_coordinate(year)

    result_df = team_df
    result_df['shot_rate'].rename('team_shot_rate')
    result_df['league_shot_rate'] = league_df['shot_rate']
    result_df['excess_shot_rate'] = team_df['shot_rate'] - league_df['shot_rate']
    
    return result_df

df = get_team_excess_shot_rate('2017', 10)

# Validate the values - about half of the values should be negative
print(f"positive: {df[df['excess_shot_rate'] >= 0]['excess_shot_rate'].count()}")
print(f"negative: {df[df['excess_shot_rate'] < 0]['excess_shot_rate'].count()}")

print(df)

positive: 1817
negative: 0
      x_coord  y_coord  shot_count  shot_rate  league_shot_rate  \
0         1.0    -41.0           1   0.011236          0.000738   
1         1.0      6.0           2   0.022472          0.001476   
2         1.0     29.0           1   0.011236          0.001476   
3         1.0     35.0           1   0.011236          0.000738   
4         2.0     23.0           1   0.011236          0.000738   
...       ...      ...         ...        ...               ...   
1812     93.0     10.0           1   0.011236          0.001476   
1813     93.0     13.0           1   0.011236          0.001476   
1814     94.0    -15.0           1   0.011236          0.000738   
1815     94.0     10.0           1   0.011236          0.003690   
1816     95.0    -17.0           1   0.011236          0.001476   

      excess_shot_rate  
0             0.010498  
1             0.020996  
2             0.009760  
3             0.010498  
4             0.010498  
...               

In [5]:
# Difference from league average
full_df = pd.DataFrame()
temp_df = pd.DataFrame()
temp_df['team_id'] = shot_location_group['team_id']
temp_df['x_coord'] = shot_location_group['x_coord']
temp_df['y_coord'] = shot_location_group['y_coord']

temp_df = temp_df.groupby('team_id')
for team_group in temp_df:
    team_id = team_group[1]['team_id'].iloc[0]
    x_coord = team_group[1]['x_coord'].iloc[0]
    y_coord = team_group[1]['y_coord'].iloc[0]
    merge_df = pd.merge(
        shot_location_group,
        shot_location_league[['x_coord', 'y_coord', 'shot_rate']].rename(columns={'shot_rate': 'shot_rate_league'}),
        on=['x_coord', 'y_coord'],
        how='left'
    )
    full_df = pd.concat([full_df, shot_location_group[shot_location_group['team_id'] == team_id]])
    full_df = pd.merge(full_df, merge_df, how='left')
    full_df = full_df.drop('shot_count', axis=1)
    full_df = full_df.drop('time_played', axis=1)
full_df.to_csv('full_df.csv', index=False)

In [None]:
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from tqdm import tqdm
import plotly.graph_objects as go
import os
from PIL import Image
from scipy.ndimage import gaussian_filter

In [None]:
# Now that we have a shot map, we want to plot it

def plot_shot_map_matplotlib(shot_map):
    rink_image = mpimg.imread("../../figures/nhl_rink.png")

    plt.figure(figsize=(10, 8))
    plt.imshow(rink_image, extent=[-100, 100, -42.5, 42.5], aspect='auto')

    for i in tqdm(range(shot_map.shape[0]), desc="Drawing plot"):
        for j in range(shot_map.shape[1]):
            if shot_map[i, j] > 0:
                plt.scatter(i, j - 42, s=shot_map[i, j] * 30, color='blue', alpha=0.5)  # Adjust y-coordinates

    plt.xlim(0, 100)
    plt.ylim(-42.5, 42.5)

    plt.xlabel('X Coordinate')
    plt.ylabel('Y Coordinate')
    plt.title('Shot Distribution on NHL Rink')

    plt.show()

plot_shot_map_matplotlib(shot_map)

In [None]:
def plot_shot_map_plotly(shot_map):
    rink_image_path = os.path.abspath("../../figures/nhl_rink.png")

    fig = go.Figure()

    fig.add_layout_image(
        dict(
            source=Image.open("../../figures/nhl_rink.png"),
            x=-100,
            y=42.5,
            xref="x",
            yref="y",
            sizex=200,  # Total width of the rink in feet
            sizey=85,   # Total height of the rink in feet
            opacity=1,
            sizing="stretch",
            xanchor="left",
            yanchor="top",
            layer="below"  # To ensure the image is below the scatter points
        )
    )

    for i in tqdm(range(shot_map.shape[0]), desc="Processing shots"):  # Iterate over x-coordinates (0 to 100)
        for j in range(shot_map.shape[1]):  # Iterate over y-coordinates (-42.5 to 42.5)
            if shot_map[i, j] > 0:  # Only plot if there's at least one shot
                fig.add_trace(go.Scatter(
                    x=[i],  # x coordinate
                    y=[j - 42],  # y coordinate adjusted for proper positioning
                    mode='markers',
                    marker=dict(size=shot_map[i, j] * 5, opacity=0.5, color='blue'),  # Size based on the shot count
                    showlegend=False
                )) 

    fig.update_layout(
        xaxis=dict(title='X Coordinate (feet)', range=[0, 100]),  # x-axis from 0 to 100
        yaxis=dict(title='Y Coordinate (feet)', range=[-42.5, 42.5]),  # y-axis from -42.5 to 42.5
        title='Shot Distribution on NHL Rink',
        showlegend=False,
        height=425,  # Set the height of the figure
        width=500,    )
    fig.show()

plot_shot_map_plotly(shot_map)

In [None]:

def plot_shot_heatmap_plotly(shot_map):
    x = np.linspace(0, 100, shot_map.shape[0])  # x coordinates
    y = np.linspace(-42, 42, shot_map.shape[1])  # y coordinates
    
    xedges = np.linspace(0, 100, shot_map.shape[0] + 1)  # Adding 1 to include the last edge
    yedges = np.linspace(-42, 42, shot_map.shape[1] + 1)  # Adding 1 to include the last edge

    # Create a 2D histogram (heatmap) of the shot counts
    heatmap, _, _ = np.histogram2d(
        np.repeat(x, shot_map.shape[1]),  # x positions
        np.tile(y, shot_map.shape[0]),    # y positions
        weights=shot_map.flatten(),        # Use shot counts as weights
        bins=[xedges, yedges]
    )

    heatmap = gaussian_filter(heatmap, sigma=3)  # Adjust sigma for smoothing
    #heatmap = np.where(heatmap < 0.001, np.nan, heatmap)

    fig = go.Figure()

    fig.update_layout(
        plot_bgcolor='white',  # Background color of the plot area
        paper_bgcolor='white'  # Background color of the entire figure
    )

    fig.add_layout_image(
        dict(
            source=Image.open("../../figures/nhl_rink.png"),
            x=-100,
            y=42.5,
            xref="x",
            yref="y",
            sizex=200,  # Total width of the rink in feet
            sizey=85,   # Total height of the rink in feet
            opacity=1,
            sizing="stretch",
            xanchor="left",
            yanchor="top",
            layer="below"  # To ensure the image is below the scatter points
        )
    )

    custom_colorscale = [[0, 'rgba(0,0,255,0)'], [0.001, 'rgba(0,0,255,0.05)'], [1, 'rgba(255,0,0,1)']]

    fig.add_trace(go.Heatmap(
        z=heatmap.T,  # Transpose heatmap to match x and y coordinates
        x=xedges[:-1],  # x-axis edges
        y=yedges[:-1],  # y-axis edges
        colorscale=custom_colorscale,  # Choose a color scale
        colorbar=dict(title='Shot Density'),  # Add color bar title
        zmin=0,  # Minimum value for color scale
        zmax=np.max(heatmap),  # Maximum value for color scale
        # opacity=0.5,
        hovertemplate='X: %{x:.2f}<br>Y: %{y:.2f}<br>Shot Density: %{z:.2f}<extra></extra>',
    ))

    fig.update_layout(
        xaxis=dict(title='X Coordinate (feet)', range=[0, 100]),  # x-axis from 0 to 100
        yaxis=dict(title='Y Coordinate (feet)', range=[-42.5, 42.5]),  # y-axis from -42.5 to 42.5
        title='Shot Distribution on NHL Rink',
        showlegend=False,
        height=425,  # Set the height of the figure
        width=500,    )
    fig.show()

plot_shot_heatmap_plotly(shot_map)