In [1]:
import shots_data_retriever
from shots_data_retriever import ShotsDataRetriever
import importlib
import warnings
import pandas as pd

# Filter dtype warnings. Column 10 can contain str, int, and float types,
# which spams a warning in the output.
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

# Re-execute the module so edits made to shots_data_retriever.py are picked up
# without restarting the kernel. reload() does not rebind names that were
# imported with `from ... import ...`, so the class name is re-fetched from the
# reloaded module; otherwise the instance below would be built from the stale
# pre-reload class.
shots_data_retriever = importlib.reload(shots_data_retriever)
ShotsDataRetriever = shots_data_retriever.ShotsDataRetriever

shotsDataRetriever = ShotsDataRetriever()

In [26]:
# Average shot rate
def get_league_avg_shot_rate_by_coordinate(year: str):
    """Compute the league-wide shot count and hourly shot rate per (x, y) location.

    Loads the season's shots through the module-level ``shotsDataRetriever``,
    counts the shots recorded at each distinct ``(x_coord, y_coord)`` pair and
    divides every count by the total hours played, taken as one hour per
    distinct ``game_id``.

    Args:
        year: Season identifier forwarded to ``get_season_shots``.

    Returns:
        DataFrame with columns ``x_coord``, ``y_coord``, ``shot_count`` and
        ``shot_rate``, ordered by ``x_coord`` then ``y_coord`` with a fresh
        0..n-1 index.
    """
    season_shots = shotsDataRetriever.get_season_shots(year)

    hours_per_game = 1  # in hours, assumption from google doc
    hours_played = hours_per_game * season_shots['game_id'].nunique()

    # One row per distinct coordinate pair, carrying the number of shots there.
    per_location = (
        season_shots
        .groupby(['x_coord', 'y_coord'])
        .size()
        .reset_index(name='shot_count')
    )
    per_location['shot_rate'] = per_location['shot_count'] / hours_played

    return per_location.sort_values(by=['x_coord', 'y_coord']).reset_index(drop=True)

87129
      x_coord  y_coord  shot_count  shot_rate
0         0.0    -41.0           1   0.000738
1         0.0    -39.0           2   0.001476
2         0.0    -36.0           2   0.001476
3         0.0    -35.0           1   0.000738
4         0.0    -34.0           1   0.000738
...       ...      ...         ...        ...
6836     98.0    -11.0           1   0.000738
6837     98.0     11.0           2   0.001476
6838     98.0     23.0           1   0.000738
6839     99.0     -4.0           1   0.000738
6840     99.0     10.0           1   0.000738

[6841 rows x 4 columns]


In [27]:
# Shot rate by team
def get_team_avg_shot_rate_by_coordinate(year: str, team_id: int):
    """Compute one team's shot count and hourly shot rate per (x, y) location.

    Loads the team's shots for the season through the module-level
    ``shotsDataRetriever``, counts the shots at each distinct
    ``(x_coord, y_coord)`` pair and divides every count by the hours the team
    played, taken as one hour per distinct ``game_id`` in the returned data.

    Args:
        year: Season identifier forwarded to ``get_season_shots_for_team``.
        team_id: Team identifier forwarded to ``get_season_shots_for_team``.

    Returns:
        DataFrame with columns ``x_coord``, ``y_coord``, ``shot_count`` and
        ``shot_rate``, ordered by ``x_coord`` then ``y_coord`` with a fresh
        0..n-1 index.
    """
    team_shots = shotsDataRetriever.get_season_shots_for_team(year, team_id)

    hours_per_game = 1  # in hours, assumption from google doc
    games_played = team_shots['game_id'].nunique()
    hours_played = hours_per_game * games_played

    coordinate_keys = ['x_coord', 'y_coord']
    location_counts = team_shots.groupby(coordinate_keys).size().reset_index(name='shot_count')
    location_rates = location_counts.assign(
        shot_rate=location_counts['shot_count'] / hours_played
    )

    ordered = location_rates.sort_values(by=coordinate_keys)
    return ordered.reset_index(drop=True)

3150
      x_coord  y_coord  shot_count  shot_rate
0         0.0    -31.0           1   0.009434
1         0.0     21.0           1   0.009434
2         0.0     23.0           1   0.009434
3         1.0     -7.0           1   0.009434
4         1.0      0.0           2   0.018868
...       ...      ...         ...        ...
2058     94.0    -26.0           1   0.009434
2059     94.0    -25.0           1   0.009434
2060     94.0    -15.0           1   0.009434
2061     94.0      9.0           1   0.009434
2062     98.0     11.0           1   0.009434

[2063 rows x 4 columns]


In [35]:
def get_team_excess_shot_rate(year: str, team_id: int):
    """Compare a team's per-location shot rate with the league's per-side rate.

    The league table is left-joined with the team table on
    ``(x_coord, y_coord)``, so every location present in the league table is
    kept; locations missing from the team table get a team count and rate of 0.

    Args:
        year: Season identifier passed to both rate helpers.
        team_id: Team identifier passed to the team rate helper.

    Returns:
        The merged DataFrame with ``_league`` / ``_team`` suffixed count and
        rate columns plus ``team_shot_rate``, ``league_shot_rate_per_side``
        (league rate halved) and ``excess_shot_rate`` (team minus per-side
        league rate).
    """
    team_rates = get_team_avg_shot_rate_by_coordinate(year, team_id)
    league_rates = get_league_avg_shot_rate_by_coordinate(year)

    merged = league_rates.merge(
        team_rates,
        how='left',
        on=['x_coord', 'y_coord'],
        suffixes=('_league', '_team'),
    )

    # Locations absent from the team table come back as NaN from the left join.
    for team_column in ('shot_count_team', 'shot_rate_team'):
        merged.loc[:, team_column] = merged[team_column].fillna(0)

    merged['team_shot_rate'] = merged['shot_rate_team']
    # divide by two to account for two teams playing per game
    merged['league_shot_rate_per_side'] = merged['shot_rate_league'] / 2

    per_side_baseline = merged['league_shot_rate_per_side']
    merged['excess_shot_rate'] = merged['team_shot_rate'].sub(per_side_baseline, fill_value=0)

    return merged

In [36]:
import numpy as np
from tqdm import tqdm
import plotly.graph_objects as go
from PIL import Image
from scipy.ndimage import gaussian_filter

In [37]:
def plot_shot_heatmap_plotly(shot_rate_map: pd.DataFrame, rink_image_path: str = "../../figures/nhl_rink.png"):
    """Build a smoothed Plotly heatmap of excess shot rate drawn over a rink image.

    The per-location excess rates are summed into a 49 x 49 grid covering
    x in [0, 100] and y in [-42, 42], smoothed with a Gaussian filter
    (sigma = 3 bins) and drawn as a semi-transparent heatmap on top of the
    rink picture.

    Args:
        shot_rate_map: DataFrame with ``x_coord``, ``y_coord`` and
            ``excess_shot_rate`` columns.
        rink_image_path: Path of the rink background image. Defaults to the
            location the notebook has always used.

    Returns:
        A ``plotly.graph_objects.Figure`` whose first trace is the heatmap.
    """
    x = shot_rate_map['x_coord']
    y = shot_rate_map['y_coord']
    shot_rate = shot_rate_map['excess_shot_rate']

    # Create a 2D histogram (heatmap) of the shot rates - obtained using ChatGPT
    heatmap, xedges, yedges = np.histogram2d(
        x,
        y,
        bins=[np.linspace(0, 100, 50),
              np.linspace(-42, 42, 50)],
        weights=shot_rate)

    heatmap = gaussian_filter(heatmap, sigma=3)

    # Plotly treats coordinate arrays with one value per cell as cell centres.
    # Passing the left bin edges would shift the whole heatmap by half a bin
    # relative to the rink image, so use the bin midpoints instead.
    x_centers = (xedges[:-1] + xedges[1:]) / 2
    y_centers = (yedges[:-1] + yedges[1:]) / 2

    fig = go.Figure()

    # The image spans x in [-100, 100] and y in [-42.5, 42.5]; the axis ranges
    # set below show only the x >= 0 half.
    fig.add_layout_image(
        dict(
            source=Image.open(rink_image_path),
            x=-100,
            y=42.5,
            xref="x",
            yref="y",
            sizex=200,
            sizey=85,
            opacity=1,
            sizing="stretch",
            xanchor="left",
            yanchor="top",
            layer="below"
        )
    )

    # Fully transparent at the low end so the rink stays visible there.
    custom_colorscale = [[0, 'rgba(0,0,255,0)'], [0.02, 'rgba(0,0,255,0.05)'], [1, 'rgba(255,0,0,1)']]

    fig.add_trace(go.Heatmap(
        z=heatmap.T,  # histogram2d indexes [x, y]; Heatmap expects rows = y
        x=x_centers,
        y=y_centers,
        colorscale=custom_colorscale,
        colorbar=dict(title='Excess Shot Rate'),
        zmin=np.min(heatmap),
        zmax=np.max(heatmap),
        hovertemplate='X: %{x:.2f}<br>Y: %{y:.2f}<br>Excess Shot Rate: %{z:.2f}<extra></extra>',
    ))

    fig.update_layout(
        xaxis=dict(title='X Coordinate (feet)', range=[0, 100]),
        yaxis=dict(title='Y Coordinate (feet)', range=[-42.5, 42.5]),
        title='Shot Distribution on NHL Rink',
        showlegend=False,
        height=425,
        width=580,
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    return fig

In [39]:
def generate_shot_rate_plots(year: str, max_teams=3):
    """Show one figure with a dropdown that switches between per-team heatmaps.

    A heatmap is built for each selected team. The first team's figure becomes
    the base figure (rink image, layout, visible heatmap); the heatmap traces
    of the remaining teams are added to it hidden. A dropdown toggles which
    trace is visible and updates the title accordingly.

    Args:
        year: Season identifier passed to the data retriever and rate helpers.
        max_teams: Number of teams (in order of first appearance in the season
            data) to include. Defaults to 3, the limit previously hard-coded;
            pass ``None`` to include every team.

    Raises:
        ValueError: If no team is found for ``year``.
    """
    teams = shotsDataRetriever.get_season_shots(year)['team_id'].unique()[:max_teams]
    print(teams)

    if len(teams) == 0:
        # Without any team there is no base figure to display.
        raise ValueError(f"No teams found for season {year}")

    fig = None
    for team_id in tqdm(teams, desc="Generating Shot Rate Plots", total=len(teams)):
        team_fig = plot_shot_heatmap_plotly(get_team_excess_shot_rate(year, team_id))

        if fig is None:
            # The first team's figure is the base and stays visible.
            fig = team_fig
            continue

        heatmap = team_fig.data[0]
        heatmap.visible = False
        fig.add_trace(heatmap)

    fig.update_layout(
        # The first team's trace is the one initially visible, so the initial
        # title must name that team (not the last team the loop processed).
        title = f'Excess Shot Rate for Team {teams[0]} - {year}',
        updatemenus = [
            {
                "buttons": [
                    {
                        "label": f"Team: {button_team}",
                        "method": "update",
                        "args": [
                            {"visible": [button_index == j for j in range(len(teams))]},  # Show selected trace
                            {"title": f'Excess Shot Rate for Team {button_team} - {year}'},  # Update title
                        ],
                    } for button_index, button_team in enumerate(teams)
                ],
                "direction": "down",
                "showactive": True,
            }
        ]
    )

    fig.show()

# Build and display the team-selectable excess shot rate figure for the '2017' season.
generate_shot_rate_plots('2017')

[1, 2, 3]


Generating Shot Rate Plots:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Shot Rate Plots: 100%|██████████| 3/3 [01:26<00:00, 28.76s/it]


: 