---
icon: material/trending-up
description: "Learn how to analyze goals saved above expected with chickenstats"
---

# **Tutorial for goals saved above expected**

---

## **Intro**

Use the `chickenstats` library to scrape play-by-play data and analyze goalies' goals saved above expected.

Parts of this tutorial are optional and will be clearly marked as such. For help, or any questions,
please don't hesitate to reach out to [chicken@chickenandstats.com](mailto:chicken@chickenandstats.com) or
[@chickenandstats.com](https://bsky.app/profile/chickenandstats.com) on Bluesky.

---

## **Housekeeping**

### Import dependencies

Import the dependencies we'll need for the guide

In [None]:
import pandas as pd
import numpy as np

import chickenstats

from chickenstats.chicken_nhl import Season, Scraper
from chickenstats.chicken_nhl.helpers import norm_coords
from chickenstats.chicken_nhl.info import NHL_COLORS, team_codes
import chickenstats.utilities
from chickenstats.api import ChickenStats, ChickenUser, ChickenToken

from hockey_rink import NHLRink
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.lines import Line2D
import matplotlib.patches as patches
import matplotlib.patheffects as mpe
import matplotlib.ticker as ticker
import seaborn as sns

from dotenv import load_dotenv
from pathlib import Path

import datetime as dt

import math

from typing import Optional

### Pandas options

Set different pandas options. This cell is optional

In [None]:
# Show every column when a DataFrame is displayed, and up to 100 rows
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

### Chickenstats matplotlib style

`chickenstats.utilities` includes a custom matplotlib style - this cell activates it. This cell is also optional

In [None]:
# Switch matplotlib to the "chickenstats" style sheet for the plots drawn after this cell
plt.style.use("chickenstats")

---

## **Scrape data**

### Schedule

Scrape the schedule using the `Season` object

In [None]:
# Season handle used below to pull the schedule and the standings
# NOTE(review): presumably 2024 selects the 2024-25 NHL season - confirm against the chickenstats docs
season = Season(2024)

In [None]:
# Season schedule; later cells read its game_id, game_state, game_date,
# home_team and away_team columns
schedule = season.schedule()

### Standings and team names

Scrape the standings and create team name dictionaries to use later

In [None]:
# Work on a deep copy so nothing done to `standings` below touches the object held by `season`
standings = season.standings.copy(deep=True)

In [None]:
# Sort the standings once by team name so the name and code lists stay aligned
standings_by_name = standings.sort_values(by="team_name")

team_names = standings_by_name["team_name"].str.upper().tolist()
# NOTE: this rebinds `team_codes`, which was also imported from
# chickenstats.chicken_nhl.info in the import cell
team_codes = standings_by_name["team"].str.upper().tolist()

# Upper-case team code -> upper-case team name
team_names_dict = {code: name for code, name in zip(team_codes, team_names)}

### Game IDs

Create a list of game IDs to scrape

In [None]:
# Keep only the games whose state is "OFF"
# NOTE(review): presumably "OFF" marks a finished game - confirm against the schedule data
conds = schedule.game_state == "OFF"

# Distinct game IDs, in schedule order
game_ids = schedule.loc[conds, "game_id"].unique().tolist()

### Play-by-play

In [None]:
# Scraper over every game ID collected above
scraper = Scraper(game_ids)

In [None]:
# Play-by-play data for the games handed to the scraper
# NOTE(review): presumably the scraping happens when this attribute is first read - confirm
pbp = scraper.play_by_play

### Stats

In [None]:
# Prepare stats at the season level with teammates=True, then read the result
# NOTE(review): presumably teammates=True splits rows by on-ice teammates - confirm
scraper.prep_stats(level="season", teammates=True)
stats = scraper.stats

In [None]:
# Prepare team stats at the game level; deep-copied so the columns added in
# later cells (is_home, strength_state2) don't modify the scraper's own frame
scraper.prep_team_stats(level="game")
team_stats = scraper.team_stats.copy(deep=True)

In [None]:
# Home team for each game, keyed by the game ID as a string
home_map = dict(zip(schedule.game_id.astype(str), schedule.home_team))

# Cast the team-stats game IDs to str as well, so the lookup still matches when
# the two frames store game_id with different dtypes; a dtype mismatch would
# otherwise miss every key and silently flag every row as an away game
home_team_for_row = team_stats.game_id.astype(str).map(home_map)

# 1 when the row's team is the home team of that game, otherwise 0
team_stats["is_home"] = np.where(home_team_for_row == team_stats.team, 1, 0)

In [None]:
# Strength states counted as power play / shorthanded
pp_list = ["5v4", "5v3", "4v3"]
sh_list = ["4v5", "3v5", "3v4"]

# Bucket label -> boolean mask over team_stats rows (checked in this order)
strength_buckets = {
    "5v5": team_stats.strength_state.eq("5v5"),
    "powerplay": team_stats.strength_state.isin(pp_list),
    "shorthanded": team_stats.strength_state.isin(sh_list),
}

conditions = list(strength_buckets.values())
values = list(strength_buckets)

# Any other strength state gets None
team_stats["strength_state2"] = np.select(conditions, values, default=None)

In [None]:
# One row per season / session / team / venue / strength bucket
group_columns = ["season", "session", "team", "is_home", "strength_state2"]

# Sum every numeric stat column. Rate ("p60") and percentage columns are
# skipped because summing them is meaningless. is_numeric_dtype is used rather
# than `dtype != "object"` so that string-, datetime- or categorical-typed
# columns can never slip into the "sum" aggregation.
stat_cols = {
    column: "sum"
    for column in team_stats.columns
    if column not in group_columns
    and "p60" not in column
    and "percent" not in column
    and pd.api.types.is_numeric_dtype(team_stats[column])
}

# groupby drops rows with a missing key by default, so rows whose
# strength_state2 is None are left out of the result
team_stats_group = team_stats.groupby(group_columns, as_index=False).agg(stat_cols)

In [None]:
# League-wide totals: same keys as the per-team aggregation, but without
# "team", so each row covers every team for that venue / strength bucket
group_columns = ["season", "session", "is_home", "strength_state2"]

# Sum every numeric stat column. Rate ("p60") and percentage columns are
# skipped because summing them is meaningless. is_numeric_dtype is used rather
# than `dtype != "object"` so that string-, datetime- or categorical-typed
# columns (including "team" itself) can never slip into the "sum" aggregation.
stat_cols = {
    column: "sum"
    for column in team_stats.columns
    if column not in group_columns
    and "p60" not in column
    and "percent" not in column
    and pd.api.types.is_numeric_dtype(team_stats[column])
}

# groupby drops rows with a missing key by default, so rows whose
# strength_state2 is None are left out of the result
group_stats = team_stats.groupby(group_columns, as_index=False).agg(stat_cols)

In [None]:
# League-wide per-60 rates: total / time on ice * 60, for goals and expected
# goals, for and against
for stat in ("gf", "ga", "xgf", "xga"):
    group_stats[f"{stat}_p60"] = group_stats[stat] / group_stats.toi * 60

In [None]:
# Attach the league-wide xGF per 60 for the matching venue / strength bucket
# to every team row.
#
# This is a keyed left merge rather than np.select over one-row slices of
# group_stats: the slices only broadcast while group_stats holds exactly one
# row per (strength_state2, is_home) pair, which stops being true as soon as
# the data contains more than one season or session.
league_keys = ["season", "session", "is_home", "strength_state2"]

league_xgf = group_stats[league_keys].copy()
league_xgf["mean_nhl_xgf_p60"] = group_stats.xgf / group_stats.toi * 60

team_stats_group = team_stats_group.drop(
    # Drop a previous result first so re-running the cell doesn't create _x / _y columns
    columns="mean_nhl_xgf_p60",
    errors="ignore",
).merge(league_xgf, on=league_keys, how="left", validate="many_to_one")

In [None]:
# Attach the league-wide xGA per 60 for the matching venue / strength bucket
# to every team row.
#
# This is a keyed left merge rather than np.select over one-row slices of
# group_stats: the slices only broadcast while group_stats holds exactly one
# row per (strength_state2, is_home) pair, which stops being true as soon as
# the data contains more than one season or session.
league_keys = ["season", "session", "is_home", "strength_state2"]

league_xga = group_stats[league_keys].copy()
league_xga["mean_nhl_xga_p60"] = group_stats.xga / group_stats.toi * 60

team_stats_group = team_stats_group.drop(
    # Drop a previous result first so re-running the cell doesn't create _x / _y columns
    columns="mean_nhl_xga_p60",
    errors="ignore",
).merge(league_xga, on=league_keys, how="left", validate="many_to_one")

In [None]:
group_list = ["season", "session", "strength_state2", "team"]

# Per-team expected goals for / against per 60 minutes: total / toi * 60
team_toi = team_stats_group.toi

team_stats_group["team_xgf_p60"] = team_stats_group.xgf.div(team_toi).mul(60)

team_stats_group["team_xga_p60"] = team_stats_group.xga.div(team_toi).mul(60)

In [None]:
# Team strength = the team's per-60 rate divided by the league rate for the
# same venue / strength bucket
team_stats_group["team_off_strength"] = team_stats_group.team_xgf_p60.div(
    team_stats_group.mean_nhl_xgf_p60
)
team_stats_group["team_def_strength"] = team_stats_group.team_xga_p60.div(
    team_stats_group.mean_nhl_xga_p60
)

In [None]:
# Preview the first ten rows of the per-team table, including the strength columns just added
team_stats_group.head(10)

In [None]:
# Today's local date as a "YYYY-MM-DD" string
todays_date = dt.date.today().isoformat()

In [None]:
# Schedule rows dated today, re-indexed from zero
is_today = schedule.game_date.eq(todays_date)
todays_games = schedule[is_today].reset_index(drop=True)

In [None]:
# Broadcast the league-wide xGF per 60 for each strength bucket / venue onto
# every one of today's games. Columns are created in the order 5v5, pp, sh,
# home before away within each.
for strength, label in (("5v5", "5v5"), ("powerplay", "pp"), ("shorthanded", "sh")):
    for home_flag, venue in ((1, "home"), (0, "away")):
        league_rows = group_stats.loc[
            (group_stats.strength_state2 == strength)
            & (group_stats.is_home == home_flag)
        ]

        # First matching league row; its rate is a scalar shared by all games
        todays_games[f"mean_nhl_{label}_{venue}_xgf_p60"] = (
            league_rows.xgf / league_rows.toi * 60
        ).iloc[0]

In [None]:
# Broadcast the league-wide xGA per 60 for each strength bucket / venue onto
# every one of today's games. Columns are created in the order 5v5, pp, sh,
# home before away within each.
for strength, label in (("5v5", "5v5"), ("powerplay", "pp"), ("shorthanded", "sh")):
    for home_flag, venue in ((1, "home"), (0, "away")):
        league_rows = group_stats.loc[
            (group_stats.strength_state2 == strength)
            & (group_stats.is_home == home_flag)
        ]

        # First matching league row; its rate is a scalar shared by all games
        todays_games[f"mean_nhl_{label}_{venue}_xga_p60"] = (
            league_rows.xga / league_rows.toi * 60
        ).iloc[0]

In [None]:
# Fill in each team's offensive / defensive strength for today's games.
#
# Each column is a lookup of team -> strength taken from team_stats_group and
# applied with Series.map. Passing the filtered Series straight to np.where
# only works while it has exactly one row; a team with no rows for that venue /
# strength bucket (or more than one) raises a broadcasting error. With map, a
# team without a matching row simply gets NaN.

# (column prefix, is_home flag, schedule column holding the team)
venues = [("home", 1, "home_team"), ("away", 0, "away_team")]

# (column suffix, strength_state2 bucket, source column in team_stats_group)
strength_specs = [
    ("5v5_off_strength", "5v5", "team_off_strength"),
    ("5v5_def_strength", "5v5", "team_def_strength"),
    ("pp_off_strength", "powerplay", "team_off_strength"),
    ("sh_def_strength", "shorthanded", "team_def_strength"),
]

for venue, home_flag, team_column in venues:
    for suffix, strength, source_column in strength_specs:
        bucket_rows = team_stats_group.loc[
            (team_stats_group.is_home == home_flag)
            & (team_stats_group.strength_state2 == strength)
        ]

        # NOTE(review): if a team has several rows in this bucket (e.g. more
        # than one season or session in the data) the last one is used -
        # confirm that is the row wanted
        strength_by_team = bucket_rows.drop_duplicates(
            subset="team", keep="last"
        ).set_index("team")[source_column]

        todays_games[f"{venue}_{suffix}"] = (
            todays_games[team_column].map(strength_by_team).astype(float)
        )

In [None]:
# Predicted per-60 rates for today's games. Each prediction is the product of
# three existing columns, multiplied left to right:
#   first strength * opposing strength * league-wide rate
prediction_factors = {
    "pred_home_5v5_xgf_p60": (
        "home_5v5_off_strength",
        "away_5v5_def_strength",
        "mean_nhl_5v5_home_xgf_p60",
    ),
    "pred_home_5v5_xga_p60": (
        "home_5v5_def_strength",
        "away_5v5_off_strength",
        "mean_nhl_5v5_home_xga_p60",
    ),
    "pred_home_pp_xgf_p60": (
        "home_pp_off_strength",
        "away_sh_def_strength",
        "mean_nhl_pp_home_xgf_p60",
    ),
    "pred_home_sh_xga_p60": (
        "home_sh_def_strength",
        "away_pp_off_strength",
        "mean_nhl_sh_home_xga_p60",
    ),
    "pred_away_5v5_xgf_p60": (
        "home_5v5_def_strength",
        "away_5v5_off_strength",
        "mean_nhl_5v5_away_xgf_p60",
    ),
    "pred_away_5v5_xga_p60": (
        "home_5v5_off_strength",
        "away_5v5_def_strength",
        "mean_nhl_5v5_away_xga_p60",
    ),
    "pred_away_pp_xgf_p60": (
        "away_pp_off_strength",
        "home_sh_def_strength",
        "mean_nhl_pp_away_xgf_p60",
    ),
    "pred_away_sh_xga_p60": (
        "away_sh_def_strength",
        "home_pp_off_strength",
        "mean_nhl_sh_away_xga_p60",
    ),
}

for prediction, (first, second, league_rate) in prediction_factors.items():
    todays_games[prediction] = (
        todays_games[first] * todays_games[second] * todays_games[league_rate]
    )

In [None]:
# Display today's games with the league rates, team strengths and predictions added above
todays_games

In [None]:
# Spot-check the per-team rows for the "COL" and "CHI" team codes
team_stats_group.loc[team_stats_group.team.isin(["COL", "CHI"])]

In [None]:
# League-wide xGF per 60 for home teams at 5v5, shown as a sanity check
home_5v5_rows = group_stats[
    (group_stats.strength_state2 == "5v5") & (group_stats.is_home == 1)
]

home_5v5_rows.xgf / home_5v5_rows.toi * 60