In [14]:
import pandas as pd
from tqdm import tqdm

games = pd.read_parquet("../data/games_elo.parquet")
pbp = pd.read_parquet("../data/pbp_reduced.parquet")

# ignore shootout
regular_ot = games[(games["Period"] == 4) & (games["Playoff"] == False)]

In [15]:
valid_events = ["FAC", "BLOCK", "SHOT", "GOAL", "MISS", "HIT", "GIVE", "TAKE"]

events = []

for idx, game in tqdm(regular_ot.iterrows(), total=len(regular_ot)):
    mask = ((pbp["Game_Id"] == game["Game_Id"])
            & (pbp["Season"] == game["Season"])
            & (pbp["Period"] == 4))
    reduced = pbp[mask]

    for idx, play in reduced.iterrows():
        # first event will always be a faceoff
        if play["Event"] in valid_events:
            event = {
                "game": game["Game_Id"],
                "season": game["Season"],
                "away_elo": game["Away_Starting_Elo"],
                "home_elo": game["Home_Starting_Elo"],
                "time_remaining": 300 - play["Seconds_Elapsed"],  # 5 minutes in OT
                "event": play["Event"],
                "team": "home" if play["Ev_Team"] == play["Home_Team"] else "away",
                "event_zone": play["Ev_Zone"],
                "home_zone": play["Home_Zone"],
                "strength": play["Strength"],
                "winner": "home" if game["Home_Score"] > game["Away_Score"] else "away"
            }

            events.append(event)

100%|██████████| 2564/2564 [00:58<00:00, 43.54it/s]


In [16]:
df = pd.DataFrame(events)
df.to_parquet("../data/regular_ot_pbp.parquet")