In [1]:
#Required libraries
import torch
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from collections import defaultdict
from hockey_rink import NHLRink

#Load the keyed event data and preview the first ten rows
csv_path = 'Linhac_df_keyed_20_games.csv'
df = pd.read_csv(csv_path)
df.head(10)

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,gameid,opposingteamgoalieoniceid,opposingteamid,playerid,teamgoalieoniceid,teamid,teaminpossession,currentpossession,xg,compiledgametime,...,manpowersituation,opposingteamskatersonicecount,outcome,period,playerprimaryposition,scoredifferential,teamskatersonicecount,type,xadjcoord,yadjcoord
0,66445,506563.0,916,358235,940804.0,742,,,,0.0,...,evenStrength,5,failed,1,F,0,5,none,0.305008,-0.252941
1,66445,940804.0,742,586302,506563.0,916,,,,0.0,...,evenStrength,5,successful,1,F,0,5,recoveredwithentry,-0.305008,0.252941
2,66445,506563.0,916,358235,940804.0,742,,,,0.033333,...,evenStrength,5,failed,1,F,0,5,contested,-0.197929,0.752941
3,66445,940804.0,742,689086,506563.0,916,916.0,0.0,,0.1,...,evenStrength,5,successful,1,D,0,5,faceoff,-7.849129,-4.77647
4,66445,940804.0,742,689086,506563.0,916,916.0,0.0,,0.166667,...,evenStrength,5,successful,1,D,0,5,south,-6.843246,-3.267647
5,66445,940804.0,742,591556,506563.0,916,916.0,0.0,,1.0,...,evenStrength,5,successful,1,D,0,5,regular,-23.943245,7.294117
6,66445,940804.0,742,591556,506563.0,916,916.0,0.0,,2.6,...,evenStrength,5,successful,1,D,0,5,south,-21.428535,7.797058
7,66445,940804.0,742,689086,506563.0,916,916.0,0.0,,3.3,...,evenStrength,5,successful,1,D,0,5,regular,-29.475601,-18.355881
8,66445,940804.0,742,689086,506563.0,916,916.0,0.0,,3.633333,...,evenStrength,5,successful,1,D,0,5,none,-24.949127,-24.894119
9,66445,940804.0,742,689086,506563.0,916,,,,3.633333,...,evenStrength,5,successful,1,D,0,5,carrywithplay,-24.949127,-24.894119


In [2]:
#Create a DataFrame that quantifies how often one event is followed by another
#(row label = preceding event, column label = the event that followed it)
unique_events = df['eventname'].unique().tolist()
unique_games = df['gameid'].unique().tolist()

#Counts are accumulated in plain dictionaries and written into the DataFrame once
#at the end. Updating single cells with chained indexing (frame[col][row] += 1)
#is not guaranteed to write back to the frame, so it is avoided here.
transition_counts = defaultdict(int)
event_counts = {}

#Transitions are only counted inside a game: the "previous event" is reset at the
#start of every game. groupby(sort=False) keeps the games and their rows in file order.
for game, transition in df.groupby('gameid', sort=False):
    last_event = None
    last_timestamp = None
    for event, timestamp in zip(transition['eventname'], transition['compiledgametime']):
        #A faceoff that directly follows a faceoff at the same timestamp is skipped:
        #it is counted neither as a transition nor as an occurrence
        repeated_faceoff = (
            last_event == 'faceoff'
            and event == 'faceoff'
            and timestamp == last_timestamp
        )
        if not repeated_faceoff:
            if last_event is not None:
                transition_counts[(last_event, event)] += 1
            event_counts[event] = event_counts.get(event, 0) + 1
        last_event = event
        last_timestamp = timestamp

#Build the integer count matrix in one go
transitions_df = pd.DataFrame(0, index=unique_events, columns=unique_events, dtype='int64')
for (previous_event, following_event), count in transition_counts.items():
    transitions_df.loc[previous_event, following_event] = count

transitions_df.head()

Unnamed: 0,faceoff,lpr,pass,reception,carry,controlledexit,dumpin,block,controlledentryagainst,controlledentry,...,check,penaltydrawn,penalty,assist,goal,icing,offside,soshot,sogoal,sopuckprotection
faceoff,4,1141,0,0,0,0,0,3,0,0,...,0,1,3,2,0,0,0,0,0,0
lpr,142,3731,7234,179,525,112,202,99,32,16,...,183,12,12,8,0,0,17,1,0,0
pass,21,1813,2,12278,0,1138,0,1815,103,92,...,0,11,8,131,0,13,8,0,0,0
reception,24,341,6763,0,1314,1396,196,32,145,211,...,155,8,7,16,0,0,9,0,0,0
carry,2,2,304,0,434,1273,492,3,414,1203,...,33,2,0,0,0,0,9,0,0,0


In [3]:
#Convert the transition counts created above into the likelihood that one event
#will lead to another: every row (preceding event) is divided by its own total
transition_counts_float = transitions_df.astype(float)
row_totals = transition_counts_float.sum(axis=1)

#One vectorised division replaces writing each cell with chained indexing
#(frame[col][row] = ...), which is not guaranteed to update the frame.
#An event that is never followed by another event has a row total of 0,
#so its row becomes NaN (0 / 0).
transitions = transition_counts_float.div(row_totals, axis=0)

transitions.head()

Unnamed: 0,faceoff,lpr,pass,reception,carry,controlledexit,dumpin,block,controlledentryagainst,controlledentry,...,check,penaltydrawn,penalty,assist,goal,icing,offside,soshot,sogoal,sopuckprotection
faceoff,0.00346,0.987024,0.0,0.0,0.0,0.0,0.0,0.002595,0.0,0.0,...,0.0,0.000865,0.002595,0.00173,0.0,0.0,0.0,0.0,0.0,0.0
lpr,0.009215,0.242131,0.469466,0.011617,0.034071,0.007268,0.013109,0.006425,0.002077,0.001038,...,0.011876,0.000779,0.000779,0.000519,0.0,0.0,0.001103,6.5e-05,0.0,0.0
pass,0.001203,0.103867,0.000115,0.703409,0.0,0.065196,0.0,0.103982,0.005901,0.005271,...,0.0,0.00063,0.000458,0.007505,0.0,0.000745,0.000458,0.0,0.0,0.0
reception,0.00179,0.025431,0.504363,0.0,0.097994,0.104109,0.014617,0.002386,0.010814,0.015736,...,0.011559,0.000597,0.000522,0.001193,0.0,0.0,0.000671,0.0,0.0,0.0
carry,0.000457,0.000457,0.069486,0.0,0.0992,0.290971,0.112457,0.000686,0.094629,0.274971,...,0.007543,0.000457,0.0,0.0,0.0,0.0,0.002057,0.0,0.0,0.0


In [4]:
#Generate a Q-learning Reinforcement Learning Model
#Missing possession values are replaced with the placeholder 9999 so that every
#row can be turned into a state tuple below
possession_placeholders = {'teaminpossession': 9999, 'currentpossession': 9999}
df = df.fillna(value=possession_placeholders)

#A goal is the most desirable outcome over the course of a possession
def calculate_reward(outcome, action):
    """Return the reward for an event.

    A successful "goal" is worth 500, any other successful event is worth 1,
    and every event whose outcome is not "successful" is worth -1.
    """
    if outcome != "successful":
        return -1
    return 500 if action == "goal" else 1

#Fix the NumPy seed: the actions below are sampled with np.random, so without a
#seed the learned Q-table differs on every run
SEED = 42
np.random.seed(SEED)

#Defining the unique states and actions that occur over the 20 games in the dataset
#A state is the combination of the values in these columns
STATE_COLUMNS = ['gameid', 'scoredifferential', 'period', 'manpowersituation',
                 'teaminpossession', 'currentpossession', 'xadjcoord', 'yadjcoord']

unique_states = (
    df[STATE_COLUMNS]
    .drop_duplicates()
    .to_records(index=False)
    .tolist()
)

unique_actions = df['eventname'].unique().tolist()

#Create dictionaries containing these unique states and actions
#(forward and reverse lookups between a state/action and its integer index)
state_mapping = {state: idx for idx, state in enumerate(unique_states)}
id_to_state_mapping = dict(enumerate(unique_states))

action_to_id_mapping = {action: idx for idx, action in enumerate(unique_actions)}
id_to_action_mapping = dict(enumerate(unique_actions))

#Get total unique states and actions
num_states = len(unique_states)
num_actions = len(unique_actions)

#Hyperparameters required following literature review of Markov Game modeling
#These can and should be adjusted
alpha = 0.1    #step size of each Q-table update
gamma = 0.99   #discount applied to the best Q-value of the next state
epsilon = 0.1  #probability of picking a uniformly random action
num_episodes = len(df)

#Leverage Q-learning for this task: one row per state, one column per action
Q_table = torch.zeros((num_states, num_actions))

#Leverage the epsilon-greedy strategy (commonly used with Q-learning)
def choose_action(state_idx, epsilon, last_event=None, transition_matrix=None, action_ids=None):
    """Sample the index of the next action.

    With probability ``epsilon`` the index is drawn uniformly at random;
    otherwise it is drawn from the transition probabilities that follow
    ``last_event``.

    Parameters
    ----------
    state_idx : int
        Index of the current state. Kept for interface compatibility; the
        sampling below does not use it.
    epsilon : float
        Probability of drawing a uniformly random action.
    last_event : str, optional
        Event whose transition row is sampled. When omitted, the notebook-level
        ``game_data['eventname']`` is used.
    transition_matrix : pandas.DataFrame, optional
        Rows are preceding events, columns are following events. When omitted,
        the notebook-level ``transitions`` frame is used.
    action_ids : dict, optional
        Maps event name to action index (expected to be 0..n-1). When omitted,
        the notebook-level ``action_to_id_mapping`` is used.

    Returns
    -------
    int
        Index of the sampled action.
    """
    if action_ids is None:
        action_ids = action_to_id_mapping
    num_choices = len(action_ids)

    #Explore: ignore the transition matrix entirely
    if np.random.uniform(0, 1) < epsilon:
        return int(np.random.choice(num_choices))

    #Leverage the transition matrix created above
    if transition_matrix is None:
        transition_matrix = transitions
    if last_event is None:
        last_event = game_data['eventname']

    #Order the probabilities by action index so position i always refers to action i
    ordered_actions = sorted(action_ids, key=action_ids.get)
    probs = transition_matrix.loc[last_event].reindex(ordered_actions).to_numpy(dtype=float)
    probs = np.where(np.isnan(probs), 0.0, probs)
    total = probs.sum()
    if total <= 0:
        #Row is all zero/NaN (no follow-up event was ever recorded): np.random.choice
        #would raise on such a vector, so fall back to a uniform draw
        return int(np.random.choice(num_choices))
    #Re-normalise so the vector sums to exactly 1
    return int(np.random.choice(num_choices, p=probs / total))

#Begin Q-learning approach
#Column order must match the tuples stored as keys in state_mapping
state_fields = ['gameid', 'scoredifferential', 'period', 'manpowersituation',
                'teaminpossession', 'currentpossession', 'xadjcoord', 'yadjcoord']

#Walk the rows pairwise: game_data is the current row, next_state_data the row after it
df_iterator = df.iterrows()
_, game_data = next(df_iterator, (None, None))
if game_data is not None:
    state_idx = state_mapping[tuple(game_data[field] for field in state_fields)]

for _, next_state_data in df_iterator:
    #choose_action looks up the transition row through the module-level game_data,
    #so game_data has to point at the current row on every step (it is advanced at
    #the bottom of this loop). It returns an action index, which is used directly.
    action_idx = int(choose_action(state_idx, epsilon))

    #Obtain the next state following the current state
    next_state = tuple(next_state_data[field] for field in state_fields)
    next_state_idx = state_mapping[next_state]

    reward = calculate_reward(next_state_data["outcome"], next_state_data["eventname"])

    #Update the Q-learning table
    max_next_Q = torch.max(Q_table[next_state_idx])
    target = reward + gamma * max_next_Q
    Q_table[state_idx, action_idx] += alpha * (target - Q_table[state_idx, action_idx])

    #Move to the next state that's available
    #NOTE(review): rows of consecutive games are chained without a reset at the
    #game boundary - confirm this is intended
    state_idx = next_state_idx
    game_data = next_state_data

#Get the predicted/optimal action for each state
optimal_action = {
    id_to_state_mapping[idx]: id_to_action_mapping[torch.argmax(Q_table[idx]).item()]
    for idx in range(num_states)
}

#optimal_action

In [5]:
#As seen in the prior cell, the Q-learning model created above often predicts the next optimal
#event to be a loose puck recovery. This is detailed further in the discussion section, but because
#most events in the dataset are centered around puck recoveries, this type of skew was not unexpected.
#To help the learning process, the logic below trains a second Q-learning model in which the
#definition of a unique state is much simpler, in the hope that having more observations available
#for each state will aid the learning process and lead to more effective predictions.

#Same placeholder fill as in the previous cell; it changes nothing if that cell already ran
possession_placeholders = {'teaminpossession': 9999, 'currentpossession': 9999}
df = df.fillna(value=possession_placeholders)

#A goal is the most desirable outcome over the course of a possession
def calculate_reward(outcome, action):
    """Score an event: 500 for a successful goal, 1 for any other successful
    event, -1 whenever the outcome is anything other than "successful"."""
    succeeded = outcome == "successful"
    if succeeded and action == "goal":
        return 500
    if succeeded:
        return 1
    return -1

#Fix the NumPy seed: the actions below are sampled with np.random, so without a
#seed the learned Q-table differs on every run
SEED = 42
np.random.seed(SEED)

#Defining the unique states and actions that occur over the 20 games in the dataset
#A state is the combination of the values in these columns
STATE_COLUMNS = ['scoredifferential', 'period', 'manpowersituation', 'teaminpossession']

unique_states = (
    df[STATE_COLUMNS]
    .drop_duplicates()
    .to_records(index=False)
    .tolist()
)

unique_actions = df['eventname'].unique().tolist()

#Create dictionaries containing these unique states and actions
#(forward and reverse lookups between a state/action and its integer index)
state_mapping = {state: idx for idx, state in enumerate(unique_states)}
id_to_state_mapping = dict(enumerate(unique_states))

action_to_id_mapping = {action: idx for idx, action in enumerate(unique_actions)}
id_to_action_mapping = dict(enumerate(unique_actions))

#Get total unique states and actions
num_states = len(unique_states)
num_actions = len(unique_actions)

#Hyperparameters required following literature review of Markov Game modeling
#These can and should be adjusted
alpha = 0.1    #step size of each Q-table update
gamma = 0.99   #discount applied to the best Q-value of the next state
epsilon = 0.1  #probability of picking a uniformly random action
num_episodes = len(df)

#Leverage Q-learning for this task: one row per state, one column per action
Q_table = torch.zeros((num_states, num_actions))

#Leverage the epsilon-greedy strategy (commonly used with Q-learning)
def choose_action(state_idx, epsilon, last_event, transition_matrix=None, action_ids=None):
    """Sample the index of the next action, never repeating ``last_event``.

    With probability ``epsilon`` the index is drawn uniformly from all actions
    other than ``last_event``; otherwise it is drawn from the transition
    probabilities that follow ``last_event``, with the self-transition removed
    and the remaining probabilities re-normalised.

    Parameters
    ----------
    state_idx : int
        Index of the current state. Kept for interface compatibility; the
        sampling below does not use it.
    epsilon : float
        Probability of drawing a uniformly random action.
    last_event : str
        Name of the event that was just observed.
    transition_matrix : pandas.DataFrame, optional
        Rows are preceding events, columns are following events. When omitted,
        the notebook-level ``transitions`` frame is used.
    action_ids : dict, optional
        Maps event name to action index (expected to be 0..n-1). When omitted,
        the notebook-level ``action_to_id_mapping`` is used.

    Returns
    -------
    int
        Index of the sampled action.
    """
    if action_ids is None:
        action_ids = action_to_id_mapping
    num_choices = len(action_ids)
    last_action_idx = action_ids[last_event]

    #Explore: uniform over every action except the one that just happened
    if np.random.uniform(0, 1) < epsilon:
        candidates = [i for i in range(num_choices) if i != last_action_idx]
        return int(np.random.choice(candidates))

    #Leverage the transition matrix
    if transition_matrix is None:
        transition_matrix = transitions

    #Work on a NumPy copy ordered by action index. Setting an entry by integer
    #position on the string-labelled pandas row is deprecated and is treated as
    #a label (adding a new entry) in newer pandas versions.
    ordered_actions = sorted(action_ids, key=action_ids.get)
    probs = transition_matrix.loc[last_event].reindex(ordered_actions).to_numpy(dtype=float)
    probs = np.where(np.isnan(probs), 0.0, probs)
    probs[last_action_idx] = 0.0

    total = probs.sum()
    if total <= 0:
        #Nothing but the self-transition was ever observed: dividing by the zero
        #total would give NaN probabilities, so fall back to uniform over the others
        probs = np.ones(num_choices, dtype=float)
        probs[last_action_idx] = 0.0
        total = probs.sum()
    return int(np.random.choice(num_choices, p=probs / total))

#Begin Q-learning approach
#Column order must match the tuples stored as keys in state_mapping
state_fields = ['scoredifferential', 'period', 'manpowersituation', 'teaminpossession']

#Walk the rows pairwise: game_data is the current row, next_state_data the row after it
df_iterator = df.iterrows()
_, game_data = next(df_iterator, (None, None))
if game_data is not None:
    state_idx = state_mapping[tuple(game_data[field] for field in state_fields)]

for _, next_state_data in df_iterator:
    #The event of the *current* row conditions the sampled action, so game_data
    #is advanced at the bottom of this loop on every step
    last_event = game_data['eventname']
    #choose_action returns an action index, which is used directly
    action_idx = int(choose_action(state_idx, epsilon, last_event))

    #Obtain the next state following the current state
    next_state = tuple(next_state_data[field] for field in state_fields)
    next_state_idx = state_mapping[next_state]

    reward = calculate_reward(next_state_data["outcome"], next_state_data["eventname"])

    #Update the Q-learning table
    max_next_Q = torch.max(Q_table[next_state_idx])
    target = reward + gamma * max_next_Q
    Q_table[state_idx, action_idx] += alpha * (target - Q_table[state_idx, action_idx])

    #Move to the next state that's available
    #NOTE(review): rows of consecutive games are chained without a reset at the
    #game boundary - confirm this is intended
    state_idx = next_state_idx
    game_data = next_state_data

#Get the predicted/optimal action for each state
optimal_action_new = {
    id_to_state_mapping[idx]: id_to_action_mapping[torch.argmax(Q_table[idx]).item()]
    for idx in range(num_states)
}

#optimal_action_new

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=52c67ded-f4f7-4855-8de6-8871afe84448' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>