In [25]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from SQLCode import DatabaseConnection
from SQLCode import DatabaseCredentials as DBC

In [2]:
# Load the stored credentials and open the SQL connection reused by the queries below.
creds = DBC.DataBaseCredentials()
conn = DatabaseConnection.sql_connection(
    creds.server,
    creds.database,
    creds.user,
    creds.password,
)
connection = conn.open()
# A cursor is created as well, although no cell visible in this section uses it.
cursor = connection.cursor()

In [3]:
# Every live-feed event recorded for the single game under study.
liveFeed = pd.read_sql_query(
    "select * from live_feed where gameID = 2020030312",
    connection,
)

In [4]:
# Full schedule table; it is joined onto the live feed by gameID further down.
seasons = pd.read_sql_query(
    "select * from schedules",
    connection,
)

In [5]:
# Player-to-team lookup for the same game, used to attach each player's team to an event.
boxscores = pd.read_sql_query(
    "select gameID, teamID, playerID from box_scores where gameID = 2020030312",
    connection,
)

# Creating Model Input

In [6]:
# Regular-season games ('R') with seasonID 20102011 or later (when live data started).
seasonsFiltered = seasons[(seasons['seasonID'] >= 20102011) & (seasons['gameType'] == 'R')]

In [7]:
# Join the schedule onto the live feed, keep seasons from 20102011 onwards and only
# the primary record of each event (eventSubID == 0), then narrow to the columns
# the sequence builder needs. The regular-season (gameType == 'R') filter stays disabled.
rawData = (
    pd.merge(liveFeed, seasons, how='left', on='gameID')
    .loc[
        lambda events: (events['seasonID'] >= 20102011) & (events['eventSubID'] == 0),
        [
            'eventID',
            'gameID',
            'eventTypeID',
            'eventDescription',
            'periodNum',
            'periodTime',
            'xCoordinate',
            'yCoordinate',
            'teamID',
            'homeTeamID',
            'awayTeamID',
            'playerID',
            'penaltyMinutes',
        ],
    ]
)

In [8]:
# Attach the box-score team of each event's player; the box-score copy of a
# clashing column (teamID) is suffixed with '_box'.
rawData = rawData.merge(
    boxscores,
    how='left',
    on=['gameID', 'playerID'],
    suffixes=('', '_box'),
)

In [9]:
# Keep only the event types the sequence builder understands, ordered by game and
# then by event id. (A further filter on playerType was sketched here previously
# and remains disabled.)
rawDataFiltered = (
    rawData.loc[
        rawData['eventTypeID'].isin([
            'FACEOFF',
            'SHOT',
            'MISSED_SHOT',
            'BLOCKED_SHOT',
            'TAKEAWAY',
            'GIVEAWAY',
            'HIT',
            'GOAL',
            'PERIOD_END',
            'EARLY_INT_START',
            'PENALTY',
            'STOP',
            'EARLY_INT_END',
        ])
    ]
    .sort_values(by=['gameID', 'eventID'])
)

In [10]:
# periodNum seems to be read in as a string, so make it numeric first, then keep
# only the first three periods for consistency.
rawDataFiltered = (
    rawDataFiltered
    .assign(periodNum=lambda events: pd.to_numeric(events['periodNum']))
    .loc[lambda events: events['periodNum'] <= 3]
)

In [11]:
def team_type(teamID, homeTeamID, awayTeamID):
    """Classify the acting team of an event as 'HOME' or 'AWAY'.

    Returns 'HOME' when teamID equals homeTeamID, otherwise 'AWAY' when it equals
    awayTeamID, and np.nan when it matches neither (including a missing teamID).
    """
    if teamID == homeTeamID:
        return 'HOME'
    return 'AWAY' if teamID == awayTeamID else np.nan

def coordinate_normalization(xCoord, teamType, periodNum):
    """Mirror the x coordinate so both teams are expressed in one orientation.

    Parameters:
        xCoord: raw x coordinate of the event.
        teamType: 'HOME', 'AWAY', or a missing value (np.nan / None) when the
            event has no acting team.
        periodNum: period of the event. Kept for interface compatibility; it
            does not influence the result (see the note below).

    Returns:
        None when teamType is missing, xCoord unchanged for 'AWAY', and
        -1 * xCoord for anything else.
    """
    # `teamType == np.nan` is always False because NaN never equals itself,
    # so missing values have to be detected with pd.isna.
    if pd.isna(teamType):
        return None

    # NOTE(review): the odd-period and even-period branches were identical, so
    # the period never changed the outcome. If the sides were meant to flip in
    # even periods, that logic still has to be written.
    return xCoord if teamType == 'AWAY' else -1 * xCoord
            
def elapsed_seconds(periodNum, periodTime):
    """Return the game clock in seconds at the moment of an event.

    Each completed period contributes 20 * 60 seconds. The period clock is
    parsed as a Timedelta and its total_seconds() divided by 60: in the frame
    displayed in this notebook a clock of '0 days 00:11:00' corresponds to 11
    elapsed seconds, i.e. the value arrives shifted up by one time unit.
    """
    completedPeriods = int(periodNum) - 1
    clockSeconds = pd.Timedelta(periodTime).total_seconds() / 60
    return completedPeriods * 20 * 60 + clockSeconds

class Queue:
    """FIFO queue kept in a plain list: newest item at index 0, oldest at the end."""
    # CITATION: https://runestone.academy/runestone/books/published/pythonds/BasicDS/ImplementingaQueueinPython.html

    def __init__(self):
        self.queue = list()

    def isEmpty(self):
        """Return True when nothing is queued."""
        return len(self.queue) == 0

    def enqueue(self, item):
        """Add item at the front of the list (the 'newest' end)."""
        self.queue[:0] = [item]

    def dequeue(self):
        """Remove and return the oldest item (the last list element)."""
        return self.queue.pop(-1)

    def size(self):
        """Return the number of queued items."""
        return len(self.queue)

    def get_queue(self):
        """Return the underlying list itself (not a copy), newest item first."""
        return self.queue

    def exchange(self, oldItem, newItem):
        """Replace the first entry equal to oldItem with newItem."""
        position = self.queue.index(oldItem)
        self.queue[position] = newItem

    def remove(self, item):
        """Delete the first entry equal to item."""
        self.queue.remove(item)

In [12]:
# Label every event with the side (HOME / AWAY / NaN) of its acting team.
rawDataFiltered['teamType'] = rawDataFiltered.apply(
    lambda event: team_type(event['teamID'], event['homeTeamID'], event['awayTeamID']),
    axis=1,
)

In [13]:
# Overwrite the raw x coordinate with its team-normalised version.
rawDataFiltered['xCoordinate'] = rawDataFiltered.apply(
    lambda event: coordinate_normalization(
        event['xCoordinate'],
        event['teamType'],
        event['periodNum'],
    ),
    axis=1,
)

In [14]:
# Game clock (in seconds) of every event, derived from its period and period time.
rawDataFiltered['secondsElapsed'] = rawDataFiltered.apply(
    lambda event: elapsed_seconds(event['periodNum'], event['periodTime']),
    axis=1,
)

In [15]:
# Display the prepared event frame to eyeball the derived teamType / secondsElapsed columns.
rawDataFiltered

Unnamed: 0,eventID,gameID,eventTypeID,eventDescription,periodNum,periodTime,xCoordinate,yCoordinate,teamID,homeTeamID,awayTeamID,playerID,penaltyMinutes,teamID_box,teamType,secondsElapsed
3,3,2020030312,FACEOFF,Phillip Danault faceoff won against William Ka...,1,0 days 00:00:00,0.0,0.0,8.0,54,8,8476479.0,,8.0,AWAY,0.0
4,4,2020030312,STOP,Icing,1,0 days 00:11:00,,,,54,8,,,,,11.0
5,5,2020030312,FACEOFF,Phillip Danault faceoff won against William Ka...,1,0 days 00:11:00,69.0,22.0,8.0,54,8,8476479.0,,8.0,AWAY,11.0
6,6,2020030312,BLOCKED_SHOT,Brendan Gallagher shot blocked shot by Alec Ma...,1,0 days 00:16:00,-81.0,9.0,54.0,54,8,8474166.0,,54.0,HOME,16.0
7,7,2020030312,HIT,Brendan Gallagher hit Alex Pietrangelo,1,0 days 00:25:00,34.0,-39.0,8.0,54,8,8475848.0,,8.0,AWAY,25.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
370,370,2020030312,STOP,Puck in Crowd,3,0 days 19:39:00,,,,54,8,,,,,3579.0
371,371,2020030312,FACEOFF,Phillip Danault faceoff won against William Ka...,3,0 days 19:39:00,-69.0,-22.0,8.0,54,8,8476479.0,,8.0,AWAY,3579.0
372,372,2020030312,MISSED_SHOT,Alec Martinez Wide of Net Carey Price,3,0 days 19:49:00,62.0,22.0,54.0,54,8,8474166.0,,54.0,HOME,3589.0
373,373,2020030312,BLOCKED_SHOT,Alex Pietrangelo shot blocked shot by Shea Weber,3,0 days 19:56:00,-76.0,-7.0,8.0,54,8,8470642.0,,8.0,AWAY,3596.0


In [16]:
# Walk the ordered events once and emit one row per action, tagged with the game
# context (goal differential, manpower differential, period) at that moment.
sequences = []

# Events that make up a run of play; each one advances eventNum within its sequence.
actionEvents = ['FACEOFF',
                'SHOT',
                'MISSED_SHOT',
                'BLOCKED_SHOT',
                'TAKEAWAY',
                'GIVEAWAY',
                'HIT',
                'GOAL']
# Events that are not emitted as rows of their own (PENALTY is re-admitted explicitly below).
startEndEvents = ['PERIOD_START',
                  'PERIOD_END',
                  'EARLY_INT_START',
                  'PENALTY',
                  'STOP',
                  'SHOOTOUT_COMPLETE',
                  'GAME_END',
                  'EARLY_INT_END']

sequenceNum = 0
eventNum = 0
# Both differentials are signed from the away side: a HOME penalty raises
# manpowerDiff and a HOME goal lowers goalDiff.
context = {'goalDiff':0, 'manpowerDiff':0, 'periodNum':1}
penaltyQueue = Queue()
currentGameID = None
for index, row in rawDataFiltered.iterrows():
    # Rough progress indicator only: `index` is the frame's label, not a running position.
    if index % 100000 == 0:
        print((index/len(rawDataFiltered))*100, '%')

    # Score, manpower and running penalties must not leak from one game into the next.
    # sequenceNum deliberately keeps counting across games.
    if row['gameID'] != currentGameID:
        if currentGameID is not None:
            context = {'goalDiff':0, 'manpowerDiff':0, 'periodNum':1}
            penaltyQueue = Queue()
        currentGameID = row['gameID']

    # Computing if respective team is home or away
    teamType = team_type(row['teamID'], row['homeTeamID'], row['awayTeamID'])

    # Catching penalties to update the context
    if row['eventTypeID'] == 'PENALTY':
        penaltyMinutes = int(row['penaltyMinutes'])
        # 10-minute penalties are ignored here, so they never change manpowerDiff.
        if penaltyMinutes != 10:
            # Getting the start and end time of the penalty (in seconds)
            penaltyStart = elapsed_seconds(row['periodNum'], row['periodTime'])
            penaltyEnd = penaltyStart + penaltyMinutes * 60

            # Determining who took the penalty to update the context
            if teamType == 'HOME':
                context['manpowerDiff'] += 1
            else:
                context['manpowerDiff'] += -1

            # Enqueuing the penalty; penaltyLength is kept in MINUTES,
            # penaltyStart / penaltyEnd in SECONDS.
            penaltyQueue.enqueue({'team': teamType,
                                  'penaltyStart': penaltyStart,
                                  'penaltyEnd': penaltyEnd,
                                  'penaltyLength': penaltyMinutes})
    else:
        # If there currently is a penalty in the penalty queue
        if penaltyQueue.size() > 0:
            # Determining the time of the action
            actionTime = elapsed_seconds(row['periodNum'], row['periodTime'])

            # Oldest to newest, over a snapshot so removals cannot disturb the iteration
            for penalty in list(reversed(penaltyQueue.get_queue())):
                # If the action occurred after the penalty ended, drop it and restore manpower
                if penalty['penaltyEnd'] < actionTime:
                    penaltyQueue.remove(penalty)

                    if penalty['team'] == 'HOME':
                        context['manpowerDiff'] -= 1
                    else:
                        context['manpowerDiff'] -= -1

        # If the event type is a goal
        if row['eventTypeID'] == 'GOAL':

            # Injecting the shot that produced the goal, using the context before the goal counts
            sequences.append([row['gameID'],
                              context['goalDiff'],
                              context['manpowerDiff'],
                              context['periodNum'],
                              sequenceNum,
                              eventNum,
                              'SHOT',  # action type
                              teamType,  # Home/Away
                              row['secondsElapsed'],
                              row['xCoordinate'],
                              row['yCoordinate']])
            # Incrementing the event number
            eventNum += 1

            # Updating the score context
            if teamType == 'HOME':
                context['goalDiff'] -= 1
            else:
                context['goalDiff'] -= -1

            # Per-team flags so that a goal shortens at most one penalty per team
            FLAGS = {'HOME':True, 'AWAY':True}

            # Determining the time of the action
            actionTime = elapsed_seconds(row['periodNum'], row['periodTime'])

            # If there currently is a penalty in the penalty queue
            if penaltyQueue.size() > 0:

                # Oldest to newest, over a snapshot so removals cannot disturb the iteration
                for penalty in list(reversed(penaltyQueue.get_queue())):

                    # A 5 minute major is served in full regardless of goals.
                    # Penalties that already expired were removed above.
                    if penalty['penaltyLength'] == 5:
                        continue

                    # Only a penalty of the team that was scored on qualifies (not a
                    # shorthanded goal), and only the first one per team. Penalties with
                    # an unknown team are skipped instead of raising KeyError.
                    if penalty['team'] != teamType and FLAGS.get(penalty['team'], False):

                        # The goal wipes out one minor: 2 minutes come off the length and
                        # the remainder restarts at the time of the goal. The dict is the
                        # very object held by the queue, so it is updated in place.
                        # NOTE(review): a goal during the second half of a double minor
                        # should probably end the penalty outright - confirm the rule wanted.
                        penalty['penaltyStart'] = actionTime
                        penalty['penaltyLength'] -= 2
                        penalty['penaltyEnd'] = actionTime + penalty['penaltyLength'] * 60

                        if penalty['penaltyLength'] <= 0:

                            # Nothing left to serve: drop the penalty and restore manpower
                            penaltyQueue.remove(penalty)

                            if penalty['team'] == 'HOME':
                                context['manpowerDiff'] -= 1
                            else:
                                context['manpowerDiff'] -= -1

                        # Assignment (not comparison): this team has used up its reduction
                        FLAGS[penalty['team']] = False

    # Updating the context if needed
    if row['periodNum'] != context['periodNum']:
        context['periodNum'] = row['periodNum']

    # Adding in the next sequence row: every non start/end event, plus penalties
    if (row['eventTypeID'] not in startEndEvents) or (row['eventTypeID'] == 'PENALTY'):
        sequences.append([row['gameID'],
                          context['goalDiff'],
                          context['manpowerDiff'],
                          context['periodNum'],
                          sequenceNum,
                          eventNum,
                          row['eventTypeID'],  # action type
                          teamType,  # Home/Away
                          row['secondsElapsed'],
                          row['xCoordinate'],
                          row['yCoordinate']])

    # Action events extend the current sequence; anything else closes it
    if row['eventTypeID'] in actionEvents:
        eventNum += 1
    else:
        sequenceNum += 1
        eventNum = 0

sequenceData = pd.DataFrame(sequences,
                            columns=['gameID',
                                     'goalDiff',
                                     'manpowerDiff',
                                     'periodNum',
                                     'sequenceNum',
                                     'eventNum',
                                     'event',
                                     'team',
                                     'secondsElapsed',
                                     'xCoord',
                                     'yCoord'])

In [35]:
# Display the generated sequence rows (one row per emitted event, with its game context).
sequenceData

Unnamed: 0,gameID,goalDiff,manpowerDiff,periodNum,sequenceNum,eventNum,event,team,secondsElapsed,xCoord,yCoord
0,2020030312,0,0,1,0,0,FACEOFF,AWAY,0.0,0.0,0.0
1,2020030312,0,0,1,1,0,FACEOFF,AWAY,11.0,69.0,22.0
2,2020030312,0,0,1,1,1,BLOCKED_SHOT,HOME,16.0,-81.0,9.0
3,2020030312,0,0,1,1,2,HIT,AWAY,25.0,34.0,-39.0
4,2020030312,0,0,1,2,0,FACEOFF,AWAY,41.0,69.0,-22.0
...,...,...,...,...,...,...,...,...,...,...,...
312,2020030312,1,0,3,54,1,BLOCKED_SHOT,AWAY,3553.0,-76.0,2.0
313,2020030312,1,0,3,54,2,HIT,HOME,3568.0,-42.0,-37.0
314,2020030312,1,0,3,55,0,FACEOFF,AWAY,3579.0,-69.0,-22.0
315,2020030312,1,0,3,55,1,MISSED_SHOT,HOME,3589.0,62.0,22.0


# Creating Model

In [23]:
def get_device():
    """Return the torch device to compute on: 'cuda:0' when CUDA is available, else 'cpu'."""
    return torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def df_to_tensor(df):
    """Convert a DataFrame's values to a float32 tensor placed on the device from get_device()."""
    target = get_device()
    asFloat = torch.from_numpy(df.values).float()
    return asFloat.to(target)

In [37]:
# Batch the (gameID, goalDiff) pairs ten at a time, in their original order.
data = DataLoader(
    sequenceData.loc[:, ['gameID', 'goalDiff']].to_numpy(),
    batch_size=10,
    shuffle=False,
)

## Model Definition

In [51]:

# Rebuild the loader over the (gameID, goalDiff) pairs: batches of ten, original order.
data = DataLoader(
    sequenceData.loc[:, ['gameID', 'goalDiff']].to_numpy(),
    batch_size=10,
    shuffle=False,
)

class DQN(nn.Module):
    """Small fully connected network: numInputs -> 24 -> 32 -> 2.

    Parameters:
        numInputs: number of features per sample after flattening.
    """

    def __init__(self, numInputs):
        super().__init__()

        self.hidden1 = nn.Linear(in_features=numInputs, out_features=24)
        self.hidden2 = nn.Linear(in_features=24, out_features=32)
        self.output = nn.Linear(in_features=32, out_features=2)

    def forward(self, t):
        """Run a batch through the network and return the raw 2-value output per sample.

        Everything after the batch dimension is flattened first. The layers are
        addressed by the names assigned in __init__ (hidden1 / hidden2 / output);
        the previous fc1 / fc2 / out names did not exist and raised AttributeError.
        """
        t = t.flatten(start_dim=1)
        t = F.relu(self.hidden1(t))
        t = F.relu(self.hidden2(t))
        return self.output(t)
    
    

In [56]:
# Single-epoch skeleton: fetch the first batch from the loader and stop.
for epoch in range(1):
    for batch in data:
        break

tensor([[2020030312,          0],
        [2020030312,          0],
        [2020030312,          0],
        [2020030312,          0],
        [2020030312,          0],
        [2020030312,          0],
        [2020030312,          0],
        [2020030312,          0],
        [2020030312,          0],
        [2020030312,          0]])


In [54]:
# NOTE(review): `x` is not defined in any cell shown here - presumably a DQN instance
# created in a cell that was later removed; confirm this survives Restart & Run All.
x.hidden1.weight

Parameter containing:
tensor([[-0.3773,  0.4216,  0.2554,  0.0848, -0.2575],
        [-0.0505,  0.0757,  0.4043, -0.1637, -0.0729],
        [ 0.2107,  0.2404, -0.0229, -0.2013,  0.4209],
        [-0.2649,  0.3277,  0.4037,  0.3222,  0.1661],
        [ 0.2586, -0.2329,  0.2711,  0.0842,  0.2630],
        [-0.4032,  0.2871,  0.0596,  0.3396, -0.0819],
        [-0.2062, -0.2569, -0.1458,  0.3468,  0.0007],
        [ 0.2522,  0.3693, -0.4261, -0.0793,  0.1072],
        [-0.1214, -0.4367,  0.4050, -0.1142, -0.3767],
        [ 0.3603, -0.4130, -0.3699, -0.0504, -0.1474],
        [ 0.3940,  0.0663, -0.3356, -0.1324,  0.1299],
        [ 0.1489,  0.1741,  0.1990,  0.2746, -0.3800],
        [ 0.4360, -0.0019,  0.3932, -0.1191, -0.3712],
        [-0.0553, -0.2671, -0.2409,  0.3725, -0.1248],
        [-0.3919,  0.2505,  0.0121,  0.4357, -0.2503],
        [ 0.3803,  0.2123, -0.0383, -0.0466, -0.0882],
        [-0.4381,  0.2540, -0.3387, -0.3482, -0.2942],
        [ 0.4175,  0.3421,  0.2214, -0.2753