In [3]:
import pandas as pd
from SQLCode import DatabaseConnection
from SQLCode import DatabaseCredentials as DBC
import numpy as np


from sklearn.feature_extraction.text import CountVectorizer

from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch

from sklearn import preprocessing

In [4]:
# Open the database connection that every pd.read_sql_query call in this notebook uses.
# Credentials are supplied by the project-local DatabaseCredentials helper.
creds = DBC.DataBaseCredentials()
conn = DatabaseConnection.sql_connection(creds.server, creds.database, creds.user, creds.password)
connection = conn.open()
# NOTE(review): `cursor` is not referenced again in the visible notebook, and the connection is never closed here.
cursor = connection.cursor()

In [5]:
# Full-table variant, currently disabled:
# liveFeed = pd.read_sql_query("select * from live_feed", connection)
# Load the play-by-play rows of one game only (gameID 2020020634) -- presumably to keep iteration fast.
liveFeed = pd.read_sql_query("select * from live_feed where gameID = 2020020634", connection)

In [6]:
# Load the whole schedules table; later cells read its gameID, seasonID and gameType columns.
seasons = pd.read_sql_query("select * from schedules", connection)

In [7]:
# Full-table variant, currently disabled:
# boxscores = pd.read_sql_query("select gameID, teamID, playerID from box_scores", connection)
# Load (gameID, teamID, playerID) for the same single game as liveFeed; merged onto the events further down.
boxscores = pd.read_sql_query("select gameID, teamID, playerID from box_scores where gameID = 2020020634", connection)

# Creating Model Input

In [8]:
# Regular-season games ('R') from season 20102011 onwards (when live data started).
# NOTE(review): seasonsFiltered is not referenced again in the visible notebook -- the merge below uses `seasons`.
seasonsFiltered = seasons[(seasons['seasonID'] >= 20102011) & (seasons['gameType'] == 'R')]

In [None]:
# Join every live-feed row to its schedule row, keep seasons from 20102011 onwards, and keep
# only the primary row of each event (eventSubID == 0) plus the assist rows, which are
# turned into their own events in a later cell.
# The regular-season filter (gameType == 'R') is intentionally left disabled here.
rawData = (
    liveFeed
    .merge(seasons, how='inner', on='gameID')
    .loc[lambda frame: frame['seasonID'] >= 20102011]
    .loc[lambda frame: (frame['eventSubID'] == 0) | (frame['playerType'] == 'Assist')]
)

# Columns needed downstream
rawData = rawData[[
    'eventID', 'eventSubID', 'gameID', 'eventTypeID', 'playerType',
    'eventDescription', 'periodNum', 'periodTime',
    'xCoordinate', 'yCoordinate',
    'teamID', 'homeTeamID', 'awayTeamID', 'playerID', 'penaltyMinutes',
]]

In [None]:
# Relabel the assist rows of a goal ('GOAL' event with playerType 'Assist') as their own
# 'ASSIST' event type, so goals and assists stay distinguishable once playerType is dropped.
rawData['eventTypeID'] = np.where((rawData['eventTypeID'] == 'GOAL') & (rawData['playerType'] == 'Assist'), 'ASSIST', rawData['eventTypeID'])

In [None]:
# playerType has served its purpose (assist relabelling above), so remove it
rawData = rawData.drop(columns='playerType')

In [None]:
# Attach each player's box-score row for the game. The left join keeps events that have no
# matching box-score row; the box-score team id arrives as 'teamID_box' because of the suffix.
rawData = rawData.merge(
    boxscores,
    how='left',
    left_on=['gameID', 'playerID'],
    right_on=['gameID', 'playerID'],
    suffixes=('', '_box'),
)

In [None]:
# Keep only the event types the model works with, then order the rows by game and event.
# Within an event the rows are sorted by eventSubID descending, so rows with a higher
# eventSubID come before the eventSubID == 0 row.
# (A playerType filter used to live here; it is disabled and playerType has already been dropped.)
rawDataFiltered = (
    rawData
    .loc[rawData['eventTypeID'].isin([
        'FACEOFF', 'SHOT', 'MISSED_SHOT', 'BLOCKED_SHOT',
        'TAKEAWAY', 'GIVEAWAY', 'HIT', 'GOAL', 'ASSIST',
        'PERIOD_END', 'EARLY_INT_START', 'PENALTY', 'STOP', 'EARLY_INT_END',
    ])]
    .sort_values(by=['gameID', 'eventID', 'eventSubID'], ascending=[True, True, False])
)

In [None]:
# eventSubID was only needed for the sort above
rawDataFiltered = rawDataFiltered.drop(columns='eventSubID')

In [None]:
# Converting period number to numeric (seems to be read in as a string)
rawDataFiltered['periodNum'] = pd.to_numeric(rawDataFiltered['periodNum'])

# Filtering to only the first three periods for consistency.
# .copy() makes the result an independent frame: later cells add columns to it, and
# assigning into a filtered slice would otherwise trigger SettingWithCopyWarning.
rawDataFiltered = rawDataFiltered[rawDataFiltered['periodNum'] <= 3].copy()

In [None]:
def team_type(teamID, homeTeamID, awayTeamID):
    """Classify `teamID` as 'HOME' or 'AWAY' for a game.

    Returns 'HOME' when teamID equals homeTeamID, otherwise 'AWAY' when it equals
    awayTeamID, otherwise np.nan (the event belongs to neither side).
    The home comparison is made first, so it wins if both ids are equal.
    """
    if teamID == homeTeamID:
        return 'HOME'
    if teamID == awayTeamID:
        return 'AWAY'
    return np.nan

def coordinate_normalization(xCoord, teamType, periodNum):
    """Mirror an x coordinate depending on the acting team and the period.

    Parameters
    ----------
    xCoord : number
        Raw x coordinate of the event.
    teamType : 'HOME', 'AWAY' or missing (NaN / None)
        Side of the acting team, as produced by `team_type`.
    periodNum : int or int-like string
        Period in which the event happened.

    Returns
    -------
    xCoord unchanged for AWAY in odd periods and for non-AWAY in even periods,
    `-1 * xCoord` in the other two cases, and None when teamType is missing.
    """
    # `teamType == np.nan` is always False (NaN never compares equal to anything),
    # so the missing-team case has to be detected with pd.isna.
    if pd.isna(teamType):
        return None

    awayKeepsSign = int(periodNum) % 2 == 1
    if (teamType == 'AWAY') == awayKeepsSign:
        return xCoord
    return -1 * xCoord
            
def elapsed_seconds(periodNum, periodTime):
    """Combine a period number and a period clock reading into one elapsed-time value.

    Adds 20 * 60 for every completed period to `pd.Timedelta(periodTime).total_seconds() / 60`.

    NOTE(review): the division by 60 only yields seconds if the clock value is parsed with
    minutes landing in the hours field (e.g. an 'MM:SS' string read as 'HH:MM') -- confirm
    against the stored periodTime format.
    """
    completedPeriods = int(periodNum) - 1
    clock = pd.Timedelta(periodTime)
    return completedPeriods * 20 * 60 + clock.total_seconds()/60

class Queue:
    """List-backed FIFO queue: new items go to the front of the list, the oldest sits at the end.

    Besides the usual queue operations it allows replacing or removing an arbitrary item.
    """
    #CITATION: https://runestone.academy/runestone/books/published/pythonds/BasicDS/ImplementingaQueueinPython.html
    def __init__(self):
        self.queue = []

    def isEmpty(self):
        """Return True when the queue holds no items."""
        return self.queue == []

    def enqueue(self, item):
        """Add `item` as the newest element (index 0 of the underlying list)."""
        self.queue[:0] = [item]

    def dequeue(self):
        """Remove and return the oldest element (last element of the underlying list)."""
        return self.queue.pop(-1)

    def size(self):
        """Return the number of queued items."""
        return len(self.queue)

    def get_queue(self):
        """Return the underlying list itself (not a copy), newest item first."""
        return self.queue

    def exchange(self, oldItem, newItem):
        """Replace the first element equal to `oldItem` with `newItem`; ValueError if absent."""
        position = self.queue.index(oldItem)
        self.queue[position] = newItem

    def remove(self, item):
        """Delete the first element equal to `item`; ValueError if absent."""
        self.queue.remove(item)

In [None]:
# Tag every event with the side of the acting team ('HOME' / 'AWAY', NaN when it is neither).
rawDataFiltered['teamType'] = rawDataFiltered.apply(lambda row: team_type(row['teamID'], 
                                                                          row['homeTeamID'], 
                                                                          row['awayTeamID']) ,axis=1)

In [None]:
# Overwrite xCoordinate with its side/period-normalised value.
# NOTE: this cell is not idempotent -- running it a second time applies the sign flips again.
rawDataFiltered['xCoordinate'] = rawDataFiltered.apply(lambda row: coordinate_normalization(row['xCoordinate'], 
                                                           row['teamType'], 
                                                           row['periodNum']), axis=1)

In [None]:
# Elapsed game time of every event, derived from its period number and period clock.
rawDataFiltered['secondsElapsed'] = rawDataFiltered.apply(lambda row: elapsed_seconds(row['periodNum'], row['periodTime']), axis=1)

In [None]:
# Visual sanity check only: display the rows that were relabelled as ASSIST.
rawDataFiltered[rawDataFiltered['eventTypeID'] == 'ASSIST']

In [None]:
# Walk the ordered events once and emit one model row per action, tagged with the game
# context (goal differential, manpower differential, period) in force when it happened.
#
# Conventions kept from the original implementation:
#   * manpowerDiff moves +1 when HOME takes a penalty and -1 otherwise; the move is
#     undone when the penalty expires or is cancelled by a goal.
#   * goalDiff moves -1 on a HOME goal and +1 otherwise.
#   * Every GOAL row is preceded by an injected 'SHOT' row by the same player.
#   * Events in actionEvents extend the current sequence; any other event closes it.
#
# Fixes compared with the previous version:
#   * the penalty queue is reset at the start of every game (penalties used to leak
#     into the next game's fresh context);
#   * penalty lengths are tracked in seconds throughout (they were stored in minutes
#     but reduced by 120 on a goal, which always cancelled a double minor completely);
#   * the penalty queue is iterated over a snapshot, never while it is being mutated;
#   * the progress print uses the row position instead of the DataFrame index label.


def _sequence_row(row, context, sequenceNum, eventNum, eventType, teamType):
    """Build one output record for `row` using the context in force when it happened."""
    return [row['gameID'],
            context['goalDiff'],
            context['manpowerDiff'],
            context['periodNum'],
            sequenceNum,
            eventNum,
            eventType,  # action type
            teamType,  # HOME / AWAY / NaN
            row['secondsElapsed'],
            row['xCoordinate'],
            row['yCoordinate'],
            row['playerID']]


def _restore_manpower(context, penalizedTeam):
    """Undo the manpower adjustment applied when `penalizedTeam` was penalised."""
    if penalizedTeam == 'HOME':
        context['manpowerDiff'] -= 1
    else:
        context['manpowerDiff'] += 1


sequences = []
actionEvents = ['FACEOFF',
                'SHOT',
                'MISSED_SHOT',
                'BLOCKED_SHOT',
                'TAKEAWAY',
                'GIVEAWAY',
                'HIT',
                'ASSIST',
                'GOAL']
startEndEvents = ['PERIOD_START',
                  'PERIOD_END',
                  'EARLY_INT_START',
                  'PENALTY',
                  'STOP',
                  'SHOOTOUT_COMPLETE',
                  'GAME_END',
                  'EARLY_INT_END']

sequenceNum = 0
eventNum = 0
penaltyQueue = Queue()
gameID = 0
context = {'goalDiff':0, 'manpowerDiff':0, 'periodNum':1}
totalRows = len(rawDataFiltered)
for position, (index, row) in enumerate(rawDataFiltered.iterrows()):
    if position % 100000 == 0:
        print((position/totalRows)*100, '%')

    # New game: start from a clean context and forget the previous game's penalties
    if gameID != row['gameID']:
        gameID = row['gameID']
        context = {'goalDiff':0, 'manpowerDiff':0, 'periodNum':1}
        penaltyQueue = Queue()

    # Updating the context if needed
    if row['periodNum'] != context['periodNum']:
        context['periodNum'] = row['periodNum']

    eventType = row['eventTypeID']

    # Computing if respective team is home or away
    teamType = team_type(row['teamID'], row['homeTeamID'], row['awayTeamID'])

    # Time of this event; the secondsElapsed column was computed with elapsed_seconds
    # from the same periodNum / periodTime values.
    actionTime = row['secondsElapsed']

    # Release every penalty that ended before this event (oldest first).
    for penalty in list(reversed(penaltyQueue.get_queue())):
        if penalty['penaltyEnd'] < actionTime:
            penaltyQueue.remove(penalty)
            _restore_manpower(context, penalty['team'])

    # Catching penalties to update the context (10-minute misconducts do not change manpower)
    if eventType == 'PENALTY':
        penaltyMinutes = int(row['penaltyMinutes'])
        if penaltyMinutes != 10:
            penaltyStart = actionTime
            penaltyLength = penaltyMinutes * 60  # seconds
            penaltyEnd = penaltyStart + penaltyLength

            # Determining who took the penalty to update the context
            if teamType == 'HOME':
                context['manpowerDiff'] += 1
            else:
                context['manpowerDiff'] += -1

            penaltyQueue.enqueue({'team': teamType,
                                  'penaltyStart': penaltyStart,
                                  'penaltyEnd': penaltyEnd,
                                  'penaltyLength': penaltyLength})

    # Assists are recorded here, before the goal below changes the context
    if eventType == 'ASSIST':
        sequences.append(_sequence_row(row, context, sequenceNum, eventNum, eventType, teamType))

    if eventType == 'GOAL':
        # Injecting the shot before the goal
        sequences.append(_sequence_row(row, context, sequenceNum, eventNum, 'SHOT', teamType))
        eventNum += 1

        # Adding in the goal itself (still with the pre-goal context)
        sequences.append(_sequence_row(row, context, sequenceNum, eventNum, eventType, teamType))

        # Updating the context
        if teamType == 'HOME':
            context['goalDiff'] -= 1
        else:
            context['goalDiff'] += 1

        # At most one penalty per team may be shortened or cancelled by a single goal
        FLAGS = {'HOME':True, 'AWAY':True}

        for penalty in list(reversed(penaltyQueue.get_queue())):
            # A 5-minute major is served in full regardless of goals
            if penalty['penaltyLength'] == 5 * 60:
                continue

            # Only penalties of the team that was scored on are affected (not a
            # shorthanded goal), and only the oldest one per team.
            if (penalty['team'] != teamType) and FLAGS.get(penalty['team'], False):
                remaining = penalty['penaltyLength'] - 120

                if remaining <= 0:
                    # Minor penalty: it ends with the goal
                    penaltyQueue.remove(penalty)
                    _restore_manpower(context, penalty['team'])
                else:
                    # Longer penalty: two minutes are struck off and the rest restarts now
                    newPenalty = dict(penalty,
                                      penaltyStart=actionTime,
                                      penaltyLength=remaining,
                                      penaltyEnd=actionTime + remaining)
                    penaltyQueue.exchange(penalty, newPenalty)

                FLAGS[penalty['team']] = False

    # Every remaining recordable event: anything that is not a start/end marker, plus
    # penalties; goals and assists were already handled above.
    isRecordable = (eventType not in startEndEvents) or (eventType == 'PENALTY')
    if isRecordable and eventType not in ('GOAL', 'ASSIST'):
        sequences.append(_sequence_row(row, context, sequenceNum, eventNum, eventType, teamType))

    # Action events extend the current sequence; anything else starts a new one
    if eventType in actionEvents:
        eventNum += 1
    else:
        sequenceNum += 1
        eventNum = 0

sequenceData = pd.DataFrame(sequences,
                            columns=['gameID',
                                     'goalDiff',
                                     'manpowerDiff',
                                     'periodNum',
                                     'sequenceNum',
                                     'eventNum',
                                     'event',
                                     'team',
                                     'secondsElapsed',
                                     'xCoord',
                                     'yCoord',
                                     'playerID'])

In [None]:
# Drop every sequence that contains at least one row with a null value
# (the whole sequence goes, not just the offending row).
sequenceData = sequenceData[
    ~sequenceData['sequenceNum'].isin(
        sequenceData.loc[sequenceData.isnull().any(axis=1), 'sequenceNum'].values
    )
]

In [None]:
# One-hot style encoding of the event type via a bag-of-words vectorizer
eventVectorizer = CountVectorizer().fit(sequenceData['event'])
vectorizedEvents = eventVectorizer.transform(sequenceData['event']).toarray()

# Vocabulary tokens in alphabetical order, used as the names of the vectorized columns
actions = [token for token, _ in sorted(eventVectorizer.vocabulary_.items())]

In [None]:
# Append one column per event token.
# NOTE: re-running this cell would try to insert columns that already exist.
for action, actionName in enumerate(actions):
    sequenceData.insert(loc=len(sequenceData.columns), column=actionName, value=vectorizedEvents[:, action])

In [None]:
# The raw event label is now represented by the vectorized columns, so drop it.
# (An earlier pd.concat-based way of attaching those columns was removed in favour of the insert loop above.)
sequenceData = sequenceData.drop(columns=['event'])

In [None]:
# Events without a team get the placeholder label 'neither' before vectorizing
sequenceData['team'] = sequenceData['team'].fillna('neither')

In [None]:
# Same bag-of-words encoding as for the events, this time for the team label
teamVectorizer = CountVectorizer().fit(sequenceData['team'])
vectorizedTeams = teamVectorizer.transform(sequenceData['team']).toarray()

# Vocabulary tokens in alphabetical order, used as the names of the vectorized columns
teams = [token for token, _ in sorted(teamVectorizer.vocabulary_.items())]


In [None]:
# Append one column per team token.
# NOTE: re-running this cell would try to insert columns that already exist.
for team, teamName in enumerate(teams):
    sequenceData.insert(loc=len(sequenceData.columns), column=teamName, value=vectorizedTeams[:, team])

In [None]:
# The raw team label is now represented by the vectorized columns, so drop it.
# (An earlier pd.concat-based way of attaching those columns was removed in favour of the insert loop above.)
sequenceData = sequenceData.drop(columns=['team'])

In [None]:
# Continuous features that get standardized; the 0/1 indicator columns are left as they are
colsTransformed = [
    'goalDiff', 'manpowerDiff', 'periodNum',
    'secondsElapsed', 'xCoord', 'yCoord',
]

In [None]:
# Fit a standard scaler on the continuous feature columns listed in colsTransformed
# (fitted on all rows of sequenceData).
scaler = preprocessing.StandardScaler().fit(sequenceData[colsTransformed].values)

In [None]:
# Standardized values, one array column per entry of colsTransformed (same order)
scaledData = scaler.transform(sequenceData[colsTransformed].values)

In [None]:
# Write the standardized values back over the original columns
for i, colName in enumerate(colsTransformed):
    sequenceData[colName] = scaledData[:, i]

In [None]:
# Copy of the per-event features, without the game-context columns, to be joined back
# onto sequenceData as a shifted event.
sequenceDataTMinusOne = sequenceData.drop(columns=['goalDiff', 'manpowerDiff', 'periodNum']).copy(deep=True)

# Decrement eventNum so that, in the merge below, the row originally numbered k+1 lines
# up with event k. Despite the 'TMinusOne' name, the joined columns therefore describe
# the event that FOLLOWS event k in the same sequence.
sequenceDataTMinusOne['eventNum'] = sequenceDataTMinusOne['eventNum'] - 1

In [None]:
# Pair every event with the shifted copy. The left join keeps events without a partner
# (the last event of a sequence); their '_TMinusOne' columns are NaN.
sequenceDataComplete = sequenceData.merge(
    sequenceDataTMinusOne,
    how='left',
    left_on=['gameID', 'sequenceNum', 'eventNum'],
    right_on=['gameID', 'sequenceNum', 'eventNum'],
    suffixes=('', '_TMinusOne'),
)

In [None]:
# Final column layout of the model input (37 columns). The positions matter: the model
# code slices this layout by integer index.
columns = [
    # 0-2: identifiers
    'gameID', 'sequenceNum', 'eventNum',
    # 3-5: game context
    'goalDiff', 'manpowerDiff', 'periodNum',
    # 6-8: time and location of the event
    'secondsElapsed', 'xCoord', 'yCoord',
    # 9-18: event type indicators
    'blocked_shot', 'faceoff', 'giveaway', 'goal', 'assist',
    'hit', 'missed_shot', 'penalty', 'shot', 'takeaway',
    # 19-20: team indicators
    'away', 'home',
    # 21: acting player
    'playerID',
    # 22-24: time and location of the joined event
    'secondsElapsed_TMinusOne', 'xCoord_TMinusOne', 'yCoord_TMinusOne',
    # 25-34: event type indicators of the joined event
    'blocked_shot_TMinusOne', 'faceoff_TMinusOne', 'giveaway_TMinusOne',
    'goal_TMinusOne', 'assist_TMinusOne', 'hit_TMinusOne',
    'missed_shot_TMinusOne', 'penalty_TMinusOne', 'shot_TMinusOne',
    'takeaway_TMinusOne',
    # 35-36: team indicators of the joined event
    'away_TMinusOne', 'home_TMinusOne',
]

In [None]:
# Any expected column that never occurred in this data set (e.g. an event type that was
# not observed) is added as an all-zero column so the layout is always complete.
for col in columns:
    if col in sequenceDataComplete.columns:
        continue
    sequenceDataComplete[col] = np.zeros(len(sequenceDataComplete))

In [None]:
# Put the columns into the fixed order the model code indexes into
sequenceDataComplete = sequenceDataComplete.loc[:, columns]

In [None]:
# Visual check of the final column order
sequenceDataComplete.columns

# Model Creation
## Model Definition

In [None]:
# determine the supported device
def get_device():
    """Return the first CUDA device when CUDA is available, otherwise the CPU device."""
    return torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# convert a df to tensor to be used in pytorch
def df_to_tensor(df):
    """Return the values of `df` as a float64 tensor placed on the device from get_device()."""
    return torch.from_numpy(df.values).type(torch.float64).to(get_device())

def custom_loss(modelInput, model, i):
    """Mean squared temporal-difference error for event `i` of one sequence.

    Parameters
    ----------
    modelInput : torch.Tensor
        All events of one sequence, indexed as (event, 1, feature). The feature axis
        follows the 37-entry `columns` layout: 3-20 are the features of the event
        itself, 22-36 the features of the joined '_TMinusOne' event.
    model : callable
        Maps a (events, 1, 18) history tensor to the three outcome values
        (home scores, away scores, neither).
    i : int
        Index of the event being evaluated; the history fed to the model is events 0..i.

    Returns
    -------
    torch.Tensor
        Scalar: mean of the squared error `R + model(tPlusOne) - model(t)`, or
        `R - model(t)` when the joined event is missing (NaN features).
    """
    # Positions in the feature axis (see `columns`)
    GOAL_COL, AWAY_COL, HOME_COL = 12, 19, 20
    # Game context (3-5) followed by the features of the joined event (22-36)
    JOINED_EVENT_COLS = [3, 4, 5] + list(range(22, 37))

    t = modelInput[0:i+1,:,3:21]
    tPlusOne = modelInput[0:i+1,:,JOINED_EVENT_COLS]

    # Reward: one-hot over (home scored, away scored, no one scored) for event i
    if (modelInput[i,:,[GOAL_COL]] == 1) & (modelInput[i,:,[HOME_COL]] == 1):
        reward = [1,0,0]
    elif (modelInput[i,:,[GOAL_COL]] == 1) & (modelInput[i,:,[AWAY_COL]] == 1):
        reward = [0,1,0]
    else:
        reward = [0,0,1]

    # Build the reward on the same device as the input instead of a globally chosen
    # one, so the function also works for tensors that live elsewhere (e.g. on the CPU).
    R = torch.tensor(reward).to(modelInput.device)

    # A NaN in the joined-event features means there is no joined event for some step
    if torch.sum(torch.isnan(tPlusOne)) > 0:
        loss = R - model(t)
    else:
        loss = R + model(tPlusOne) - model(t)

    return torch.mean(torch.square(loss))

In [None]:
class DQN(nn.Module):
    """LSTM followed by four fully connected ReLU layers and a 3-way softmax output.

    All layers use float64 parameters.

    Parameters
    ----------
    inputSize : int
        Number of features per event (default 18).
    numLSTMNodes : int
        Hidden size of the LSTM (default 1000).
    numLSTMLayers : int
        Number of stacked LSTM layers (default 1).
    hiddenSize : int
        Width of the four fully connected layers (default 1000).
    """
    def __init__(self, inputSize=18, numLSTMNodes=1000, numLSTMLayers=1, hiddenSize=1000):
        super().__init__()
        self.inputSize = inputSize
        self.numLSTMNodes = numLSTMNodes
        self.numLSTMLayers = numLSTMLayers

        self.lstmLayer = nn.LSTM(input_size=self.inputSize,
                                 hidden_size=self.numLSTMNodes,
                                 num_layers=self.numLSTMLayers,
                                 bias=True,
                                 dropout=0,
                                 batch_first=True).double()
        self.hidden1 = nn.Linear(in_features=self.numLSTMNodes, out_features=hiddenSize).double()
        self.hidden2 = nn.Linear(in_features=hiddenSize, out_features=hiddenSize).double()
        self.hidden3 = nn.Linear(in_features=hiddenSize, out_features=hiddenSize).double()
        self.hidden4 = nn.Linear(in_features=hiddenSize, out_features=hiddenSize).double()
        self.output = nn.Linear(in_features=hiddenSize, out_features=3).double()

    def forward(self, modelInput):
        """Run the event history through the LSTM one event at a time and score the last step.

        modelInput is iterated along its first axis; every element is flattened to a
        single (1, 1, inputSize) LSTM step. Returns a (1, 1, 3) tensor whose last axis
        sums to 1.

        NOTE: the initial LSTM state is drawn from a standard normal on every call, so
        repeated calls on the same input give different outputs unless the torch RNG
        is seeded.
        """
        if modelInput.shape[0] == 0:
            raise ValueError("modelInput must contain at least one event")

        # Create the initial state with the dtype and on the device of the LSTM weights.
        # The previous torch.cuda.FloatTensor(...) call only worked on a CUDA machine.
        reference = self.lstmLayer.weight_ih_l0
        stateShape = (self.numLSTMLayers, 1, self.numLSTMNodes)
        hidden = (
            torch.randn(stateShape, dtype=reference.dtype, device=reference.device),
            torch.randn(stateShape, dtype=reference.dtype, device=reference.device),
        )
        for sequence in modelInput:
            out, hidden = self.lstmLayer(sequence.view(1,1,-1), hidden)
        t = F.relu(out)
        t = F.relu(self.hidden1(t))
        t = F.relu(self.hidden2(t))
        t = F.relu(self.hidden3(t))
        t = F.relu(self.hidden4(t))
        t = F.softmax(self.output(t),dim=2)
        return t

In [None]:
# convert a df to tensor to be used in pytorch
# NOTE(review): this repeats the df_to_tensor definition from the model-definition cell
# above; whichever cell runs last wins.
def df_to_tensor(df):
    """Convert the values of `df` to a float64 tensor on the device chosen by get_device()."""
    target = get_device()
    values = torch.from_numpy(df.values)
    return values.type(torch.float64).to(target)

In [None]:
# Convert the complete model input to a float64 tensor on the device chosen by get_device()
# (the GPU when CUDA is available, otherwise the CPU).
dfGPU = df_to_tensor(sequenceDataComplete)

In [None]:
# Reshaping to (number of events, 1, num features).
# 37 is the number of entries in `columns`; it has to be updated if that list changes.
dfGPU = dfGPU.reshape((-1,1,37))

## Model Training

In [None]:
# Build the network on the selected device, in float64 to match the input tensor
network = DQN()
device = get_device()
network.to(device)

network = network.type(torch.float64)

optimizer = optim.Adam(network.parameters(), lr=0.0001)

# One pass over the data: every game is an episode, every sequence inside it is trained
# event by event, with one optimizer step per event.
for gameID in sequenceDataComplete['gameID'].unique():
    print(gameID)
    # Column 0 holds gameID, column 1 holds sequenceNum
    gameDataGPU = dfGPU[dfGPU[:, 0, 0] == gameID]
    gameData = sequenceDataComplete[sequenceDataComplete['gameID'] == gameID]
    for sequenceNum in gameData['sequenceNum'].unique():
        modelInput = gameDataGPU[gameDataGPU[:, 0, 1] == sequenceNum]

        for i in range(modelInput.shape[0]):
            # loss for event i given the history 0..i
            loss = custom_loss(modelInput, network, i)

            # reset gradients, backpropagate, update the weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        

        

## Compiling Results

In [None]:
# Score every event with the trained network and collect one result row per event.
#
# Fixes compared with the previous version:
#   * each row is attributed to event i itself (player / team flags of modelInput[i]);
#     it used to read modelInput[-1], i.e. the LAST event of the sequence, for every step;
#   * scoring runs under torch.no_grad(), so no autograd graph is built.
#
# The recorded value is the raw network output for the history 0..i (not the
# difference to the previous step).
# NOTE: DQN.forward draws a random initial LSTM state, so the values differ between
# runs unless the torch RNG is seeded.
GIM = []
with torch.no_grad():
    # Each Episode
    for gameID in sequenceDataComplete['gameID'].unique():
        print(gameID)
        gameDataGPU = dfGPU[dfGPU[:,:,0][:,0] == gameID,:,:]
        gameData = sequenceDataComplete[sequenceDataComplete['gameID'] == gameID]
        for sequenceNum in gameData['sequenceNum'].unique():
            modelInput = gameDataGPU[gameDataGPU[:,:,1][:,0] == sequenceNum,:,:]
            for i in range(modelInput.shape[0]):
                Q_t = network(modelInput[0:i+1,:,3:21])
                gim_t = Q_t
                GIM.append([int(modelInput[i,0,0].item()),  # GameID
                            modelInput[i,0,21].item(),  # PlayerID
                            modelInput[i,0,19].item(),  # Away
                            modelInput[i,0,20].item(),  # Home
                            gim_t[0,0,0].item(),  # Home Probability
                            gim_t[0,0,1].item(),  # Away Probability
                            gim_t[0,0,2].item()])  # Neither Probability

results = pd.DataFrame(GIM, columns=['gameID',
                                     'playerID',
                                     'awayTeam',
                                     'homeTeam',
                                     'homeProbability',
                                     'awayProbability',
                                     'neitherProbability'])

In [None]:
# Persist the trained weights (state dict only) to 'model_all_games.pt' in the working directory
torch.save(network.state_dict(),'model_all_games.pt')

In [None]:
# Value of an event: the away probability when the away flag is 1, otherwise the home probability
# (rows where neither flag is 1 therefore also receive the home probability).
results['value'] = np.where(results['awayTeam'] == 1, results['awayProbability'], results['homeProbability'])

In [None]:
# Write the per-event results to 'deep_rl_results_all_games.csv' (the DataFrame index is included)
results.to_csv('deep_rl_results_all_games.csv')

In [None]:
# Attach the season of every game to the per-event results (inner join on gameID)
resultsComplete = results.merge(
    seasons[['seasonID', 'gameID']],
    left_on=['gameID'],
    right_on=['gameID'],
)

In [None]:
# Total value per player and season, highest first.
# GroupBy.sum takes `numeric_only` as its first argument, not a column name, so the
# previous .sum('value') only worked by accident. Only 'value' is left to aggregate
# after the column selection, so a plain .sum() gives the intended result.
resultsComplete = (
    resultsComplete[['seasonID', 'playerID', 'value']]
    .groupby(['seasonID', 'playerID'])
    .sum()
    .reset_index()
    .sort_values('value', ascending=False)
)

In [None]:
# Load player names so the aggregated results can be labelled
players = pd.read_sql_query("select playerID, firstName, lastName from players", connection)

In [None]:
# Join the names onto the per-season totals (inner join on playerID), sort by value descending
# and write the result to 'deep_rl_results_all_games_aggregated.csv'.
pd.merge(resultsComplete, players,on='playerID').sort_values('value',ascending=False).to_csv('deep_rl_results_all_games_aggregated.csv')