In [5]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Shared in-memory store used by every helper below.
#   'games' -> per-team stats cache, keyed by str(teamId)
# Further entries ('teams', 'annotatedDF') are attached once the CSV files are loaded.
gameData = dict(games={})
    

# Loads a raw CSV file and creates a DF from it
def loadFile(filePath, inputDir="./mens-machine-learning-competition-2018"):
    """Read one CSV file from the data directory into a DataFrame.

    Parameters
    ----------
    filePath : str
        Name (or relative path) of the CSV file inside ``inputDir``.
    inputDir : str, optional
        Directory that holds the data files. The default is the folder this
        notebook was written against, so existing one-argument calls behave
        as before.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` parses from the file.
    """
    # os.path.join instead of manual "/" concatenation keeps the path portable.
    return pd.read_csv(os.path.join(inputDir, filePath))

def getTeamId(teamName):
    """Return the TeamID of the first row in gameData['teams'] whose TeamName equals teamName.

    Indexing the empty match raises IndexError when no row has that name.
    """
    teams = gameData['teams']
    matchingIds = teams.loc[teams['TeamName'] == teamName, 'TeamID']
    return matchingIds.values[0]

def getTeamName(teamId):
    """Return the TeamName of the first row in gameData['teams'] whose TeamID equals teamId.

    Indexing the empty match raises IndexError when no row has that id.
    """
    teams = gameData['teams']
    matchingNames = teams.loc[teams['TeamID'] == teamId, 'TeamName']
    return matchingNames.values[0]

def getTeamWins(teamId):
    """Return the rows of gameData['annotatedDF'] whose WTeamID equals teamId."""
    boxScores = gameData['annotatedDF']
    wonMask = boxScores['WTeamID'] == teamId
    return boxScores[wonMask]

def getTeamLosses(teamId):
    """Return the rows of gameData['annotatedDF'] whose LTeamID equals teamId."""
    boxScores = gameData['annotatedDF']
    lostMask = boxScores['LTeamID'] == teamId
    return boxScores[lostMask]
    
def getGames(teamId):
    """Return every row of gameData['annotatedDF'] in which teamId played.

    The result lists the team's wins first, followed by its losses; the
    original row index labels are kept.
    """
    # The helpers are named getTeamWins/getTeamLosses; the previous calls to
    # teamWins()/teamLosses() referenced names this cell never defines.
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    return pd.concat([getTeamWins(teamId), getTeamLosses(teamId)])

def getGamesByYear(teamId, year):
    """Collect one team's games for a single season from gameData['annotatedDF'].

    Parameters
    ----------
    teamId : int
        Id matched against the WTeamID / LTeamID columns.
    year : int
        Value matched against the Season column.

    Returns
    -------
    dict
        'wins'      -> rows where the team is WTeamID in that season
        'losses'    -> rows where the team is LTeamID in that season
        'full'      -> wins followed by losses
        'opponents' -> unique ids of the other team across those games
    """
    boxScores = gameData['annotatedDF']
    inSeason = boxScores['Season'] == year
    gamesWon = boxScores[inSeason & (boxScores['WTeamID'] == teamId)]
    gamesLost = boxScores[inSeason & (boxScores['LTeamID'] == teamId)]

    # pd.concat replaces DataFrame.append / Series.append (removed in pandas 2.0).
    allGames = pd.concat([gamesWon, gamesLost])
    # The opponent is the loser of a game we won and the winner of a game we lost.
    opponents = pd.concat([gamesWon['LTeamID'], gamesLost['WTeamID']])

    return {
        'wins': gamesWon,
        'losses': gamesLost,
        'full': allGames,
        'opponents': opponents.unique()
    }

def getPointsPerPossession(teamData):
    """Summarise a team's points per possession on offence and defence.

    Parameters
    ----------
    teamData : dict
        Must provide 'wins' and 'losses' DataFrames, each with WPPPOSS and
        LPPPOSS columns (the shape returned by getGamesByYear).

    Returns
    -------
    dict
        {'offensive': {'ave', 'std'}, 'defensive': {'ave', 'std'}} holding the
        mean and standard deviation of the respective per-game values.
    """
    wins = teamData['wins']
    losses = teamData['losses']

    # The team's own value is the W* column in games it won and the L* column
    # in games it lost; the opponent's value is the opposite column.
    # pd.concat replaces Series.append, which pandas 2.0 removed.
    offensive = pd.concat([wins['WPPPOSS'], losses['LPPPOSS']])
    defensive = pd.concat([wins['LPPPOSS'], losses['WPPPOSS']])

    return {
        'offensive': {
            'ave': offensive.mean(),
            'std': offensive.std()
        },
        'defensive': {
            'ave': defensive.mean(),
            'std': defensive.std()
        }
    }

def haveStats(teamId):
    """Tell whether gameData['games'] already holds an entry for teamId (keyed by str(teamId))."""
    cacheKey = str(teamId)
    return cacheKey in gameData['games']

def displayStats(teamId):
    """Print the cached points-per-possession summary for one team.

    When the team has an entry in the stats cache, prints its name and id
    followed by the offensive and defensive average / standard deviation and
    a blank line. Otherwise prints a single "No stats for: <name>" line.
    """
    if not haveStats(teamId):
        # Message previously read "No status for", a typo for "stats".
        print('No stats for: ' + getTeamName(teamId))
        return

    teamName = getTeamName(teamId)
    ppp = getTeamStats(teamId)['ppp']
    # Read all four values before printing so a malformed entry fails
    # without emitting partial output.
    offensiveAve = ppp['offensive']['ave']
    offensiveStd = ppp['offensive']['std']
    defensiveAve = ppp['defensive']['ave']
    defensiveStd = ppp['defensive']['std']

    print(teamName + ' (' + str(teamId) + '):')
    print('  Offensive:  ave: ' + str(offensiveAve) + '   std: ' + str(offensiveStd))
    print('  Defensive:  ave: ' + str(defensiveAve) + '   std: ' + str(defensiveStd))
    print('')
    
def addData(df):
    """Return a copy of the box-score frame with derived per-game columns appended.

    Added columns, in order:
      WFGPERC, WFGPERC3, LFGPERC, LFGPERC3  - made / attempted ratios
      WORPERC, LORPERC                      - OR divided by (FGA + FGA3)
      WPOSS, LPOSS                          - FGA + FGA3 + the other side's TO
      MOV                                   - WScore - LScore
      WPPPOSS, LPPPOSS                      - score divided by POSS
      MOP                                   - WPOSS - LPOSS
      MOPPPOSS                              - WPPPOSS - LPPPOSS

    The input frame is not modified.
    """
    # NOTE(review): WPOSS adds the loser's turnovers (LTO) and LPOSS the winner's (WTO),
    # and FGA3 is added on top of FGA -- confirm this is the intended possession estimate.
    return (
        df.assign(
            WFGPERC=df['WFGM'] / df['WFGA'],
            WFGPERC3=df['WFGM3'] / df['WFGA3'],
            LFGPERC=df['LFGM'] / df['LFGA'],
            LFGPERC3=df['LFGM3'] / df['LFGA3'],
            WORPERC=df['WOR'] / (df['WFGA'] + df['WFGA3']),
            LORPERC=df['LOR'] / (df['LFGA'] + df['LFGA3']),
            WPOSS=df['WFGA'] + df['WFGA3'] + df['LTO'],
            LPOSS=df['LFGA'] + df['LFGA3'] + df['WTO'],
            MOV=df['WScore'] - df['LScore'],
        )
        # Second stage needs the POSS columns created above.
        .assign(
            WPPPOSS=lambda stage: stage['WScore'] / stage['WPOSS'],
            LPPPOSS=lambda stage: stage['LScore'] / stage['LPOSS'],
            MOP=lambda stage: stage['WPOSS'] - stage['LPOSS'],
        )
        # Final stage needs the per-possession columns.
        .assign(
            MOPPPOSS=lambda stage: stage['WPPPOSS'] - stage['LPPPOSS'],
        )
    )

def addTeam(teamId):
    """Store a fresh empty entry for teamId in gameData['games'], replacing any existing one."""
    statsCache = gameData['games']
    statsCache[str(teamId)] = {}
    
def getTeamStats(teamId):
    """Return the entry stored under str(teamId) in gameData['games'] (KeyError if absent)."""
    statsCache = gameData['games']
    return statsCache[str(teamId)]

def addAllGames(teamId, year=2017):
    """Cache season stats for teamId and for every team reachable through its opponents.

    For each team that has no entry in the stats cache yet, an entry is
    created and filled with:
      'games' -> result of getGamesByYear(team, year)
      'ppp'   -> result of getPointsPerPossession on those games
    and the team's opponents are then processed the same way.

    Parameters
    ----------
    teamId : int
        Team to start from.
    year : int, optional
        Season passed to getGamesByYear. Defaults to 2017, the value that was
        previously hard-coded. The cache is keyed by team only, so teams that
        are already cached are skipped regardless of ``year``.
    """
    # An explicit stack replaces the addAllGames <-> addAllOpponents mutual
    # recursion, whose depth grew with every newly discovered team and could
    # exceed Python's recursion limit on a long opponent chain.
    pending = [teamId]
    while pending:
        currentId = pending.pop()
        if haveStats(currentId):
            continue

        addTeam(currentId)
        team = getTeamStats(currentId)
        teamGames = getGamesByYear(currentId, year)
        team['games'] = teamGames
        team['ppp'] = getPointsPerPossession(teamGames)

        # Push in reverse so opponents are popped in their listed order, which
        # reproduces the visiting order of the recursive version.
        pending.extend(reversed(list(teamGames['opponents'])))

def addAllOpponents(teamId):
    """Call addAllGames for each opponent id listed in teamId's cached 'games' entry."""
    opponentIds = getTeamStats(teamId)['games']['opponents']
    for opponentId in opponentIds:
        addAllGames(opponentId)
    
def dumpTeamStats(teamId):
    """Call displayStats for each opponent id listed in teamId's cached 'games' entry."""
    opponentIds = getTeamStats(teamId)['games']['opponents']
    for oppId in opponentIds:
        displayStats(oppId)
    
# CSV file names inside the competition data folder.
# NOTE(review): only teamsFile is referenced in this cell; the other names are defined but unused here.
seasonsFile = 'Seasons.csv'
playersFile = 'Players_2018.csv'
eventsFile = 'Events_2018.csv'
resultsFile = 'RegularSeasonCompactResults.csv'
teamsFile = 'Teams.csv'

# TeamID <-> TeamName lookup table read by getTeamId() / getTeamName()
gameData['teams'] = loadFile(teamsFile)

# Detailed regular-season box scores, extended with the derived columns from addData()
gameData['annotatedDF'] = addData(
    loadFile('RegularSeasonDetailedResults.csv')
)


In [6]:
def findGames(team1, team2):
    """Resolve both team names to ids via getTeamId; nothing further is implemented.

    TODO: the ids are looked up but not used yet, and the function returns None.
    """
    firstId, secondId = getTeamId(team1), getTeamId(team2)
    
# Fill the stats cache starting from Utah; addAllGames also processes its opponents.
addAllGames(getTeamId('Utah'))

# Print the points-per-possession summary for a handful of teams.
for teamName in ('Utah', 'UCLA', 'Arizona', 'USC'):
    displayStats(getTeamId(teamName))


Utah (1428):
  Offensive:  ave: 0.8898025929375443   std: 0.1360295258279601
  Defensive:  ave: 0.7648216348830016   std: 0.11945143221256599

UCLA (1417):
  Offensive:  ave: 0.8918413827305158   std: 0.108748457395003
  Defensive:  ave: 0.7522619174765589   std: 0.12561142224485178

Arizona (1112):
  Offensive:  ave: 0.9109980316189769   std: 0.11150286061617755
  Defensive:  ave: 0.7356656367713568   std: 0.11496998881638619

USC (1425):
  Offensive:  ave: 0.8424689675926142   std: 0.11516208368077571
  Defensive:  ave: 0.7563945555085675   std: 0.10161778615743602

