In [12]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Shared in-memory store for the notebook. 'games' starts out empty and is
# filled per season / per team by addTeam; the loading code at the bottom of
# this cell adds further entries next to it.
gameData = dict(games={})
    

# Reads one raw competition CSV into a DataFrame
def loadFile(filePath):
    """Load a CSV from the competition data directory.

    Args:
        filePath: File name (or relative path) inside the data directory.

    Returns:
        The DataFrame produced by pd.read_csv for that file.
    """
    inputDir = "./mens-machine-learning-competition-2018"
    csvPath = inputDir + "/" + filePath
    return pd.read_csv(csvPath)

def getTeamId(teamName):
    """Look up the TeamID of the first row whose TeamName equals teamName.

    Raises:
        IndexError: if no row in gameData['teams'] has that name.
    """
    teams = gameData['teams']
    matchingIds = teams.loc[teams['TeamName'] == teamName, 'TeamID']
    return matchingIds.values[0]

def getTeamName(teamId):
    """Look up the TeamName of the first row whose TeamID equals teamId.

    Raises:
        IndexError: if no row in gameData['teams'] has that id.
    """
    teams = gameData['teams']
    matchingNames = teams.loc[teams['TeamID'] == teamId, 'TeamName']
    return matchingNames.values[0]

def getTeamWins(teamId):
    """Return the rows of gameData['annotatedDF'] whose WTeamID equals teamId."""
    annotated = gameData['annotatedDF']
    wonMask = annotated['WTeamID'] == teamId
    return annotated[wonMask]

def getTeamLosses(teamId):
    """Return the rows of gameData['annotatedDF'] whose LTeamID equals teamId."""
    annotated = gameData['annotatedDF']
    lostMask = annotated['LTeamID'] == teamId
    return annotated[lostMask]
    
def getGames(teamId):
    """Return every annotated game the team played, across all seasons.

    Args:
        teamId: Team identifier matched against WTeamID / LTeamID.

    Returns:
        DataFrame holding the team's wins followed by its losses; the
        original row index of each game is preserved.
    """
    # Call the helpers by their real names (getTeamWins / getTeamLosses), and
    # use pd.concat because DataFrame.append was removed in pandas 2.0.
    return pd.concat([getTeamWins(teamId), getTeamLosses(teamId)])

def getGamesByYear(teamId, year):
    """Collect one team's annotated games for a single season.

    Args:
        teamId: Team identifier matched against the WTeamID / LTeamID columns.
        year: Season value matched against the Season column.

    Returns:
        dict with four entries:
            'wins'      - rows of gameData['annotatedDF'] the team won
            'losses'    - rows the team lost
            'full'      - wins followed by losses in one DataFrame
            'opponents' - array of the distinct opposing team ids
    """
    annotated = gameData['annotatedDF']
    inSeason = annotated['Season'] == year
    gamesWon = annotated[inSeason & (annotated['WTeamID'] == teamId)]
    gamesLost = annotated[inSeason & (annotated['LTeamID'] == teamId)]

    # pd.concat stands in for DataFrame.append / Series.append, which were
    # removed in pandas 2.0; the row order (wins first, then losses) is kept.
    allGames = pd.concat([gamesWon, gamesLost])
    # The opponent is the loser of a game we won and the winner of one we lost.
    opponents = pd.concat([gamesWon['LTeamID'], gamesLost['WTeamID']])

    return {
        'wins': gamesWon,
        'losses': gamesLost,
        'full': allGames,
        'opponents': opponents.unique()
    }

def getPointsPerPossession(teamData):
    """Summarise a team's points per possession on offence and defence.

    Args:
        teamData: dict with 'wins' and 'losses' DataFrames, each carrying
            WPPPOSS and LPPPOSS columns (the shape getGamesByYear returns).

    Returns:
        dict of the form
            {'offensive': {'ave': ..., 'std': ...},
             'defensive': {'ave': ..., 'std': ...}}
        where 'ave' is the mean and 'std' the pandas standard deviation.
    """
    wins = teamData['wins']
    losses = teamData['losses']

    # In a win the team's own figure is the W* column; in a loss it is the L*
    # column. The opponent's figure is the other one.
    # pd.concat stands in for Series.append, which was removed in pandas 2.0.
    offensive = pd.concat([wins.loc[:, 'WPPPOSS'], losses.loc[:, 'LPPPOSS']])
    defensive = pd.concat([wins.loc[:, 'LPPPOSS'], losses.loc[:, 'WPPPOSS']])

    return {
        'offensive': {
            'ave': offensive.mean(),
            'std': offensive.std()
        },
        'defensive': {
            'ave': defensive.mean(),
            'std': defensive.std()
        }
    }

def haveStats(teamId, year):
    """Tell whether gameData['games'] already holds an entry for this team-season.

    Both keys are compared in their str() form, matching how addTeam stores them.
    """
    seasons = gameData['games']
    yearKey = str(year)
    if yearKey not in seasons:
        return False
    return str(teamId) in seasons[yearKey]

def displayStats(teamId, year):
    """Print the cached points-per-possession summary for one team-season.

    When no stats are cached for the pair, a single notice line naming the
    team is printed instead.
    """
    if not haveStats(teamId, year):
        print('No status for: ' + getTeamName(teamId))
        return

    name = getTeamName(teamId)
    ppp = getTeamStats(teamId, year)['ppp']
    # Read every figure before printing anything, so a missing key fails
    # before partial output is written.
    offAve = ppp['offensive']['ave']
    offStd = ppp['offensive']['std']
    defAve = ppp['defensive']['ave']
    defStd = ppp['defensive']['std']

    print(''.join((str(year), ' - ', name, ' (', str(teamId), '):')))
    rows = (
        ('  Offensive:  ave: ', offAve, offStd),
        ('  Defensive:  ave: ', defAve, defStd),
    )
    for prefix, ave, std in rows:
        print(prefix + str(ave) + '   std: ' + str(std))
    print('')
    
def addData(df):
    """Return a copy of the detailed box scores with derived columns appended.

    The input frame is not modified. Columns are added in three passes because
    the later ones are computed from the earlier ones:

        pass 1: WFGPERC, WFGPERC3, LFGPERC, LFGPERC3  (made / attempted)
                WORPERC, LORPERC   (OR / (FGA + FGA3))
                WPOSS = WFGA + WFGA3 + LTO
                LPOSS = LFGA + LFGA3 + WTO
                MOV   = WScore - LScore
        pass 2: WPPPOSS = WScore / WPOSS, LPPPOSS = LScore / LPOSS
                MOP   = WPOSS - LPOSS
        pass 3: MOPPPOSS = WPPPOSS - LPPPOSS
    """
    return (
        df.assign(
            WFGPERC=df['WFGM'] / df['WFGA'],
            WFGPERC3=df['WFGM3'] / df['WFGA3'],
            LFGPERC=df['LFGM'] / df['LFGA'],
            LFGPERC3=df['LFGM3'] / df['LFGA3'],
            WORPERC=df['WOR'] / (df['WFGA'] + df['WFGA3']),
            LORPERC=df['LOR'] / (df['LFGA'] + df['LFGA3']),
            WPOSS=df['WFGA'] + df['WFGA3'] + df['LTO'],
            LPOSS=df['LFGA'] + df['LFGA3'] + df['WTO'],
            MOV=df['WScore'] - df['LScore'],
        )
        # Callables receive the frame produced by the previous pass.
        .assign(
            WPPPOSS=lambda games: games['WScore'] / games['WPOSS'],
            LPPPOSS=lambda games: games['LScore'] / games['LPOSS'],
            MOP=lambda games: games['WPOSS'] - games['LPOSS'],
        )
        .assign(
            MOPPPOSS=lambda games: games['WPPPOSS'] - games['LPPPOSS'],
        )
    )

def addTeam(teamId,year):
    """Create (or reset) the empty stats entry for a team in a season.

    The season bucket is created on first use. An existing entry for the same
    team is replaced by a fresh empty dict.
    """
    seasonTeams = gameData['games'].setdefault(str(year), {})
    seasonTeams[str(teamId)] = {}
    
def getTeamStats(teamId,year):
    """Return the stats dict stored for a team-season.

    Raises:
        KeyError: if the season or the team has no entry in gameData['games'].
    """
    seasonTeams = gameData['games'][str(year)]
    return seasonTeams[str(teamId)]

def addAllGames(teamId,year):
    """Cache season stats for a team and for every team reachable from it.

    Starting from teamId, each team that has no cached entry for `year` gets
    its games (getGamesByYear) and points-per-possession summary
    (getPointsPerPossession) stored, and then its opponents are processed the
    same way. Teams that already have an entry are skipped.

    The walk uses an explicit stack instead of recursing through
    addAllOpponents: the recursive form nests two call frames per team along
    the schedule graph, which can approach Python's recursion limit on a full
    season. Teams are still visited in the same depth-first order.

    Args:
        teamId: Team to start from.
        year: Season to load.
    """
    pending = [teamId]
    while pending:
        currentId = pending.pop()
        if haveStats(currentId, year):
            continue

        # Register the team before loading so it counts as "seen" from here on.
        addTeam(currentId, year)
        team = getTeamStats(currentId, year)
        teamGames = getGamesByYear(currentId, year)
        team['games'] = teamGames
        team['ppp'] = getPointsPerPossession(teamGames)

        # Push in reverse so the first-listed opponent is popped (visited) first.
        pending.extend(teamGames['opponents'][::-1])

def addAllOpponents(teamId,year):
    """Run addAllGames for every opponent recorded in the team's cached games."""
    opponentIds = getTeamStats(teamId, year)['games']['opponents']
    for opponentId in opponentIds:
        addAllGames(opponentId, year)
    
def displayOpponents(teamId, year):
    """Print the stats summary (displayStats) of each opponent the team faced."""
    seasonGames = getTeamStats(teamId, year)['games']
    for oppId in seasonGames['opponents']:
        displayStats(oppId, year)
    
# File names of the competition CSVs, resolved by loadFile against its data
# directory. Only teamsFile is referenced in the code below; the other four
# names are not used anywhere in this cell.
seasonsFile = "Seasons.csv"
playersFile = "Players_2018.csv"
eventsFile = "Events_2018.csv"
resultsFile = "RegularSeasonCompactResults.csv"
teamsFile= 'Teams.csv'

# Team name/ids map (the TeamID / TeamName lookup used by getTeamId and getTeamName)
gameData['teams'] = loadFile(teamsFile)

# Annotate box scores: load the detailed regular-season results and let
# addData append the derived columns. Every game query above reads this frame.
gameData['annotatedDF'] = addData(loadFile('RegularSeasonDetailedResults.csv'))


In [18]:
# NOTE(review): findGamesBetweenTeams was called here without being defined in
# any cell of this notebook, so the cell only ran off leftover kernel state.
# The definition below is reconstructed from the recorded output (rows of the
# annotated frame for the two teams in the given season). Confirm it against
# the original helper if that is still available.
def findGamesBetweenTeams(teamNameA, teamNameB, year):
    """Return the annotated games two teams played against each other in a season.

    Args:
        teamNameA: Name of the first team, as listed in the teams table.
        teamNameB: Name of the second team.
        year: Season to search.

    Returns:
        The matching rows of gameData['annotatedDF'], whichever side won,
        in their original row order.
    """
    annotated = gameData['annotatedDF']
    idA = getTeamId(teamNameA)
    idB = getTeamId(teamNameB)
    aBeatB = (annotated['WTeamID'] == idA) & (annotated['LTeamID'] == idB)
    bBeatA = (annotated['WTeamID'] == idB) & (annotated['LTeamID'] == idA)
    return annotated[(annotated['Season'] == year) & (aBeatB | bBeatA)]

addAllGames(getTeamId('Utah'),2017)
foo = findGamesBetweenTeams('Utah', 'Colorado', 2017)
print(foo)

# addAllGames(getTeamId('Utah'), 2016)
# displayStats(getTeamId('Utah'),2017)
# displayStats(getTeamId('Utah'),2016)

# displayStats(getTeamId('UCLA'),2017)
# displayStats(getTeamId('Arizona'),2017)
# displayStats(getTeamId('USC'),2017)
# displayStats(getTeamId('North Carolina'),2017)


       Season  DayNum  WTeamID  WScore  LTeamID  LScore WLoc  NumOT  WFGM  \
73443    2017      62     1428      76     1160      60    H      0    32   
75942    2017     115     1428      86     1160      81    A      0    26   

       WFGA    ...     LFGPERC3   WORPERC   LORPERC  WPOSS  LPOSS  MOV  \
73443    61    ...     0.333333  0.090909  0.120000     92     85   16   
75942    47    ...     0.333333  0.075758  0.144578     76     93    5   

        WPPPOSS   LPPPOSS  MOP  MOPPPOSS  
73443  0.826087  0.705882    7  0.120205  
75942  1.131579  0.870968  -17  0.260611  

[2 rows x 47 columns]
