In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# All game data
# Module-level cache shared by every helper below; 'games' maps
# str(year) -> str(teamId) -> per-team stats dict.
gameData = dict(games={})
   
# Loads a raw CSV file and creates a DF from it
def loadFile(filePath, inputDir="./mens-machine-learning-competition-2018"):
    """Read one CSV file from the data directory into a DataFrame.

    Parameters
    ----------
    filePath : str
        File name (or relative path) inside ``inputDir``.
    inputDir : str, optional
        Directory holding the CSV files. Defaults to the directory the
        notebook was originally written against, so existing one-argument
        calls behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` produces for the file.
    """
    return pd.read_csv(inputDir + "/" + filePath)

def getTeamId(teamName):
    """Return the TeamID of the first row in gameData['teams'] whose TeamName equals teamName.

    Raises IndexError when no row matches.
    """
    teams = gameData['teams']
    matchingIds = teams.loc[teams['TeamName'] == teamName, 'TeamID']
    return matchingIds.values[0]

def getTeamName(teamId):
    """Return the TeamName of the first row in gameData['teams'] whose TeamID equals teamId.

    Raises IndexError when no row matches.
    """
    teams = gameData['teams']
    matchingNames = teams.loc[teams['TeamID'] == teamId, 'TeamName']
    return matchingNames.values[0]

def getTeamWins(teamId):
    """Return every row of gameData['annotatedDF'] where teamId is the winning team (WTeamID)."""
    boxScores = gameData['annotatedDF']
    wonByTeam = boxScores['WTeamID'] == teamId
    return boxScores[wonByTeam]

def getTeamLosses(teamId):
    """Return every row of gameData['annotatedDF'] where teamId is the losing team (LTeamID)."""
    boxScores = gameData['annotatedDF']
    lostByTeam = boxScores['LTeamID'] == teamId
    return boxScores[lostByTeam]
    
def getGames(teamId):
    """Return all annotated games of a team: its wins followed by its losses.

    Parameters
    ----------
    teamId : int
        Team identifier as stored in the WTeamID / LTeamID columns.

    Returns
    -------
    pandas.DataFrame
        Rows from getTeamWins(teamId) followed by rows from
        getTeamLosses(teamId), with their original index labels.
    """
    # The helpers are named getTeamWins/getTeamLosses; pd.concat replaces
    # DataFrame.append, which no longer exists in pandas 2.x.
    return pd.concat([getTeamWins(teamId), getTeamLosses(teamId)])

def getGamesByYear(teamId, year):
    """Collect one team's annotated games for a single season.

    Parameters
    ----------
    teamId : int
        Team identifier as stored in the WTeamID / LTeamID columns.
    year : int
        Value matched against the Season column.

    Returns
    -------
    dict
        'wins'      - rows of the season where the team is WTeamID
        'losses'    - rows of the season where the team is LTeamID
        'full'      - wins followed by losses
        'opponents' - unique opposing team ids, in order of first appearance
                      (opponents from wins first, then from losses)
    """
    annotated = gameData['annotatedDF']
    inSeason = annotated['Season'] == year
    gamesWon = annotated[inSeason & (annotated['WTeamID'] == teamId)]
    gamesLost = annotated[inSeason & (annotated['LTeamID'] == teamId)]
    # pd.concat is the supported replacement for the removed .append methods.
    opponentIds = pd.concat([gamesWon['LTeamID'], gamesLost['WTeamID']])
    return {
        'wins': gamesWon,
        'losses': gamesLost,
        'full': pd.concat([gamesWon, gamesLost]),
        'opponents': opponentIds.unique()
    }

def haveStats(teamId, year):
    """Tell whether gameData['games'] already holds an entry for this team in this year.

    Both keys are looked up in their str() form.
    """
    seasons = gameData['games']
    yearKey = str(year)
    if yearKey not in seasons:
        return False
    return str(teamId) in seasons[yearKey]

def getPointsPerPossession(teamData):
    """Summarise a team's points per possession on offence and defence.

    Parameters
    ----------
    teamData : dict
        Must hold DataFrames under 'wins' and 'losses', each with WPPPOSS
        and LPPPOSS columns (the shape returned by getGamesByYear on the
        annotated box scores).

    Returns
    -------
    dict
        {'offensive': {'ave', 'std'}, 'defensive': {'ave', 'std'}} where
        offensive uses the team's own column (W in wins, L in losses) and
        defensive uses the other side's column.
    """
    wins = teamData['wins']
    losses = teamData['losses']
    # pd.concat replaces Series.append, which was removed in pandas 2.0.
    offensive = pd.concat([wins['WPPPOSS'], losses['LPPPOSS']])
    defensive = pd.concat([wins['LPPPOSS'], losses['WPPPOSS']])
    return {
        'offensive': {
            'ave': offensive.mean(),
            'std': offensive.std()
        },
        'defensive': {
            'ave': defensive.mean(),
            'std': defensive.std()
        }
    }

#  Go through the dataframe and add possession information to the box score
def addData(df):
    """Return a copy of a detailed box-score frame with derived columns appended.

    Added in three passes, because later columns depend on earlier ones:
      1. shooting percentages (W/L FGPERC, FGPERC3), offensive-rebound
         ratios (W/L ORPERC), possession counts (WPOSS, LPOSS) and margin
         of victory (MOV);
      2. points per possession (WPPPOSS, LPPPOSS) and possession margin (MOP);
      3. points-per-possession margin (MOPPPOSS).

    The input frame is not modified.

    NOTE(review): WPOSS adds the loser's turnovers (LTO) and LPOSS the
    winner's (WTO) - confirm this is the intended possession estimate.
    """
    return (
        df.assign(
            WFGPERC=lambda g: g['WFGM'] / g['WFGA'],
            WFGPERC3=lambda g: g['WFGM3'] / g['WFGA3'],
            LFGPERC=lambda g: g['LFGM'] / g['LFGA'],
            LFGPERC3=lambda g: g['LFGM3'] / g['LFGA3'],
            WORPERC=lambda g: g['WOR'] / (g['WFGA'] + g['WFGA3']),
            LORPERC=lambda g: g['LOR'] / (g['LFGA'] + g['LFGA3']),
            WPOSS=lambda g: g['WFGA'] + g['WFGA3'] + g['LTO'],
            LPOSS=lambda g: g['LFGA'] + g['LFGA3'] + g['WTO'],
            MOV=lambda g: g['WScore'] - g['LScore'],
        )
        .assign(
            WPPPOSS=lambda g: g['WScore'] / g['WPOSS'],
            LPPPOSS=lambda g: g['LScore'] / g['LPOSS'],
            MOP=lambda g: g['WPOSS'] - g['LPOSS'],
        )
        .assign(MOPPPOSS=lambda g: g['WPPPOSS'] - g['LPPPOSS'])
    )

# Add team to the given year
def addTeam(teamId,year):
    """Create (or reset to empty) the stats slot for a team in a given year.

    The year bucket in gameData['games'] is created on first use; both keys
    are stored in their str() form.
    """
    seasonTeams = gameData['games'].setdefault(str(year), {})
    seasonTeams[str(teamId)] = {}

# Add the team and add the team's games and opponents for the given year
def addAllGames(team,year):
    """Cache a team's games for a year, then do the same for every team reachable through opponents.

    Parameters
    ----------
    team : str
        Team name, resolved through getTeamId.
    year : int
        Season to load.

    Teams that already have an entry for the year (per haveStats) are left
    untouched, which is also what terminates the traversal.
    """
    # An explicit worklist replaces the addAllGames <-> addAllOpponents
    # recursion, whose depth grew by two frames per newly discovered team
    # and could exceed Python's recursion limit on a large schedule graph.
    pending = [getTeamId(team)]
    while pending:
        teamId = pending.pop()
        if haveStats(teamId, year):
            continue
        addTeam(teamId, year)
        teamGames = getGamesByYear(teamId, year)
        getTeamStats(teamId, year)['games'] = teamGames
        # Pushed in reverse so teams are visited in the same depth-first
        # order as the recursive version.
        pending.extend(teamGames['opponents'][::-1])

# Add all the opponents for a team for the given year
def addAllOpponents(teamId,year):
    """Run addAllGames for every opponent listed in the team's cached games for the year.

    Expects the team's stats entry to already contain
    ['games']['opponents'] (opponent team ids).
    """
    opponentIds = getTeamStats(teamId, year)['games']['opponents']
    for opponentId in opponentIds:
        opponentName = getTeamName(opponentId)
        addAllGames(opponentName, year)

# Get the stats for the given team/year
def getTeamStats(teamId,year):
    """Return the cached stats dict for a team in a year (KeyError if either key is missing)."""
    seasonTeams = gameData['games'][str(year)]
    return seasonTeams[str(teamId)]

# Return games between two teams in a year        
def findGamesBetweenTeams(team1, team2, year):
    """Return the annotated games of one season in which the two named teams met.

    team1 and team2 are team names (resolved via getTeamId); a game matches
    regardless of which of the two won.
    """
    firstId = getTeamId(team1)
    secondId = getTeamId(team2)
    boxScores = gameData['annotatedDF']
    winners = boxScores['WTeamID']
    losers = boxScores['LTeamID']
    firstBeatSecond = (winners == firstId) & (losers == secondId)
    secondBeatFirst = (winners == secondId) & (losers == firstId)
    inSeason = boxScores['Season'] == year
    return boxScores[inSeason & (firstBeatSecond | secondBeatFirst)]

# ******************************
# Display functions
# ******************************
def displayOpponents(teamId, year):
    """Call displayStats for each opponent id cached under the team's games for the year."""
    opponentIds = getTeamStats(teamId, year)['games']['opponents']
    for currentId in opponentIds:
        displayStats(currentId, year)

def displayStats(teamId, year):
    """Print the cached stats dict for a team/year, or a notice when nothing is cached.

    Parameters
    ----------
    teamId : int
        Team identifier.
    year : int
        Season to display.
    """
    if not haveStats(teamId, year):
        print('No status for: ' + getTeamName(teamId))
        return
    # The team name is only needed for the notice above; the previous
    # unconditional lookup scanned the teams table and discarded the result.
    print(getTeamStats(teamId, year))
    
# ******************************
# Main
# ******************************
# CSV file names inside the competition data directory.
# NOTE(review): only teamsFile is used below; seasonsFile, playersFile,
# eventsFile and resultsFile are not referenced in the visible code -
# confirm whether they are still needed.
seasonsFile = "Seasons.csv"
playersFile = "Players_2018.csv"
eventsFile = "Events_2018.csv"
resultsFile = "RegularSeasonCompactResults.csv"
teamsFile= 'Teams.csv'

# Team name <-> id lookup table (read by getTeamId / getTeamName)
gameData['teams'] = loadFile(teamsFile)

# Detailed regular-season box scores with the derived columns from addData
# (percentages, possessions, points per possession, margins)
gameData['annotatedDF'] = addData(loadFile('RegularSeasonDetailedResults.csv'))


In [2]:
def _splitGames(season, teamId, otherId, against):
    """Return (won, lost) frames of teamId's games, restricted to games against otherId
    when against is True, or to games against anyone but otherId when it is False."""
    if against:
        beatOther = season['LTeamID'] == otherId
        lostToOther = season['WTeamID'] == otherId
    else:
        beatOther = season['LTeamID'] != otherId
        lostToOther = season['WTeamID'] != otherId
    won = season[(season['WTeamID'] == teamId) & beatOther]
    lost = season[(season['LTeamID'] == teamId) & lostToOther]
    return won, lost


def _ownColumn(won, lost, suffix):
    """Concatenate a team's own values: 'W'+suffix from its wins, then 'L'+suffix from its losses."""
    return pd.concat([won['W' + suffix], lost['L' + suffix]])


def getGameStats(team, year):
    """Compare a team with each of its opponents for one season.

    Parameters
    ----------
    team : str
        Team name, resolved through getTeamId.
    year : int
        Value matched against the Season column.

    Returns
    -------
    dict
        {'opponents': {opponentName: stats}}. Each stats dict holds, in order:
        oppPossAve / oppPossStd / oppPPPossAve / oppPPPossStd
            the opponent's possessions and points per possession in games
            NOT played against `team`;
        possAveDelta / PPPossAveDelta
            the opponent's averages against `team` minus the above averages;
        teamPossAve / teamPossStd / teamPPPossAve / teamPPPossStd
            `team`'s figures in games NOT played against this opponent;
        teamPossAveDelta / teamPPPossAveDelta
            `team`'s averages against this opponent minus the above averages;
        numGames / gamesWon / gamesLost
            head-to-head record from `team`'s point of view.
        When no stats are cached for the team/year (haveStats is False) a
        notice is printed and 'opponents' is left empty.
    """
    teamId = getTeamId(team)
    data = {'opponents': {}}
    if not haveStats(teamId, year):
        print('No status for: ' + getTeamName(teamId))
        return data

    annotated = gameData['annotatedDF']
    season = annotated[annotated['Season'] == year]
    wins = season[season['WTeamID'] == teamId]
    losses = season[season['LTeamID'] == teamId]
    # pd.concat replaces Series.append, which was removed in pandas 2.0.
    allOpps = pd.concat([wins['LTeamID'], losses['WTeamID']]).unique()

    for oppTeam in allOpps:
        oppName = getTeamName(oppTeam)

        # Opponent's numbers against everyone except us
        oppWins, oppLoss = _splitGames(season, oppTeam, teamId, against=False)
        oppPoss = _ownColumn(oppWins, oppLoss, 'POSS')
        oppPPPoss = _ownColumn(oppWins, oppLoss, 'PPPOSS')
        oppPossAve = oppPoss.mean()
        oppPPPossAve = oppPPPoss.mean()

        # Opponent's numbers against us
        oppVsWins, oppVsLoss = _splitGames(season, oppTeam, teamId, against=True)
        gameVsPossAve = _ownColumn(oppVsWins, oppVsLoss, 'POSS').mean()
        gameVsPPPossAve = _ownColumn(oppVsWins, oppVsLoss, 'PPPOSS').mean()

        # Our numbers against everyone except this opponent
        teamWins, teamLoss = _splitGames(season, teamId, oppTeam, against=False)
        teamPoss = _ownColumn(teamWins, teamLoss, 'POSS')
        teamPPPoss = _ownColumn(teamWins, teamLoss, 'PPPOSS')
        teamPossAve = teamPoss.mean()
        teamPPPossAve = teamPPPoss.mean()

        # Our numbers against this opponent
        teamVsWins, teamVsLoss = _splitGames(season, teamId, oppTeam, against=True)
        teamVsPossAve = _ownColumn(teamVsWins, teamVsLoss, 'POSS').mean()
        teamVsPPPossAve = _ownColumn(teamVsWins, teamVsLoss, 'PPPOSS').mean()

        # Row counts are already 0 for empty frames, so no special-casing is needed.
        gamesWon = teamVsWins.shape[0]
        gamesLost = teamVsLoss.shape[0]

        data['opponents'][oppName] = {
            'oppPossAve': oppPossAve,
            'oppPossStd': oppPoss.std(),
            'oppPPPossAve': oppPPPossAve,
            'oppPPPossStd': oppPPPoss.std(),
            'possAveDelta': gameVsPossAve - oppPossAve,
            'PPPossAveDelta': gameVsPPPossAve - oppPPPossAve,
            'teamPossAve': teamPossAve,
            'teamPossStd': teamPoss.std(),
            'teamPPPossAve': teamPPPossAve,
            'teamPPPossStd': teamPPPoss.std(),
            'teamPossAveDelta': teamVsPossAve - teamPossAve,
            'teamPPPossAveDelta': teamVsPPPossAve - teamPPPossAve,
            'numGames': gamesWon + gamesLost,
            'gamesWon': gamesWon,
            'gamesLost': gamesLost,
        }
    return data
        

# Populate the 2017 cache starting from Utah (addAllGames also walks through
# every opponent it discovers), then compute Utah's per-opponent comparison
# and show the entry for Arizona.
addAllGames('Utah',2017)
data = getGameStats('Utah', 2017)
print(data['opponents']['Arizona'])


#foo = findGamesBetweenTeams('Utah', 'Colorado', 2017)
#print(foo)

#addAllGames(getTeamId('Utah'), 2017)
#displayStats(getTeamId('Utah'),2017)
# displayStats(getTeamId('Utah'),2016)

# displayStats(getTeamId('UCLA'),2017)
# displayStats(getTeamId('Arizona'),2017)
# displayStats(getTeamId('USC'),2017)
# displayStats(getTeamId('North Carolina'),2017)


{'oppPossAve': 84.24242424242425, 'oppPossStd': 7.9727281365536316, 'oppPPPossAve': 0.9115770055501007, 'oppPPPossStd': 0.11317977886591486, 'possAveDelta': -10.24242424242425, 'PPPossAveDelta': -0.019685113658208797, 'teamPossAve': 88.25, 'teamPossStd': 7.872243785746362, 'teamPPPossAve': 0.8991093468586684, 'teamPPPossStd': 0.12878074537672074, 'teamPossAveDelta': 0.75, 'teamPPPossAveDelta': -0.269895863712601, 'numGames': 1, 'gamesWon': 0, 'gamesLost': 1}
