# In [1]:
from __future__ import division

import os
import subprocess

import goldsberry
import pandas as pd

# Load the listing returned by a default GameIDs query into a DataFrame.
# NOTE(review): presumably these are team-level game logs -- confirm against
# goldsberry's default PlayerOrTeam setting.
games = goldsberry.GameIDs()
game_logs = pd.DataFrame(games.game_list())

# Re-query with PlayerOrTeam='P' and load that listing as the player logs.
games.SET_parameters(PlayerOrTeam='P')
games.GET_raw_data()
player_logs = pd.DataFrame(games.game_list())

# Put both frames in a fixed order and renumber their rows from zero so the
# index-aligned merges further down line up.
game_logs = game_logs.sort_values('GAME_ID').reset_index(drop=True)
player_logs = (player_logs
    .sort_values(['GAME_ID', 'PLAYER_ID'])
    .reset_index(drop=True))

# Running sums of selected box-score columns, attached to each row with a
# _CUM_TOTALS suffix. The columns are selected with a list: the old
# tuple-style form groupby(...)['A', 'B'] is rejected by newer pandas.
# NOTE(review): grouping on both TEAM_ID and GAME_DATE restarts the running
# sum for every team/date pair -- confirm whether TEAM_ID alone was intended.
running_cols = ['AST', 'FGA', 'FGM', 'PTS']
running_totals = (game_logs
    .groupby(['TEAM_ID', 'GAME_DATE'])[running_cols]
    .cumsum())
game_logs = game_logs.merge(running_totals,
                    left_index=True, right_index=True,
                    suffixes=('', '_CUM_TOTALS'))

# 1-based counter of each team's rows, in the current row order.
game_logs['GAME_NUMBER'] = game_logs.groupby(['TEAM_ID']).cumcount() + 1

# Per-date sums over all rows, merged back onto every row of that date with a
# _LEAGUE suffix. The columns are selected with a list: the old tuple-style
# form groupby(...)['A', 'B'] is rejected by newer pandas.
league_stat_cols = ['AST', 'FGA', 'FGM', 'REB', 'OREB', 'TOV',
                    'FTA', 'FTM', 'PF', 'PTS']
daily_league_totals = game_logs.groupby(['GAME_DATE'])[league_stat_cols].sum()
game_logs = game_logs.merge(
    daily_league_totals,
    left_on='GAME_DATE', right_index=True,
    suffixes=('', '_LEAGUE'))

# Number of distinct TEAM_ID values appearing on each date.
g = game_logs.groupby('GAME_DATE')['TEAM_ID'].nunique()
g.name = 'NUM_TEAMS'
game_logs = game_logs.join(g, on='GAME_DATE')

cum_cols = ['AST_LEAGUE', 'FGA_LEAGUE', 'FGM_LEAGUE', 'REB_LEAGUE',
            'OREB_LEAGUE', 'TOV_LEAGUE', 'FTA_LEAGUE', 'FTM_LEAGUE',
            'PF_LEAGUE', 'PTS_LEAGUE', 'NUM_TEAMS']

# NOTE(review): this cumsum runs over the rows inside each GAME_DATE group,
# where every column in cum_cols is constant, so the k-th row of a date holds
# k times that date's value rather than a season-to-date total. Ratios of two
# *_CUM columns are unaffected by the factor k. Confirm the intent.
game_logs = game_logs.merge(game_logs.groupby('GAME_DATE')[cum_cols].cumsum(),
               left_index=True, right_index=True,
               suffixes=('', '_CUM'))

# Combined PTS of all rows sharing a GAME_ID, stored as PTS_GAMETOTAL.
game_points = game_logs.groupby('GAME_ID')['PTS'].sum()
game_logs = game_logs.join(game_points, on='GAME_ID', rsuffix='_GAMETOTAL')

def compute_FTM_PF(row):
    """Return the row's FTM_LEAGUE_CUM divided by its PF_LEAGUE_CUM."""
    made, fouls = row['FTM_LEAGUE_CUM'], row['PF_LEAGUE_CUM']
    return made / fouls

def compute_FTA_PF(row):
    """Return the row's FTA_LEAGUE_CUM divided by its PF_LEAGUE_CUM."""
    attempts, fouls = row['FTA_LEAGUE_CUM'], row['PF_LEAGUE_CUM']
    return attempts / fouls

def compute_factor(row):
    """Return 2/3 minus (0.5 * AST / FGM) / (2 * FGM / FTM).

    All three inputs are read from the row's *_LEAGUE_CUM columns.
    """
    half_assist_rate = .5 * row['AST_LEAGUE_CUM'] / row['FGM_LEAGUE_CUM']
    double_fg_per_ft = 2 * row['FGM_LEAGUE_CUM'] / row['FTM_LEAGUE_CUM']
    return (2/3) - (half_assist_rate / double_fg_per_ft)

def compute_VOP(row):
    """Return PTS divided by (FGA - OREB + TOV + 0.44 * FTA).

    All inputs are read from the row's *_LEAGUE_CUM columns.
    """
    denominator = (row['FGA_LEAGUE_CUM']
                   - row['OREB_LEAGUE_CUM']
                   + row['TOV_LEAGUE_CUM']
                   + 0.44 * row['FTA_LEAGUE_CUM'])
    return row['PTS_LEAGUE_CUM'] / denominator

def compute_DRB(row):
    """Return (REB - OREB) / REB using the row's *_LEAGUE_CUM columns."""
    total_reb = row['REB_LEAGUE_CUM']
    non_offensive_reb = total_reb - row['OREB_LEAGUE_CUM']
    return non_offensive_reb / total_reb

# Evaluate each row-level helper over game_logs and store the result under
# the paired column name, in this order.
for column, row_func in (('FACTOR', compute_factor),
                         ('VOP', compute_VOP),
                         ('DRB_PCT', compute_DRB),
                         ('FTM_PF', compute_FTM_PF),
                         ('FTA_PF', compute_FTA_PF)):
    game_logs[column] = game_logs.apply(row_func, axis=1)

# PACE_ADJUST = (2 * PTS_LEAGUE_CUM / NUM_TEAMS_CUM) / PTS_GAMETOTAL
points_per_team_doubled = (2 * game_logs['PTS_LEAGUE_CUM']
                           / game_logs['NUM_TEAMS_CUM'])
game_logs['PACE_ADJUST'] = points_per_team_doubled / game_logs['PTS_GAMETOTAL']

def compute_assisted_FG(row):
    """Return the row's AST_TEAM_DAILY divided by its FGM_TEAM_DAILY."""
    team_assists, team_makes = row['AST_TEAM_DAILY'], row['FGM_TEAM_DAILY']
    return team_assists / team_makes

# Player-log columns carried forward.
# NOTE(review): 'FTM' is listed twice below; selecting with this list
# presumably yields two FTM columns, in which case row['FTM'] is a
# two-element Series rather than a scalar in the row-wise code that follows.
log_cols = ['PLAYER_ID', 'PLAYER_NAME', 'GAME_ID', 'MATCHUP', 'GAME_DATE',
            'MIN', 'FG3M', 'AST', 'FGM', 'FTM',
            'TOV', 'FGA', 'FTA', 'FTM', 'REB', 
            'OREB', 'STL', 'BLK', 'PF', 'TEAM_ABBREVIATION', 'TEAM_NAME']

# Per-row league factors computed on game_logs, plus the merge keys.
game_cols = ['GAME_ID', 'MATCHUP','VOP', 'FACTOR', 'DRB_PCT', 'FTM_PF', 'FTA_PF', 'PACE_ADJUST']

merge_keys = ['GAME_ID', 'MATCHUP']

# First attach the league factors for the matching GAME_ID/MATCHUP row ...
player_logs = player_logs[log_cols].merge(
    game_logs[game_cols], left_on=merge_keys, right_on=merge_keys)

# ... then that row's AST and FGM, which collide with the player columns and
# therefore arrive as AST_TEAM_DAILY / FGM_TEAM_DAILY.
player_logs = player_logs.merge(
    game_logs[['GAME_ID', 'MATCHUP', 'AST', 'FGM']],
    left_on=merge_keys, right_on=merge_keys,
    suffixes=('', '_TEAM_DAILY'))

def line_1(row):
    """Return the reciprocal of the row's MIN value."""
    minutes = row['MIN']
    return 1 / minutes

def line_2(row):
    """Return the row's FG3M value unchanged."""
    threes_made = row['FG3M']
    return threes_made

def line_3(row):
    """Return two thirds of the row's AST value."""
    assist_weight = 2/3
    return assist_weight * row['AST']

def line_4(row):
    """Return (2 - FACTOR * assisted-FG ratio) * FGM for the row.

    The assisted-FG ratio comes from compute_assisted_FG(row).
    """
    assisted_share = compute_assisted_FG(row)
    per_make_credit = 2 - row['FACTOR'] * assisted_share
    return per_make_credit * row['FGM']

def line_5a(row):
    """Return half of the row's FTM value."""
    free_throws_made = row['FTM']
    return free_throws_made * .5

def line_5b(row):
    """Return 1 + (1 - assisted-FG ratio) for the row."""
    unassisted_share = 1 - compute_assisted_FG(row)
    return 1 + unassisted_share

def line_5c(row):
    """Return two thirds of the row's assisted-FG ratio."""
    assisted_share = compute_assisted_FG(row)
    return 2/3 * assisted_share

def line_5(row):
    """Return the free-throw term of unadjusted PER for the row.

    Hollinger's formula for this term is

        FT * 0.5 * (1 + (1 - team_AST/team_FG) + (2/3) * (team_AST/team_FG))

    so the half-weighted free throws (line_5a) multiply the *sum* of the two
    assist-dependent parts (line_5b + line_5c). Without the parentheses the
    last part would be added outside the product and would not scale with
    free throws made.
    """
    return line_5a(row) * (line_5b(row) + line_5c(row))

def line_6(row):
    """Return the row's VOP multiplied by its TOV."""
    value_of_possession, turnovers = row['VOP'], row['TOV']
    return value_of_possession * turnovers

def line_7(row):
    """Return VOP * DRB_PCT * (FGA - FGM) for the row."""
    missed_field_goals = row['FGA'] - row['FGM']
    return row['VOP'] * row['DRB_PCT'] * missed_field_goals

def line_8(row):
    """Return VOP * 0.44 * (0.44 + 0.56 * DRB_PCT) * (FTA - FTM) for the row."""
    rebound_weight = .44 + (.56 * row['DRB_PCT'])
    missed_free_throws = row['FTA'] - row['FTM']
    return row['VOP'] * .44 * rebound_weight * missed_free_throws

def line_9(row):
    """Return VOP * (1 - DRB_PCT) * (REB - OREB) for the row."""
    non_offensive_rebounds = row['REB'] - row['OREB']
    complement = 1 - row['DRB_PCT']
    return row['VOP'] * complement * non_offensive_rebounds

def line_10(row):
    """Return VOP * DRB_PCT * OREB for the row."""
    offensive_rebounds = row['OREB']
    return row['VOP'] * row['DRB_PCT'] * offensive_rebounds

def line_11(row):
    """Return the row's VOP multiplied by its STL."""
    value_of_possession, steals = row['VOP'], row['STL']
    return value_of_possession * steals

def line_12(row):
    """Return VOP * DRB_PCT * BLK for the row."""
    blocks = row['BLK']
    return row['VOP'] * row['DRB_PCT'] * blocks

def line_13(row):
    """Return PF * (FTM_PF - 0.44 * FTA_PF * VOP) for the row."""
    attempts_cost = .44 * row['FTA_PF'] * row['VOP']
    per_foul_cost = row['FTM_PF'] - attempts_cost
    return row['PF'] * per_foul_cost

def uPER(row):
    """Return the signed sum of the line_2 .. line_13 terms for one row.

    Terms 2, 3, 4, 5, 9, 10, 11 and 12 are added; terms 6, 7, 8 and 13 are
    subtracted. line_1 (1 / MIN) is not applied here.

    The result is always a single value. When a column label occurs more
    than once in the row (so a lookup such as row['FTM'] gives a Series), the
    sum is itself a Series and its first element is returned by position.
    When every lookup is scalar, the scalar sum is returned as is.
    """
    total = (line_2(row) +
        line_3(row) +
        line_4(row) +
        line_5(row) -
        line_6(row) -
        line_7(row) -
        line_8(row) +
        line_9(row) +
        line_10(row) +
        line_11(row) +
        line_12(row) -
        line_13(row))
    # Use explicit positional access: total[0] depends on pandas' integer-key
    # fallback for label-indexed Series and fails outright on a plain scalar.
    if isinstance(total, pd.Series):
        return total.iloc[0]
    return total

# Unadjusted PER sum per player row, then scaled by the game's PACE_ADJUST.
player_logs['uPER'] = player_logs.apply(uPER, axis=1)
player_logs['aPER'] = player_logs['uPER'] * player_logs['PACE_ADJUST']

# PER is aPER per minute. Rows with MIN <= 0 keep the default of zero so the
# division never sees a zero denominator. The column starts as float (0.0,
# not 0) because the values assigned below are floats; writing floats into an
# integer column is rejected or warned about by newer pandas.
player_logs['PER'] = 0.0
played = player_logs['MIN'] > 0
player_logs.loc[played, 'PER'] = (player_logs.loc[played, 'aPER']
                                  / player_logs.loc[played, 'MIN'])

# Keep rows with at least 8 minutes. Any PER_CUM_* columns left over from an
# earlier run of this section are discarded so the joins below recreate them;
# errors='ignore' makes the drop a no-op on a fresh run, when those columns
# do not exist yet (without it the drop raises).
player_logs = (player_logs
    .loc[player_logs.MIN >= 8]
    .drop(['PER_CUM_SUM', 'PER_CUM_COUNT'], axis=1, errors='ignore'))

# Running sum and running count of PER across dates, in date order. Both
# joined Series are named 'PER', which collides with the existing column, so
# rsuffix turns them into PER_CUM_SUM and PER_CUM_COUNT.
per_by_date = player_logs.sort_values('GAME_DATE').groupby('GAME_DATE').PER
player_logs = (player_logs
    .join(per_by_date.sum().cumsum(), on='GAME_DATE', rsuffix='_CUM_SUM')
    .join(per_by_date.count().cumsum(), on='GAME_DATE', rsuffix='_CUM_COUNT'))

# Average PER over all retained rows up to and including each date.
player_logs['AVG_PER'] = player_logs['PER_CUM_SUM'] / player_logs['PER_CUM_COUNT']
# Rescale PER so that a row equal to the running average (AVG_PER) scores 15.
player_logs['HollingerPER'] = player_logs['PER'] * (15 / player_logs['AVG_PER'])

# Rows dated before 2016-02-12 are 'Pre All-Star'; rows on or after that date
# are 'Post All-Star'. (The comparison used to point the other way, which
# labelled the earlier games as 'Post All-Star'.)
# NOTE(review): assumes GAME_DATE compares correctly against a 'YYYY-MM-DD'
# string -- confirm the column's format.
player_logs['SEASON_SEGMENT'] = 'Pre All-Star'
player_logs.loc[player_logs['GAME_DATE'] >= '2016-02-12', 'SEASON_SEGMENT'] = 'Post All-Star'

# Sort chronologically and keep the result. Calling reset_index(inplace=True)
# on the temporary returned by sort_values would leave player_logs unsorted.
player_logs = player_logs.sort_values('GAME_DATE').reset_index(drop=True)

# Rolling mean of HollingerPER over each player's last N rows, for
# N = 10, 7, 5, 3. Series.rolling replaces the removed pd.rolling_mean, and
# transform keeps the result aligned with player_logs' index.
for window in (10, 7, 5, 3):
    player_logs['avg_%dday' % window] = (player_logs
        .groupby('PLAYER_ID')['HollingerPER']
        .transform(lambda per, window=window:
                   per.rolling(window, min_periods=0).mean()))
# Full per-row table.
player_logs.to_csv('data/cardinal_PER.csv')

# Mean HollingerPER per player, one column per SEASON_SEGMENT value.
segment_means = (player_logs
    .groupby(['PLAYER_NAME', 'SEASON_SEGMENT'])
    .HollingerPER.mean()
    .unstack(level=-1))
segment_means.to_csv('data/cardinal_PER-prepost-allstar.csv')

# Upload the data directory with the AWS CLI. check_call runs the command
# without a shell, so '~' must be expanded here rather than passed literally.
# NOTE(review): the CSVs above are written to the relative 'data/' directory,
# which is the same place as ~/data/ only when the working directory is the
# home directory -- confirm.
subprocess.check_call(['aws', 's3', 'sync', os.path.expanduser('~/data/'),
                       's3://cardinal-advising/paul-robbins',
                       '--acl=public-read'])

'0.8.0.1'