In [3]:
from IPython.core.display import HTML
HTML("<style>.container { width:90% !important; }</style>")

Py-Goldsberry - Player-Level Box Score Data
===

This tutorial walks through using the py-goldsberry package to collect box score data at the player level.

To get started, we need to import py-goldsberry and we're going to go ahead import pandas so we can explore the data quickly once we have it collected.

In [4]:
from __future__ import division

import goldsberry
import pandas as pd
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
goldsberry.__version__

'0.8.0.1'

## Getting List of All Games

In [5]:
game_list = goldsberry.GameIDs()

In [6]:
game_list.SET_parameters(Season = '2014-15')

In [7]:
game_list.GET_raw_data()

In [8]:
df_games = pd.DataFrame(game_list.game_list())

In [9]:
df_games.head()

Unnamed: 0,AST,BLK,DREB,FG3A,FG3M,FG3_PCT,FGA,FGM,FG_PCT,FTA,FTM,FT_PCT,GAME_DATE,GAME_ID,MATCHUP,MIN,OREB,PF,PLUS_MINUS,PTS,REB,SEASON_ID,STL,TEAM_ABBREVIATION,TEAM_ID,TEAM_NAME,TOV,VIDEO_AVAILABLE,WL
0,39,6,38,29,11,0.379,109,57,0.523,24,19,0.792,2015-04-10,21401184,DAL @ DEN,290,11,23,1,144,49,22014,8,DAL,1610612742,Dallas Mavericks,17,1,W
1,35,1,30,15,4,0.267,91,56,0.615,24,19,0.792,2015-04-01,21401120,DAL @ OKC,240,4,23,4,135,34,22014,8,DAL,1610612742,Dallas Mavericks,6,1,W
2,31,3,31,20,15,0.75,84,56,0.667,16,12,0.75,2014-11-14,21400128,NOP vs. MIN,240,3,30,48,139,34,22014,13,NOP,1610612740,New Orleans Pelicans,13,1,W
3,29,1,34,43,15,0.349,125,53,0.424,25,22,0.88,2015-04-10,21401184,DEN vs. DAL,290,27,26,-1,143,61,22014,10,DEN,1610612743,Denver Nuggets,15,1,L
4,42,6,23,36,20,0.556,88,53,0.602,7,4,0.571,2015-03-09,21400939,ATL vs. SAC,240,10,18,25,130,33,22014,8,ATL,1610612737,Atlanta Hawks,13,1,W


In [10]:
team_cols = ['TEAM_ID','TEAM_NAME','TEAM_ABBREVIATION','SEASON_ID',
             'GAME_DATE','GAME_ID','MATCHUP','WL',
             'PTS','REB','STL','TOV','AST','BLK','DREB','FG3A','FG3M',
             'FG3_PCT','FGA','FGM','FG_PCT','FTA','FTM','FT_PCT','MIN',
             'OREB','PF','PLUS_MINUS','VIDEO_AVAILABLE']

df_games = df_games[team_cols]

## Getting Player Level Data

In [11]:
player_list = goldsberry.PlayerList()

In [12]:
player_list.SET_parameters(Season = '2014-15')

In [13]:
df_players = pd.DataFrame(player_list.players())

In [14]:
df_players.head()

Unnamed: 0,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,FROM_YEAR,GAMES_PLAYED_FLAG,PERSON_ID,PLAYERCODE,ROSTERSTATUS,TEAM_ABBREVIATION,TEAM_CITY,TEAM_CODE,TEAM_ID,TEAM_NAME,TO_YEAR
0,Quincy Acy,"Acy, Quincy",2012,Y,203112,quincy_acy,1,SAC,Sacramento,kings,1610612758,Kings,2015
1,Jordan Adams,"Adams, Jordan",2014,Y,203919,jordan_adams,1,MEM,Memphis,grizzlies,1610612763,Grizzlies,2015
2,Steven Adams,"Adams, Steven",2013,Y,203500,steven_adams,1,OKC,Oklahoma City,thunder,1610612760,Thunder,2015
3,Arron Afflalo,"Afflalo, Arron",2007,Y,201167,arron_afflalo,1,NYK,New York,knicks,1610612752,Knicks,2015
4,Alexis Ajinca,"Ajinca, Alexis",2008,Y,201582,alexis_ajinca,1,NOP,New Orleans,pelicans,1610612740,Pelicans,2015


## Getting game logs for the entire league

Now that we know how to get the game log data for a single person, we can combine that knowledge with information in the `df_players` to loop through the entire league and create a dataset of player-level game logs for the entire league.

To do this, we're going to iterate over the `PERSON_ID` column in our `df_players`. We're going to save the results of each iteration to an ever expanding list, `league_logs`. Once we're done with the loop, we're going to convert it to a dataframe. 

In [15]:
player_log = goldsberry.player.game_logs(203112)

In [16]:
player_log.SET_parameters(Season='2014-15')

In [17]:
league_logs = []
for _ , pid in df_players.PERSON_ID.iteritems():
    player_log.SET_parameters(PlayerID = pid)
    player_log.GET_raw_data()
    league_logs[0:0] = player_log.logs()

df_gamelogs = pd.DataFrame(league_logs)

Because we don't remember all of the Player's names by their ID, we're going to use our `df_players` data frame to append the appropriate name and team to the game log data. Finally, we're going to rearrange the columns because it doesn't make sense to keep them in alphabetical order. The table will be easier to understand if it's in some reasonable order given the nature of the data. 

In [18]:
df_gamelogs = pd.merge(df_gamelogs, df_players.loc[:,['DISPLAY_FIRST_LAST', 'PERSON_ID']], left_on = 'Player_ID', right_on='PERSON_ID')

In [19]:
col_order = ['Player_ID','DISPLAY_FIRST_LAST',
             'SEASON_ID','GAME_DATE','Game_ID','MATCHUP','WL',
             'PTS','REB','STL','TOV','AST','BLK','DREB','FG3A','FG3M',
             'FG3_PCT','FGA','FGM','FG_PCT','FTA','FTM','FT_PCT','MIN',
             'OREB','PF','PLUS_MINUS','VIDEO_AVAILABLE']
df_gamelogs = df_gamelogs[col_order]

# Cumulative totals by team

This number can potentially be computed at the game level or at the season level and reapplied to PER retroactively. 

In [20]:
df_games.loc[df_games.GAME_ID == '0021500391']

Unnamed: 0,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,SEASON_ID,GAME_DATE,GAME_ID,MATCHUP,WL,PTS,REB,STL,TOV,AST,BLK,DREB,FG3A,FG3M,FG3_PCT,FGA,FGM,FG_PCT,FTA,FTM,FT_PCT,MIN,OREB,PF,PLUS_MINUS,VIDEO_AVAILABLE


In [21]:
df_games.sort_values('GAME_ID', inplace=True)
df_games.reset_index(drop=True, inplace=True)

In [22]:
df_games = df_games.merge(df_games.groupby(['TEAM_ID', 'GAME_DATE'])['AST', 'FGA', 'FGM', 'PTS'].cumsum(),left_index=True, right_index=True, suffixes=('', '_CUM_TOTALS'))

In [23]:
df_games['GAME_NUMBER'] = df_games.groupby(['TEAM_ID', 'GAME_DATE']).cumcount()+1

In [24]:
df_games.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,SEASON_ID,GAME_DATE,GAME_ID,MATCHUP,WL,PTS,REB,STL,TOV,AST,BLK,DREB,FG3A,FG3M,FG3_PCT,FGA,FGM,FG_PCT,FTA,FTM,FT_PCT,MIN,OREB,PF,PLUS_MINUS,VIDEO_AVAILABLE,AST_CUM_TOTALS,FGA_CUM_TOTALS,FGM_CUM_TOTALS,PTS_CUM_TOTALS,GAME_NUMBER
0,1610612753,Orlando Magic,ORL,22014,2014-10-28,21400001,ORL @ NOP,L,84,56,5,18,17,9,40,11,4,0.364,84,32,0.381,21,16,0.762,240,16,25,-17,1,17,84,32,84,1
1,1610612740,New Orleans Pelicans,NOP,22014,2014-10-28,21400001,NOP vs. ORL,W,101,62,10,9,20,17,36,17,4,0.235,101,41,0.406,31,15,0.484,240,26,17,17,1,20,101,41,101,1
2,1610612759,San Antonio Spurs,SAS,22014,2014-10-28,21400002,SAS vs. DAL,W,101,38,5,21,23,3,29,28,14,0.5,70,37,0.529,16,13,0.813,240,9,20,1,1,23,70,37,101,1
3,1610612742,Dallas Mavericks,DAL,22014,2014-10-28,21400002,DAL @ SAS,L,100,33,9,10,17,3,24,21,8,0.381,78,38,0.487,19,16,0.842,240,9,20,-1,1,17,78,38,100,1
4,1610612745,Houston Rockets,HOU,22014,2014-10-28,21400003,HOU @ LAL,W,108,47,7,14,22,3,33,29,12,0.414,73,31,0.425,50,34,0.68,240,14,30,18,1,22,73,31,108,1


In [25]:
df_games = df_games.merge(
    (df_games
        .groupby(['GAME_DATE'])['AST', 'FGA', 'FGM', 'REB', 'OREB', 'TOV', 'FTA', 'FTM', 'PF', 'PTS']
        .sum()),
    left_on='GAME_DATE', right_index=True, 
    suffixes=('', '_LEAGUE'))

In [26]:
g = df_games.groupby('GAME_DATE')['TEAM_ID'].nunique()

In [27]:
g.name = 'NUM_TEAMS'
df_games = df_games.join(g, on='GAME_DATE')

In [28]:
cum_cols = ['AST_LEAGUE', 'FGA_LEAGUE', 'FGM_LEAGUE', 'REB_LEAGUE', 
            'OREB_LEAGUE', 'TOV_LEAGUE', 'FTA_LEAGUE', 'FTM_LEAGUE', 
            'PF_LEAGUE', 'PTS_LEAGUE', 'NUM_TEAMS']

d = df_games.merge(df_games.groupby('GAME_DATE')[cum_cols].cumsum(), 
               left_index=True, right_index=True,
               suffixes = ('', '_CUM'))

In [29]:
d.columns

Index([          u'TEAM_ID',         u'TEAM_NAME', u'TEAM_ABBREVIATION',
               u'SEASON_ID',         u'GAME_DATE',           u'GAME_ID',
                 u'MATCHUP',                u'WL',               u'PTS',
                     u'REB',               u'STL',               u'TOV',
                     u'AST',               u'BLK',              u'DREB',
                    u'FG3A',              u'FG3M',           u'FG3_PCT',
                     u'FGA',               u'FGM',            u'FG_PCT',
                     u'FTA',               u'FTM',            u'FT_PCT',
                     u'MIN',              u'OREB',                u'PF',
              u'PLUS_MINUS',   u'VIDEO_AVAILABLE',    u'AST_CUM_TOTALS',
          u'FGA_CUM_TOTALS',    u'FGM_CUM_TOTALS',    u'PTS_CUM_TOTALS',
             u'GAME_NUMBER',        u'AST_LEAGUE',        u'FGA_LEAGUE',
              u'FGM_LEAGUE',        u'REB_LEAGUE',       u'OREB_LEAGUE',
              u'TOV_LEAGUE',        u'FTA_LEAGUE', 

In [30]:
d.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2460 entries, 0 to 2459
Columns: 56 entries, TEAM_ID to TOV_LEAGUE_CUM
dtypes: float64(3), int64(46), object(7)
memory usage: 1.1+ MB


In [31]:
df_players.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 473 entries, 0 to 472
Columns: 13 entries, DISPLAY_FIRST_LAST to TO_YEAR
dtypes: int64(3), object(10)
memory usage: 51.7+ KB


In [32]:
df_gamelogs.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23630 entries, 0 to 23629
Columns: 28 entries, Player_ID to VIDEO_AVAILABLE
dtypes: float64(3), int64(19), object(6)
memory usage: 5.2+ MB


## Home and Away PPG

In [33]:
d = d.join(d.groupby('GAME_ID')['PTS'].sum(), on='GAME_ID', rsuffix='_GAMETOTAL')

In [34]:
df_gamelogs.columns

Index([         u'Player_ID', u'DISPLAY_FIRST_LAST',          u'SEASON_ID',
                u'GAME_DATE',            u'Game_ID',            u'MATCHUP',
                       u'WL',                u'PTS',                u'REB',
                      u'STL',                u'TOV',                u'AST',
                      u'BLK',               u'DREB',               u'FG3A',
                     u'FG3M',            u'FG3_PCT',                u'FGA',
                      u'FGM',             u'FG_PCT',                u'FTA',
                      u'FTM',             u'FT_PCT',                u'MIN',
                     u'OREB',                 u'PF',         u'PLUS_MINUS',
          u'VIDEO_AVAILABLE'],
      dtype='object')

In [35]:
d.columns

Index([          u'TEAM_ID',         u'TEAM_NAME', u'TEAM_ABBREVIATION',
               u'SEASON_ID',         u'GAME_DATE',           u'GAME_ID',
                 u'MATCHUP',                u'WL',               u'PTS',
                     u'REB',               u'STL',               u'TOV',
                     u'AST',               u'BLK',              u'DREB',
                    u'FG3A',              u'FG3M',           u'FG3_PCT',
                     u'FGA',               u'FGM',            u'FG_PCT',
                     u'FTA',               u'FTM',            u'FT_PCT',
                     u'MIN',              u'OREB',                u'PF',
              u'PLUS_MINUS',   u'VIDEO_AVAILABLE',    u'AST_CUM_TOTALS',
          u'FGA_CUM_TOTALS',    u'FGM_CUM_TOTALS',    u'PTS_CUM_TOTALS',
             u'GAME_NUMBER',        u'AST_LEAGUE',        u'FGA_LEAGUE',
              u'FGM_LEAGUE',        u'REB_LEAGUE',       u'OREB_LEAGUE',
              u'TOV_LEAGUE',        u'FTA_LEAGUE', 

# Computing PER

    uPER = (1 / MP) *
    [ 3P
    + (2/3) * AST
    + (2 - factor * (team_AST / team_FG)) * FG(FT *0.5 * (1 + (1 - (team_AST / team_FG)) + (2/3) * (team_AST / team_FG))) - PF * ((lg_FT / lg_PF) - 0.44 * (lg_FTA / lg_PF) * VOP) ]
    
    factor = (2 / 3) - (0.5 * (lg_AST / lg_FG)) / (2 * (lg_FG / lg_FT))
    VOP    = lg_PTS / (lg_FGA - lg_ORB + lg_TOV + 0.44 * lg_FTA)
    DRB%   = (lg_TRB - lg_ORB) / lg_TRB
    
Computing PER requires the construction of several league-wide statistics. I'm not sure if the league aggregations are updated as the season progresses or if they are relative to the day the game was played. To this point, I'll compute various possibilities, and use each of them in a separate PER rating.

In [36]:
def compute_FTM_PF(row):
    return row['FTM_LEAGUE_CUM']/row['PF_LEAGUE_CUM']

def compute_FTA_PF(row):
    return row['FTA_LEAGUE_CUM']/row['PF_LEAGUE_CUM']

In [37]:
def compute_factor(row):
    return (2/3) - (.5*row['AST_LEAGUE_CUM']/row['FGM_LEAGUE_CUM']/(2*row['FGM_LEAGUE_CUM']/row['FTM_LEAGUE_CUM']))

In [38]:
def compute_VOP(row):
    return row['PTS_LEAGUE_CUM']/(row['FGA_LEAGUE_CUM']-row['OREB_LEAGUE_CUM']+row['TOV_LEAGUE_CUM']+0.44*row['FTA_LEAGUE_CUM'])

In [39]:
def compute_DRB(row):
    return (row['REB_LEAGUE_CUM']-row['OREB_LEAGUE_CUM'])/row['REB_LEAGUE_CUM']

In [40]:
d['FACTOR'] = d.apply(compute_factor, axis = 1)
d['VOP'] = d.apply(compute_VOP, axis = 1)
d['DRB_PCT'] = d.apply(compute_DRB, axis = 1)
d['FTM_PF'] = d.apply(compute_FTM_PF, axis = 1)
d['FTA_PF'] = d.apply(compute_FTA_PF, axis = 1)

In [41]:
d['PACE_ADJUST'] = (2*d.PTS_LEAGUE_CUM/d.NUM_TEAMS_CUM)/d.PTS_GAMETOTAL

In [42]:
def compute_assisted_FG(row):
    return row['AST_TEAM_DAILY']/row['FGM_TEAM_DAILY']

In [43]:
log_cols = ['Player_ID', 'DISPLAY_FIRST_LAST', 'Game_ID', 'MATCHUP', 'GAME_DATE',
            'MIN', 'FG3M', 'AST', 'FGM', 'FTM',
            'TOV', 'FGA', 'FTA', 'FTM', 'REB', 
            'OREB', 'STL', 'BLK', 'PF']

In [44]:
game_cols = ['GAME_ID', 'MATCHUP','VOP', 'FACTOR', 'DRB_PCT', 'FTM_PF', 'FTA_PF', 'PACE_ADJUST']

In [45]:
df_PER = (df_gamelogs[log_cols]
    .merge(d[game_cols], left_on=['Game_ID', 'MATCHUP'], right_on=['GAME_ID', 'MATCHUP'])
    .merge(d[['GAME_ID', 'MATCHUP', 'AST', 'FGM']], left_on=['Game_ID', 'MATCHUP'], right_on=['GAME_ID', 'MATCHUP'], suffixes=('', '_TEAM_DAILY')))

In [46]:
df_PER.head()

Unnamed: 0,Player_ID,DISPLAY_FIRST_LAST,Game_ID,MATCHUP,GAME_DATE,MIN,FG3M,AST,FGM,FTM,TOV,FGA,FTA,FTM.1,REB,OREB,STL,BLK,PF,GAME_ID,VOP,FACTOR,DRB_PCT,FTM_PF,FTA_PF,PACE_ADJUST,GAME_ID_TEAM_DAILY,AST_TEAM_DAILY,FGM_TEAM_DAILY
0,203092,Tyler Zeller,21401221,BOS @ MIL,"APR 15, 2015",14,0,1,4,0,0,7,0,0,3,2,0,0,1,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42
1,203923,James Young,21401221,BOS @ MIL,"APR 15, 2015",24,1,2,3,2,0,7,4,2,1,0,0,0,3,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42
2,202323,Evan Turner,21401221,BOS @ MIL,"APR 15, 2015",18,0,6,3,1,3,5,2,1,2,0,1,0,1,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42
3,203096,Jared Sullinger,21401221,BOS @ MIL,"APR 15, 2015",28,1,4,5,0,2,14,2,0,4,1,1,3,2,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42
4,203935,Marcus Smart,21401221,BOS @ MIL,"APR 15, 2015",24,2,6,2,1,0,5,2,1,4,1,3,1,1,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42


In [47]:
def line_1(row):
    return 1/row['MIN']

def line_2(row):
    return row['FG3M']

def line_3(row):
    return 2/3*row['AST']

def line_4(row):
    return (2 - row['FACTOR'] * compute_assisted_FG(row)) * row['FGM']

def line_5a(row):
    return row['FTM']*.5

def line_5b(row):
    return 1 + (1 - compute_assisted_FG(row))

def line_5c(row):
    return 2/3*compute_assisted_FG(row)

def line_5(row):
    return line_5a(row)*line_5b(row) + line_5c(row)

def line_6(row):
    return row['VOP']*row['TOV']

def line_7(row):
    return row['VOP']*row['DRB_PCT']*(row['FGA'] - row['FGM'])

def line_8(row):
    return row['VOP']*.44*(.44 + (.56*row['DRB_PCT']))*(row['FTA']-row['FTM'])

def line_9(row):
    return row['VOP']*(1 - row['DRB_PCT'])*(row['REB']-row['OREB'])

def line_10(row):
    return row['VOP']*row['DRB_PCT']*row['OREB']

def line_11(row):
    return row['VOP']*row['STL']

def line_12(row):
    return row['VOP']*row['DRB_PCT']*row['BLK']

def line_13(row):
    return row['PF']*(row['FTM_PF'] - .44*row['FTA_PF']*row['VOP'])

In [48]:
def uPER(row):
    uPER = (line_2(row) + 
        line_3(row) + 
        line_4(row) + 
        line_5(row) -
        line_6(row) -
        line_7(row) -
        line_8(row) +
        line_9(row) +
        line_10(row)+
        line_11(row)+
        line_12(row)-
        line_13(row))
    return uPER[0]

In [49]:
df_PER['uPER'] = df_PER.apply(uPER, axis = 1)

In [50]:
df_PER['aPER'] = df_PER.uPER*df_PER.PACE_ADJUST

In [51]:
df_PER['PER'] = 0

In [52]:
df_PER.loc[df_PER.MIN > 0, 'PER'] = df_PER.loc[df_PER.MIN > 0, 'aPER']/df_PER.loc[df_PER.MIN > 0, 'MIN']

In [61]:
df_PER.head()

Unnamed: 0,Player_ID,DISPLAY_FIRST_LAST,Game_ID,MATCHUP,GAME_DATE,MIN,FG3M,AST,FGM,FTM,TOV,FGA,FTA,FTM.1,REB,OREB,STL,BLK,PF,GAME_ID,VOP,FACTOR,DRB_PCT,FTM_PF,FTA_PF,PACE_ADJUST,GAME_ID_TEAM_DAILY,AST_TEAM_DAILY,FGM_TEAM_DAILY,uPER,aPER,PER
0,203092,Tyler Zeller,21401221,BOS @ MIL,2015-04-15,14,0,1,4,0,0,7,0,0,3,2,0,0,1,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,6.289475,6.394665,0.456762
1,203923,James Young,21401221,BOS @ MIL,2015-04-15,24,1,2,3,2,0,7,4,2,1,0,0,0,3,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,3.828739,3.892773,0.162199
2,202323,Evan Turner,21401221,BOS @ MIL,2015-04-15,18,0,6,3,1,3,5,2,1,2,0,1,0,1,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,5.701329,5.796682,0.322038
3,203096,Jared Sullinger,21401221,BOS @ MIL,2015-04-15,28,1,4,5,0,2,14,2,0,4,1,1,3,2,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,5.997121,6.097421,0.217765
4,203935,Marcus Smart,21401221,BOS @ MIL,2015-04-15,24,2,6,2,1,0,5,2,1,4,1,3,1,1,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,12.538511,12.748215,0.531176


In [54]:
df_PER['GAME_DATE'] = pd.to_datetime(df_PER.GAME_DATE)

In [55]:
df_min_filter = df_PER.loc[df_PER.MIN >=8].drop(['PER_CUM_SUM', 'PER_CUM_COUNT'], axis=1)

In [56]:
df_min_filter = (df_min_filter
    .join(df_min_filter.sort_values('GAME_DATE').groupby('GAME_DATE').PER.sum().cumsum(), on='GAME_DATE', rsuffix='_CUM_SUM')
    .join(df_min_filter.sort_values('GAME_DATE').groupby('GAME_DATE').PER.count().cumsum(), on='GAME_DATE', rsuffix='_CUM_COUNT'))

In [57]:
df_min_filter['AVG_PER'] = df_min_filter['PER_CUM_SUM']/df_min_filter['PER_CUM_COUNT']

In [62]:
df_min_filter.head()

Unnamed: 0,Player_ID,DISPLAY_FIRST_LAST,Game_ID,MATCHUP,GAME_DATE,MIN,FG3M,AST,FGM,FTM,TOV,FGA,FTA,FTM.1,REB,OREB,STL,BLK,PF,GAME_ID,VOP,FACTOR,DRB_PCT,FTM_PF,FTA_PF,PACE_ADJUST,GAME_ID_TEAM_DAILY,AST_TEAM_DAILY,FGM_TEAM_DAILY,uPER,aPER,PER,PER_CUM_SUM,PER_CUM_COUNT,AVG_PER,HollingerPER
0,203092,Tyler Zeller,21401221,BOS @ MIL,2015-04-15,14,0,1,4,0,0,7,0,0,3,2,0,0,1,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,6.289475,6.394665,0.456762,6045.023742,21669,0.278971,24.559634
1,203923,James Young,21401221,BOS @ MIL,2015-04-15,24,1,2,3,2,0,7,4,2,1,0,0,0,3,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,3.828739,3.892773,0.162199,6045.023742,21669,0.278971,8.721275
2,202323,Evan Turner,21401221,BOS @ MIL,2015-04-15,18,0,6,3,1,3,5,2,1,2,0,1,0,1,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,5.701329,5.796682,0.322038,6045.023742,21669,0.278971,17.315661
3,203096,Jared Sullinger,21401221,BOS @ MIL,2015-04-15,28,1,4,5,0,2,14,2,0,4,1,1,3,2,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,5.997121,6.097421,0.217765,6045.023742,21669,0.278971,11.709013
4,203935,Marcus Smart,21401221,BOS @ MIL,2015-04-15,24,2,6,2,1,0,5,2,1,4,1,3,1,1,21401221,1.050759,0.604605,0.745394,0.874296,1.202627,1.016725,21401221,37,42,12.538511,12.748215,0.531176,6045.023742,21669,0.278971,28.560792


In [58]:
df_min_filter['HollingerPER'] = df_min_filter['PER']*(15/df_min_filter['AVG_PER'])

In [59]:
df_min_filter.loc[(df_min_filter.GAME_DATE == '2015-10-27'), ['DISPLAY_FIRST_LAST', 'HollingerPER']].sort_values('HollingerPER', ascending=False).head(15).reset_index()

Unnamed: 0,index,DISPLAY_FIRST_LAST,HollingerPER


In [66]:
!pwd

/home/jovyan/work/_Cardinal/analyzing-basketball-data/py-Goldsberry/docs


In [67]:
df_min_filter.to_csv('2014_holligerPER.csv')