In [78]:
import json
from urllib.parse import urlencode
from urllib.request import urlretrieve
import pandas as pd
import numpy as np
import requests
from lxml import html

In [2]:
def get_nba_data(endpt, params, return_url=False):

    ## endpt: https://github.com/seemethere/nba_py/wiki/stats.nba.com-Endpoint-Documentation
    ## params: dictionary of parameters: i.e., {'LeagueID':'00'}

    useragent = "\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9\""

    dataurl = "\"" + "http://stats.nba.com/stats/" + endpt + "?" + urlencode(params) + "\""
    
    # for debugging: just return the url
    if return_url:
        return(dataurl)
    
    jsonstr = !wget -q -O - --user-agent={useragent} {dataurl}
    
    data = json.loads(jsonstr[0])
    
    h = data['resultSets'][0]['headers']
    d = data['resultSets'][0]['rowSet']
    
    return(pd.DataFrame(d, columns=h))

In [3]:
games = get_nba_data('commonplayoffseries',{'LeagueID':'00','Season':'2017-18'})
games = games.set_index('GAME_ID')

In [4]:
gameIDs = games.index
gameIDs

Index(['0041700101', '0041700102', '0041700103', '0041700104', '0041700105',
       '0041700106', '0041700111', '0041700112', '0041700113', '0041700114',
       '0041700115', '0041700116', '0041700117', '0041700121', '0041700122',
       '0041700123', '0041700124', '0041700125', '0041700131', '0041700132',
       '0041700133', '0041700134', '0041700135', '0041700136', '0041700137',
       '0041700141', '0041700142', '0041700143', '0041700144', '0041700145',
       '0041700151', '0041700152', '0041700153', '0041700154', '0041700155',
       '0041700161', '0041700162', '0041700163', '0041700164', '0041700171',
       '0041700172', '0041700173', '0041700174', '0041700175', '0041700176',
       '0041700201', '0041700202', '0041700203', '0041700204', '0041700211',
       '0041700212', '0041700213', '0041700214', '0041700215', '0041700221',
       '0041700222', '0041700223', '0041700224', '0041700225', '0041700231',
       '0041700232', '0041700233', '0041700234', '0041700235', '0041700301',

In [5]:
game_ID = '0041700101'
params = {'GameID':game_ID,
          'StartPeriod':'0','EndPeriod':'0',
          'StartRange':'0','EndRange':'0',
          'RangeType':'0'
         }
boxScore = get_nba_data('boxscoretraditionalv2',params)

In [6]:
boxScore

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
0,41700101,1610612764,WAS,Washington,203490,Otto Porter Jr.,F,,31:51,4.0,...,0.0,5.0,5.0,1.0,1.0,1.0,0.0,1.0,9.0,-3.0
1,41700101,1610612764,WAS,Washington,202693,Markieff Morris,F,,37:57,9.0,...,2.0,9.0,11.0,6.0,0.0,0.0,0.0,5.0,22.0,-12.0
2,41700101,1610612764,WAS,Washington,101162,Marcin Gortat,C,,28:30,6.0,...,2.0,4.0,6.0,0.0,0.0,0.0,1.0,3.0,12.0,-6.0
3,41700101,1610612764,WAS,Washington,203078,Bradley Beal,G,,41:14,8.0,...,0.0,2.0,2.0,4.0,3.0,0.0,1.0,1.0,19.0,-10.0
4,41700101,1610612764,WAS,Washington,202322,John Wall,G,,39:09,6.0,...,0.0,3.0,3.0,15.0,4.0,2.0,5.0,3.0,23.0,2.0
5,41700101,1610612764,WAS,Washington,101133,Ian Mahinmi,,,2:36,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,2.0,-4.0
6,41700101,1610612764,WAS,Washington,203118,Mike Scott,,,26:57,7.0,...,2.0,1.0,3.0,0.0,0.0,0.0,2.0,2.0,14.0,6.0
7,41700101,1610612764,WAS,Washington,1626162,Kelly Oubre Jr.,,,16:09,1.0,...,0.0,5.0,5.0,1.0,1.0,0.0,2.0,3.0,3.0,-5.0
8,41700101,1610612764,WAS,Washington,203107,Tomas Satoransky,,,11:46,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,-6.0
9,41700101,1610612764,WAS,Washington,204025,Tim Frazier,,,3:51,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,-2.0


We're going to want to compile stats on a series basis. Going to want PLAYER_ID, SERIES_ID, GAME_ID,GAME_NUM, , box score stats.

In [7]:
game_ID = '0041700101'
game_num = games.GAME_NUM[game_ID]
series_ID = games.SERIES_ID[game_ID]
game_data = games.loc[[game_ID],['SERIES_ID','GAME_NUM']]

BS_Params = {'GameID':game_ID,
          'StartPeriod':'0','EndPeriod':'0',
          'StartRange':'0','EndRange':'0',
          'RangeType':'0'
         }
boxScore = get_nba_data('boxscoretraditionalv2',BS_Params)
boxScore = boxScore[['GAME_ID','PLAYER_ID','MIN','FGM','FGA','FG3M','FG3A','FTM','FTA','FT_PCT','OREB','DREB','AST',
         'STL','BLK','TO','PF','PTS','PLUS_MINUS'
        ]]
combined = pd.merge(game_data,boxScore,left_index=True,right_on='GAME_ID')
combined.set_index(['PLAYER_ID','SERIES_ID','GAME_NUM'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GAME_ID,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,FT_PCT,OREB,DREB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
PLAYER_ID,SERIES_ID,GAME_NUM,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
203490,4170010,1,41700101,31:51,4.0,7.0,1.0,3.0,0.0,0.0,0.0,0.0,5.0,1.0,1.0,1.0,0.0,1.0,9.0,-3.0
202693,4170010,1,41700101,37:57,9.0,15.0,1.0,4.0,3.0,3.0,1.0,2.0,9.0,6.0,0.0,0.0,0.0,5.0,22.0,-12.0
101162,4170010,1,41700101,28:30,6.0,9.0,0.0,0.0,0.0,0.0,0.0,2.0,4.0,0.0,0.0,0.0,1.0,3.0,12.0,-6.0
203078,4170010,1,41700101,41:14,8.0,17.0,2.0,6.0,1.0,1.0,1.0,0.0,2.0,4.0,3.0,0.0,1.0,1.0,19.0,-10.0
202322,4170010,1,41700101,39:09,6.0,20.0,3.0,5.0,8.0,10.0,0.8,0.0,3.0,15.0,4.0,2.0,5.0,3.0,23.0,2.0
101133,4170010,1,41700101,2:36,0.0,0.0,0.0,0.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,2.0,-4.0
203118,4170010,1,41700101,26:57,7.0,10.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,2.0,14.0,6.0
1626162,4170010,1,41700101,16:09,1.0,4.0,1.0,2.0,0.0,0.0,0.0,0.0,5.0,1.0,1.0,0.0,2.0,3.0,3.0,-5.0
203107,4170010,1,41700101,11:46,0.0,3.0,0.0,1.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,-6.0
204025,4170010,1,41700101,3:51,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,-2.0


In [8]:
for game_ID in gameIDs:
    game_num = games.GAME_NUM[game_ID]
    series_ID = games.SERIES_ID[game_ID]
    game_data = games.loc[[game_ID],['SERIES_ID','GAME_NUM']]

    BS_Params = {'GameID':game_ID,
              'StartPeriod':'0','EndPeriod':'0',
              'StartRange':'0','EndRange':'0',
              'RangeType':'0'
             }
    boxScore = get_nba_data('boxscoretraditionalv2',BS_Params)
    boxScore = boxScore[['GAME_ID','PLAYER_ID','MIN','FGM','FGA','FG3M','FG3A','FTM','FTA','FT_PCT','OREB','DREB','AST',
             'STL','BLK','TO','PF','PTS','PLUS_MINUS'
            ]]
    newRows = pd.merge(game_data,boxScore,left_index=True,right_on='GAME_ID')
    combined = pd.concat([combined,newRows])
combined = combined.set_index(['PLAYER_ID','SERIES_ID','GAME_ID'])
combined = combined.drop_duplicates()

In [9]:
idx = pd.IndexSlice
combined.loc[idx[2544,:,:],:]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GAME_NUM,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,FT_PCT,OREB,DREB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
PLAYER_ID,SERIES_ID,GAME_ID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2544,4170013,41700131,1,43:56,7.0,17.0,0.0,4.0,10.0,14.0,0.714,3.0,7.0,12.0,1.0,1.0,3.0,2.0,24.0,-13.0
2544,4170013,41700132,2,39:53,17.0,24.0,2.0,5.0,10.0,13.0,0.769,1.0,11.0,5.0,2.0,0.0,3.0,1.0,46.0,7.0
2544,4170013,41700133,3,41:41,10.0,22.0,4.0,7.0,4.0,6.0,0.667,3.0,9.0,8.0,1.0,1.0,6.0,1.0,28.0,-1.0
2544,4170013,41700134,4,46:09,12.0,22.0,0.0,5.0,8.0,9.0,0.889,2.0,11.0,7.0,0.0,2.0,1.0,3.0,32.0,3.0
2544,4170013,41700135,5,41:49,14.0,24.0,1.0,4.0,15.0,15.0,1.0,3.0,7.0,8.0,1.0,1.0,5.0,3.0,44.0,2.0
2544,4170013,41700136,6,31:11,7.0,16.0,3.0,6.0,5.0,5.0,1.0,0.0,5.0,7.0,1.0,2.0,4.0,3.0,22.0,-24.0
2544,4170013,41700137,7,43:25,16.0,25.0,2.0,3.0,11.0,15.0,0.733,2.0,6.0,7.0,4.0,0.0,4.0,4.0,45.0,-2.0
2544,4170020,41700201,1,46:56,12.0,30.0,1.0,8.0,1.0,6.0,0.167,0.0,11.0,13.0,1.0,2.0,1.0,2.0,26.0,5.0
2544,4170020,41700202,2,40:30,19.0,28.0,1.0,3.0,4.0,8.0,0.5,1.0,7.0,14.0,1.0,0.0,1.0,1.0,43.0,20.0
2544,4170020,41700203,3,41:16,14.0,26.0,1.0,4.0,9.0,11.0,0.818,1.0,5.0,7.0,3.0,1.0,4.0,1.0,38.0,2.0


In [11]:
minutes = pd.Series(combined['MIN'].str.split(":").str.get(0),dtype=float)
seconds = pd.Series(combined['MIN'].str.split(":").str.get(1),dtype=float)
combined['MIN']=minutes+seconds/60
combined['Percent_Minutes']= np.NAN
def percentMinutes(x):
    x['Percent_Minutes']=x['MIN'].sum()/(x['GAME_NUM'].max()*48)
    return x
    
combined = pd.DataFrame(combined.sort_index().groupby(['PLAYER_ID','SERIES_ID']).apply(percentMinutes))

In [12]:
combined

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GAME_NUM,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,FT_PCT,OREB,DREB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,Percent_Minutes
PLAYER_ID,SERIES_ID,GAME_ID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1891,004170011,0041700111,1,18.200000,1.0,1.0,1.0,1.0,0.0,0.0,0.000,0.0,0.0,0.0,1.0,0.0,1.0,2.0,3.0,0.0,0.130109
1891,004170011,0041700112,2,5.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.000,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-3.0,0.130109
1891,004170011,0041700117,7,20.383333,1.0,4.0,1.0,4.0,0.0,0.0,0.000,1.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,4.0,0.130109
1938,004170015,0041700151,1,16.700000,3.0,6.0,2.0,2.0,1.0,2.0,0.500,0.0,3.0,1.0,3.0,0.0,3.0,1.0,9.0,-15.0,0.444097
1938,004170015,0041700152,2,22.833333,4.0,9.0,0.0,3.0,2.0,2.0,1.000,0.0,2.0,3.0,2.0,0.0,0.0,3.0,10.0,6.0,0.444097
1938,004170015,0041700153,3,16.950000,0.0,5.0,0.0,4.0,0.0,0.0,0.000,1.0,1.0,0.0,2.0,1.0,1.0,3.0,0.0,-10.0,0.444097
1938,004170015,0041700154,4,25.250000,5.0,10.0,3.0,5.0,3.0,4.0,0.750,0.0,3.0,5.0,0.0,0.0,1.0,3.0,16.0,18.0,0.444097
1938,004170015,0041700155,5,24.850000,3.0,7.0,1.0,4.0,3.0,3.0,1.000,0.0,5.0,7.0,0.0,0.0,2.0,3.0,10.0,-3.0,0.444097
2037,004170014,0041700141,1,26.116667,4.0,11.0,3.0,7.0,4.0,4.0,1.000,0.0,2.0,2.0,0.0,0.0,0.0,0.0,15.0,5.0,0.512639
2037,004170014,0041700142,2,29.116667,3.0,9.0,0.0,2.0,2.0,3.0,0.667,1.0,3.0,2.0,0.0,0.0,0.0,1.0,8.0,-18.0,0.512639


In [231]:
user_agent = "\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9\""

headers = {'User-Agent':user_agent}
data_url = 'https://widgets.sports-reference.com/wg.fcgi?json=1&site=bbr&url=%2Fplayoffs%2FNBA_2018_advanced.html&div=div_advanced_stats'

page = requests.get(data_url,headers=headers)

In [148]:
tree = html.fromstring(page.text)

In [232]:
str(page.text).split('document.write(')[3][1:(n-4)]

'<table class="sortable stats_table" data-cols-to-freeze="2" id="advanced_stats"><caption>Advanced Table</caption><colgroup><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /><col /></colgroup><thead><tr><th aria-label="Rank" class="ranker poptip sort_default_asc show_partial_when_sorting center" data-stat="ranker" data-tip="Rank" scope="col">Rk</th><th aria-label="Player" class=" poptip sort_default_asc center" data-stat="player" scope="col">Player</th><th aria-label="Position" class=" poptip sort_default_asc center" data-stat="pos" data-tip="Position" scope="col">Pos</th><th aria-label="Age of Player at the start of February 1st of that season." class=" poptip sort_default_asc center" data-stat="age" data-tip="Age of Player at the start of February 1st of that season." scope="col">Age</th><th aria-label="Team" class=" poptip sort_default_asc

In [233]:
data_url = 'https://www.basketball-reference.com/playoffs/NBA_2018_advanced.html'
user_agent = "\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9\""

headers = {'User-Agent':user_agent}
page = requests.get(data_url,headers=headers)

In [234]:
tree = html.fromstring(page.content)

In [374]:
namesTeam = tree.xpath('//tr[@class="full_table"]/td/a')
namesTeam

[<Element a at 0x7f11087394f8>,
 <Element a at 0x7f1108739638>,
 <Element a at 0x7f11087395e8>,
 <Element a at 0x7f1108739688>,
 <Element a at 0x7f11087396d8>,
 <Element a at 0x7f1108739728>,
 <Element a at 0x7f1108739778>,
 <Element a at 0x7f11087397c8>,
 <Element a at 0x7f1108739818>,
 <Element a at 0x7f1108739868>,
 <Element a at 0x7f11087398b8>,
 <Element a at 0x7f1108739908>,
 <Element a at 0x7f1108739958>,
 <Element a at 0x7f11087399a8>,
 <Element a at 0x7f11087399f8>,
 <Element a at 0x7f1108739a48>,
 <Element a at 0x7f1108739a98>,
 <Element a at 0x7f1108739ae8>,
 <Element a at 0x7f1108739b38>,
 <Element a at 0x7f1108739b88>,
 <Element a at 0x7f1108739bd8>,
 <Element a at 0x7f1108739c28>,
 <Element a at 0x7f1108739c78>,
 <Element a at 0x7f1108739cc8>,
 <Element a at 0x7f1108739d18>,
 <Element a at 0x7f1108739d68>,
 <Element a at 0x7f1108739db8>,
 <Element a at 0x7f1108739e08>,
 <Element a at 0x7f1108739e58>,
 <Element a at 0x7f1108739ea8>,
 <Element a at 0x7f1108739ef8>,
 <Elemen

In [403]:
cols = tree.xpath('//*[@id="advanced_stats"]/thead/tr/th/text()')[1:]
cols = cols[0:18]+cols[19:23]+cols[24:]
fullDfBBallRef = pd.DataFrame(columns=cols)
fullDfBBallRef

for i in range(0,len(namesTeam)//2):
    row = tree.xpath('//tr[@class="full_table"]/td')[28*i:(28*i+28)]
    row = [val.text for val in row]
    row2 = [namesTeam[2*i].text]+row[1:3]+[namesTeam[2*i+1].text]+row[4:18]+row[19:23]+row[24:]
#    print(row2)
    row2 = pd.DataFrame(pd.Series(row2,index=cols)).transpose()
    fullDfBBallRef=pd.concat([fullDfBBallRef,row2])

fullDfBBallRef

Unnamed: 0,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,FTr,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,Alex Abrines,SG,24,OKC,6,110,9.5,.575,.650,.100,...,4.6,8.7,0.1,0.1,0.2,.107,-1.5,1.6,0.1,0.1
0,Steven Adams,C,24,OKC,6,200,16.0,.609,.000,.283,...,7.2,12.2,0.5,0.2,0.7,.158,0.2,1.5,1.7,0.2
0,Bam Adebayo,C,20,MIA,5,77,6.1,.402,.067,.933,...,8.6,13.0,-0.1,0.0,-0.1,-0.036,-8.4,-2.1,-10.5,-0.2
0,LaMarcus Aldridge,C,32,SAS,5,177,24.8,.599,.063,.525,...,8.4,27.8,0.6,0.1,0.7,.189,1.8,1.8,3.6,0.3
0,Al-Farouq Aminu,PF,27,POR,4,131,21.3,.652,.577,.038,...,10.2,19.2,0.4,0.0,0.4,.152,3.0,0.4,3.4,0.2
0,Justin Anderson,SF,24,PHI,7,33,2.5,.500,.875,.000,...,20.0,12.7,0.0,0.0,0.0,.014,-5.9,-3.9,-9.7,-0.1
0,Kyle Anderson,SF,24,SAS,5,73,17.5,.620,.200,.200,...,12.1,15.5,0.1,0.1,0.2,.130,-0.6,5.1,4.5,0.1
0,Ryan Anderson,PF,29,HOU,11,95,7.0,.475,.750,.000,...,13.0,10.8,0.0,0.1,0.1,.049,-2.6,0.8,-1.7,0.0
0,Giannis Antetokounmpo,PF,23,MIL,7,280,26.6,.620,.116,.455,...,10.5,28.2,1.0,0.4,1.4,.239,4.5,2.2,6.8,0.6
0,Carmelo Anthony,PF,33,OKC,6,194,9.7,.452,.389,.208,...,7.1,19.1,-0.2,0.3,0.1,.018,-5.0,0.2,-4.8,-0.1


In [370]:
fullDfBBallRef

Unnamed: 0,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,FTr,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,Alex Abrines,SG,24,OKC,6,110,9.5,.575,.650,.100,...,4.6,8.7,0.1,0.1,0.2,.107,-1.5,1.6,0.1,0.1
0,Steven Adams,C,24,OKC,6,200,16.0,.609,.000,.283,...,7.2,12.2,0.5,0.2,0.7,.158,0.2,1.5,1.7,0.2
0,Bam Adebayo,C,20,MIA,5,77,6.1,.402,.067,.933,...,8.6,13.0,-0.1,0.0,-0.1,-0.036,-8.4,-2.1,-10.5,-0.2
0,LaMarcus Aldridge,C,32,SAS,5,177,24.8,.599,.063,.525,...,8.4,27.8,0.6,0.1,0.7,.189,1.8,1.8,3.6,0.3
0,Al-Farouq Aminu,PF,27,POR,4,131,21.3,.652,.577,.038,...,10.2,19.2,0.4,0.0,0.4,.152,3.0,0.4,3.4,0.2
0,Justin Anderson,SF,24,PHI,7,33,2.5,.500,.875,.000,...,20.0,12.7,0.0,0.0,0.0,.014,-5.9,-3.9,-9.7,-0.1
0,Kyle Anderson,SF,24,SAS,5,73,17.5,.620,.200,.200,...,12.1,15.5,0.1,0.1,0.2,.130,-0.6,5.1,4.5,0.1
0,Ryan Anderson,PF,29,HOU,11,95,7.0,.475,.750,.000,...,13.0,10.8,0.0,0.1,0.1,.049,-2.6,0.8,-1.7,0.0
0,Giannis Antetokounmpo,PF,23,MIL,7,280,26.6,.620,.116,.455,...,10.5,28.2,1.0,0.4,1.4,.239,4.5,2.2,6.8,0.6
0,Carmelo Anthony,PF,33,OKC,6,194,9.7,.452,.389,.208,...,7.1,19.1,-0.2,0.3,0.1,.018,-5.0,0.2,-4.8,-0.1


In [342]:
cols = tree.xpath('//*[@id="advanced_stats"]/thead/tr/th/text()')[1:]
cols = cols[0:18]+cols[19:23]+cols[24:]
fullDfBBallRef = pd.DataFrame(columns=cols)
fullDfBBallRef

Unnamed: 0,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,FTr,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP


In [367]:
row = tree.xpath('//tr[@class="full_table"]/td/text()')[0:24]
row2 = ["Alex Abrines"]+row[0:2]+["OKC"]+row[2:]
row2 = pd.DataFrame(pd.Series(row2,index=cols)).transpose()
pd.concat([fullDfBBallRef,row2])

Unnamed: 0,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,FTr,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,Alex Abrines,SG,24,OKC,6,110,9.5,0.575,0.65,0.1,...,4.6,8.7,0.1,0.1,0.2,0.107,-1.5,1.6,0.1,0.1


In [390]:
i = 0
row = tree.xpath('//tr[@class="full_table"]/td')[28*i:(28*i+28)]

In [400]:
[val.text for val in row][24:]

['-1.5', '1.6', '0.1', '0.1']