# Goal: 
Improve on the basic box score dataset by joining in more detailed box score statistics.

# Import libraries

Here, we import the libraries we need in order to perform data exploration and carpentry on our NBA box score data. The `sportsreference` package lets us query team, roster, and box score data programmatically.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from sportsreference.nba.teams import Teams
from sportsreference.nba.roster import Roster, Player
from sportsreference.nba.boxscore import Boxscore, Boxscores
pd.set_option('display.max_columns', 500)

Read in the nba box scores data from the source folder

In [2]:
# Load the basic box score CSV from the project's Data folder into a DataFrame.
boxscores_csv = '../Data/nba_boxscores.csv'
boxscores = pd.read_csv(boxscores_csv)

Let's see what the data looks like. Each row is one NBA game: the away and home teams with their scores, the winning and losing teams, and the season.

In [3]:
# Preview the first five rows of the basic box score table.
boxscores.head(n=5)

Unnamed: 0,boxscore,away_name,away_abbr,away_score,home_name,home_abbr,home_score,winning_name,winning_abbr,losing_name,losing_abbr,season
0,201810160BOS,Philadelphia,PHI,87,Boston,BOS,105,Boston,BOS,Philadelphia,PHI,2019
1,201810160GSW,Oklahoma City,OKC,100,Golden State,GSW,108,Golden State,GSW,Oklahoma City,OKC,2019
2,201810170CHO,Milwaukee,MIL,113,Charlotte,CHO,112,Milwaukee,MIL,Charlotte,CHO,2019
3,201810170DET,Brooklyn,BRK,100,Detroit,DET,103,Detroit,DET,Brooklyn,BRK,2019
4,201810170HOU,New Orleans,NOP,131,Houston,HOU,112,New Orleans,NOP,Houston,HOU,2019


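The tail of the table shows that the box scores cover NBA games from 1947 to 2019: the `boxscore` ID starts with the game date, and the `season` column of the last rows reads 1947, while the first rows above are from the 2019 season.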

In [5]:
# Preview the last five rows of the basic box score table.
boxscores.tail(n=5)

Unnamed: 0,boxscore,away_name,away_abbr,away_score,home_name,home_abbr,home_score,winning_name,winning_abbr,losing_name,losing_abbr,season
68401,194704160PHW,Chicago,CHS,71,Philadelphia,PHW,84,Philadelphia,PHW,Chicago,CHS,1947
68402,194704170PHW,Chicago,CHS,74,Philadelphia,PHW,85,Philadelphia,PHW,Chicago,CHS,1947
68403,194704190CHS,Philadelphia,PHW,75,Chicago,CHS,72,Philadelphia,PHW,Chicago,CHS,1947
68404,194704200CHS,Philadelphia,PHW,73,Chicago,CHS,74,Chicago,CHS,Philadelphia,PHW,1947
68405,194704220PHW,Chicago,CHS,80,Philadelphia,PHW,83,Philadelphia,PHW,Chicago,CHS,1947


In [6]:
# Start from an empty frame; the detailed per-game rows are appended to it below.
advanced_boxscores = pd.DataFrame(data=None)

In this for loop, we build a more detailed box score dataset. We combine the results of our basic win/loss box scores with more detailed statistics (offensive/defensive performance metrics, for example). We do this by taking the game IDs from the basic box score dataset and using them to query the more advanced statistics through the SportsReference package. We skip three rows because they raise missing-value errors inside the SportsReference package. We also write the accumulated results to CSV every 10,000 rows as a checkpoint, so that partial results are saved if the long-running loop is interrupted.

In [7]:
# Build the detailed box score table: for every game ID in the basic
# `boxscores` table, fetch that game through sportsreference's Boxscore and
# collect the dataframe it exposes.

# Row positions that are skipped because Boxscore raises missing-value
# errors for them.
skip_rows = {42740, 43115, 43122}
checkpoint_every = 10000
checkpoint_path = '../Data/advanced_boxscores.csv'

total_games = len(boxscores)

# Collect the per-game frames in a list and concatenate only at checkpoints
# and at the end; calling pd.concat on the growing frame every iteration
# recopies all previously collected rows each time.
# Rows already held in advanced_boxscores stay in front, as before.
frames = [] if advanced_boxscores.empty else [advanced_boxscores]

try:
    # Iterate over exactly the rows that exist. The previous
    # range(len(boxscores)+1) ran one index past the last row and would
    # fail on the final lookup. Positional iteration matches the old
    # boxscores.boxscore[n] lookup for the default 0..N-1 index that
    # pd.read_csv produces.
    for n, boxscore_id in enumerate(boxscores.boxscore):
        print(total_games - n)  # countdown of rows still to process
        if n not in skip_rows:
            game = Boxscore(boxscore_id)
            box = game.dataframe.reset_index().rename(columns={'index':'boxscore'})
            frames.append(box)
        # Checkpoint after rows 10000, 20000, ... so partial results are
        # on disk if the run is interrupted.
        if n > 0 and n % checkpoint_every == 0 and frames:
            advanced_boxscores = pd.concat(frames)
            frames = [advanced_boxscores]
            # index=False matches the final export of this same file; the
            # concatenated index only repeats each per-game frame's own
            # row labels.
            advanced_boxscores.to_csv(checkpoint_path, index=False)
finally:
    # Also runs on KeyboardInterrupt, so whatever was fetched so far is
    # still available in advanced_boxscores afterwards.
    if frames:
        advanced_boxscores = pd.concat(frames)

68406
68405
68404
68403
68402
68401
68400
68399
68398
68397
68396
68395
68394
68393
68392
68391
68390
68389
68388
68387
68386
68385
68384
68383
68382
68381
68380
68379
68378
68377
68376
68375
68374
68373
68372
68371
68370
68369
68368
68367
68366
68365
68364
68363
68362
68361
68360
68359
68358
68357
68356
68355
68354
68353
68352
68351
68350
68349
68348
68347
68346
68345
68344
68343
68342
68341
68340
68339
68338
68337
68336
68335
68334
68333
68332
68331
68330
68329
68328
68327
68326
68325
68324
68323
68322
68321
68320
68319
68318
68317
68316
68315
68314
68313
68312
68311
68310
68309
68308
68307
68306
68305
68304
68303
68302
68301
68300
68299
68298
68297
68296
68295
68294
68293
68292
68291
68290
68289
68288
68287
68286
68285
68284
68283
68282
68281
68280
68279
68278
68277
68276
68275
68274
68273
68272
68271
68270
68269
68268
68267
68266
68265
68264
68263
68262
68261
68260
68259
68258
68257
68256
68255
68254
68253
68252
68251
68250
68249
68248
68247
68246
68245
68244
68243
68242
68241
6824

67040
67039
67038
67037
67036
67035
67034
67033
67032
67031
67030
67029
67028
67027
67026
67025
67024
67023
67022
67021
67020
67019
67018
67017
67016
67015
67014
67013
67012
67011
67010
67009
67008
67007
67006
67005
67004
67003
67002
67001
67000
66999
66998
66997
66996
66995
66994
66993
66992
66991
66990
66989
66988
66987
66986
66985
66984
66983
66982
66981
66980
66979
66978
66977
66976
66975
66974
66973
66972
66971
66970
66969
66968
66967
66966
66965
66964
66963
66962
66961
66960
66959
66958
66957
66956
66955
66954
66953
66952
66951
66950
66949
66948
66947
66946
66945
66944
66943
66942
66941
66940
66939
66938
66937
66936
66935
66934
66933
66932
66931
66930
66929
66928
66927
66926
66925
66924
66923
66922
66921
66920
66919
66918
66917
66916
66915
66914
66913
66912
66911
66910
66909
66908
66907
66906
66905
66904
66903
66902
66901
66900
66899
66898
66897
66896
66895
66894
66893
66892
66891
66890
66889
66888
66887
66886
66885
66884
66883
66882
66881
66880
66879
66878
66877
66876
66875
6687

65674
65673
65672
65671
65670
65669
65668
65667
65666
65665
65664
65663
65662
65661
65660
65659
65658
65657
65656
65655
65654
65653
65652
65651
65650
65649
65648
65647
65646
65645
65644
65643
65642
65641
65640
65639
65638
65637
65636
65635
65634
65633
65632
65631
65630
65629
65628
65627
65626
65625
65624
65623
65622
65621
65620
65619
65618
65617
65616
65615
65614
65613
65612
65611
65610
65609
65608
65607
65606
65605
65604
65603
65602
65601
65600
65599
65598
65597
65596
65595
65594
65593
65592
65591
65590
65589
65588
65587
65586
65585
65584
65583
65582
65581
65580
65579
65578
65577
65576
65575
65574
65573
65572
65571
65570
65569
65568
65567
65566
65565
65564
65563
65562
65561
65560
65559
65558
65557
65556
65555
65554
65553
65552
65551
65550
65549


KeyboardInterrupt: 

In [13]:
# Report how many rows and columns were collected.
n_rows, n_cols = advanced_boxscores.shape
(n_rows, n_cols)

(43605, 83)

Below we will look at the more detailed boxscore dataframe. Notice that we have added more advanced metrics to the original basic box score. Before, we only had info on which teams played and who won versus who lost. Now we have in-depth game metrics that we can investigate. 

In [14]:
# Preview the first five rows of the detailed box score table.
advanced_boxscores.head(n=5)

Unnamed: 0,boxscore,away_assist_percentage,away_assists,away_block_percentage,away_blocks,away_defensive_rating,away_defensive_rebound_percentage,away_defensive_rebounds,away_effective_field_goal_percentage,away_field_goal_attempts,away_field_goal_percentage,away_field_goals,away_free_throw_attempt_rate,away_free_throw_attempts,away_free_throw_percentage,away_free_throws,away_losses,away_minutes_played,away_offensive_rating,away_offensive_rebound_percentage,away_offensive_rebounds,away_personal_fouls,away_points,away_steal_percentage,away_steals,away_three_point_attempt_rate,away_three_point_field_goal_attempts,away_three_point_field_goal_percentage,away_three_point_field_goals,away_total_rebound_percentage,away_total_rebounds,away_true_shooting_percentage,away_turnover_percentage,away_turnovers,away_two_point_field_goal_attempts,away_two_point_field_goal_percentage,away_two_point_field_goals,away_wins,date,home_assist_percentage,home_assists,home_block_percentage,home_blocks,home_defensive_rating,home_defensive_rebound_percentage,home_defensive_rebounds,home_effective_field_goal_percentage,home_field_goal_attempts,home_field_goal_percentage,home_field_goals,home_free_throw_attempt_rate,home_free_throw_attempts,home_free_throw_percentage,home_free_throws,home_losses,home_minutes_played,home_offensive_rating,home_offensive_rebound_percentage,home_offensive_rebounds,home_personal_fouls,home_points,home_steal_percentage,home_steals,home_three_point_attempt_rate,home_three_point_field_goal_attempts,home_three_point_field_goal_percentage,home_three_point_field_goals,home_total_rebound_percentage,home_total_rebounds,home_true_shooting_percentage,home_turnover_percentage,home_turnovers,home_two_point_field_goal_attempts,home_two_point_field_goal_percentage,home_two_point_field_goals,home_wins,location,losing_abbr,losing_name,pace,winner,winning_abbr,winning_name
0,201810160BOS,52.9,18,8.3,5,100.7,77.4,41,0.42,87,0.391,34,0.264,23,0.609,14,0,240,83.4,12.2,6,20,87,7.7,8,0.299,26,0.192,5,46.1,47,0.448,14.1,16,61,0.475,29,0,"8:00 PM, October 16, 2018",50.0,21,8.2,5,83.4,87.8,43,0.49,97,0.433,42,0.144,14,0.714,10,0,240,100.7,22.6,12,20,105,6.7,7,0.381,37,0.297,11,53.9,55,0.509,11.9,14,60,0.517,31,1,"TD Garden, Boston, Massachusetts",PHI,Philadelphia 76ers,104.3,Home,BOS,Boston Celtics
0,201810160GSW,63.6,21,8.7,6,105.6,63.0,29,0.418,91,0.363,33,0.407,37,0.649,24,1,240,97.7,28.1,16,21,100,11.7,12,0.407,37,0.27,10,43.7,45,0.466,11.5,14,54,0.426,23,0,"10:30 PM, October 16, 2018",66.7,28,13.0,7,97.7,71.9,41,0.479,95,0.442,42,0.189,18,0.944,17,0,240,105.6,37.0,17,29,108,6.8,7,0.274,26,0.269,7,56.3,58,0.525,16.9,21,69,0.507,35,1,"Oracle Arena, Oakland, California",OKC,Oklahoma City Thunder,102.3,Home,GSW,Golden State Warriors
0,201810170CHO,61.9,26,7.4,4,109.2,83.6,46,0.576,85,0.494,42,0.235,20,0.75,15,0,240,110.2,25.6,11,25,113,4.9,5,0.4,34,0.412,14,58.2,57,0.602,18.3,21,51,0.549,28,1,"7:00 PM, October 17, 2018",51.2,21,17.6,9,110.2,74.4,32,0.533,92,0.446,41,0.239,22,0.636,14,1,240,109.2,16.4,9,19,112,7.8,8,0.413,38,0.421,16,41.8,41,0.551,9.8,11,54,0.463,25,0,"Spectrum Center, Charlotte, North Carolina",CHO,Charlotte Hornets,102.6,Away,MIL,Milwaukee Bucks
0,201810170DET,70.0,28,7.4,5,103.0,70.8,34,0.518,82,0.488,40,0.268,22,0.682,15,1,240,100.0,13.5,5,23,100,9.0,9,0.329,27,0.185,5,45.9,39,0.545,15.6,17,55,0.636,35,0,"7:00 PM, October 17, 2018",53.8,21,9.1,5,100.0,86.5,32,0.457,92,0.424,39,0.239,22,0.864,19,0,240,103.0,29.2,14,20,103,5.0,5,0.261,24,0.25,6,54.1,46,0.506,12.1,14,68,0.485,33,1,"Little Caesars Arena, Detroit, Michigan",BRK,Brooklyn Nets,100.0,Home,DET,Detroit Pistons
0,201810170HOU,69.2,36,6.8,3,108.8,83.3,40,0.582,98,0.531,52,0.224,22,0.773,17,0,240,127.2,32.6,14,25,131,7.8,8,0.255,25,0.4,10,59.3,54,0.608,10.0,12,73,0.575,42,1,"8:00 PM, October 17, 2018",53.8,21,9.6,7,127.2,67.4,29,0.511,92,0.424,39,0.261,24,0.75,18,1,240,108.8,16.7,8,22,112,7.8,8,0.522,48,0.333,16,40.7,37,0.546,9.7,11,44,0.523,23,0,"Toyota Center, Houston, Texas",HOU,Houston Rockets,103.0,Away,NOP,New Orleans Pelicans


In [15]:
# Preview the last five rows of the detailed box score table.
advanced_boxscores.tail(n=5)

Unnamed: 0,boxscore,away_assist_percentage,away_assists,away_block_percentage,away_blocks,away_defensive_rating,away_defensive_rebound_percentage,away_defensive_rebounds,away_effective_field_goal_percentage,away_field_goal_attempts,away_field_goal_percentage,away_field_goals,away_free_throw_attempt_rate,away_free_throw_attempts,away_free_throw_percentage,away_free_throws,away_losses,away_minutes_played,away_offensive_rating,away_offensive_rebound_percentage,away_offensive_rebounds,away_personal_fouls,away_points,away_steal_percentage,away_steals,away_three_point_attempt_rate,away_three_point_field_goal_attempts,away_three_point_field_goal_percentage,away_three_point_field_goals,away_total_rebound_percentage,away_total_rebounds,away_true_shooting_percentage,away_turnover_percentage,away_turnovers,away_two_point_field_goal_attempts,away_two_point_field_goal_percentage,away_two_point_field_goals,away_wins,date,home_assist_percentage,home_assists,home_block_percentage,home_blocks,home_defensive_rating,home_defensive_rebound_percentage,home_defensive_rebounds,home_effective_field_goal_percentage,home_field_goal_attempts,home_field_goal_percentage,home_field_goals,home_free_throw_attempt_rate,home_free_throw_attempts,home_free_throw_percentage,home_free_throws,home_losses,home_minutes_played,home_offensive_rating,home_offensive_rebound_percentage,home_offensive_rebounds,home_personal_fouls,home_points,home_steal_percentage,home_steals,home_three_point_attempt_rate,home_three_point_field_goal_attempts,home_three_point_field_goal_percentage,home_three_point_field_goals,home_total_rebound_percentage,home_total_rebounds,home_true_shooting_percentage,home_turnover_percentage,home_turnovers,home_two_point_field_goal_attempts,home_two_point_field_goal_percentage,home_two_point_field_goals,home_wins,location,losing_abbr,losing_name,pace,winner,winning_abbr,winning_name
0,198405080MIL,52.2,12,2.7,2,97.3,62.5,25.0,0.343,70,0.329,23,0.743,52,0.654,34,3,240,84.9,24.0,12.0,29,82,6.2,6,0.086,6,0.333,2,41.1,37,0.441,16.2,18,64,0.328,21,2,"May 8, 1984",67.6,23,4.7,3,84.9,76.0,38.0,0.437,79,0.43,34,0.494,39,0.641,25,2,240,97.3,37.5,15.0,37,94,9.3,9,0.063,5,0.2,1,58.9,53,0.489,17.2,20,74,0.446,33,3,,NJN,New Jersey Nets,96.6,Home,MIL,Milwaukee Bucks
0,198405080UTA,52.4,22,4.7,4,120.5,60.5,23.0,0.457,94,0.447,42,0.277,26,0.769,20,2,240,108.2,34.0,16.0,25,106,9.2,9,0.053,5,0.4,2,45.9,39,0.503,11.0,13,89,0.449,40,3,"May 8, 1984",59.6,28,11.2,10,108.2,66.0,31.0,0.533,90,0.522,47,0.267,24,0.917,22,3,240,120.5,39.5,15.0,20,118,7.1,7,0.056,5,0.4,2,54.1,46,0.587,13.7,16,85,0.529,45,2,,PHO,Phoenix Suns,97.9,Home,UTA,Utah Jazz
0,198405090BOS,61.5,24,3.8,3,124.2,61.1,22.0,0.5,78,0.5,39,0.423,33,0.636,21,3,240,101.6,39.0,16.0,34,99,5.1,5,0.013,1,0.0,0,49.4,38,0.535,19.2,22,77,0.506,39,2,"May 9, 1984",59.1,26,11.7,9,101.6,61.0,25.0,0.549,81,0.543,44,0.506,41,0.78,32,2,240,124.2,38.9,14.0,31,121,15.4,15,0.012,1,1.0,1,50.6,39,0.611,13.9,16,80,0.537,43,3,,NYK,New York Knicks,97.5,Home,BOS,Boston Celtics
0,198405100NJN,56.8,21,2.7,2,100.6,77.3,34.0,0.425,87,0.425,37,0.356,31,0.774,24,2,240,101.6,45.8,22.0,28,98,8.3,8,0.069,6,0.0,0,60.9,56,0.487,17.3,21,81,0.457,37,4,"May 10, 1984",68.6,24,11.1,9,101.6,54.2,26.0,0.461,76,0.461,35,0.526,40,0.675,27,4,240,100.6,22.7,10.0,27,97,6.2,6,0.013,1,0.0,0,39.1,36,0.518,13.8,15,75,0.467,35,2,,NJN,New Jersey Nets,96.5,Away,MIL,Milwaukee Bucks
0,198405100PHO,61.3,19,6.3,5,,,,0.417,78,0.397,31,0.321,25,0.68,17,4,240,,,,24,82,,5,0.09,7,0.429,3,42.9,36,0.461,16.0,17,71,0.394,28,2,"May 10, 1984",68.3,28,8.5,6,,,,0.513,80,0.513,41,0.288,23,0.87,20,2,240,,,,28,102,,10,0.0,0,,0,57.1,48,0.566,17.4,19,80,0.512,41,4,,UTA,Utah Jazz,,Home,PHO,Phoenix Suns
0,198405110NYK,59.5,25,2.8,2,107.9,72.2,26.0,0.447,94,0.447,42,0.266,25,0.8,20,3,240,105.9,38.3,18.0,30,104,9.2,9,0.0,0,,0,53.0,44,0.495,13.2,16,94,0.447,42,3,"May 11, 1984",59.0,23,5.3,5,105.9,61.7,29.0,0.527,74,0.527,39,0.514,38,0.737,28,3,240,107.9,27.8,10.0,29,106,5.1,5,0.041,3,0.0,0,47.0,39,0.584,17.3,19,71,0.549,39,3,,BOS,Boston Celtics,98.2,Home,NYK,New York Knicks
0,198405120LAL,79.5,31,5.8,5,114.2,66.7,24.0,0.465,85,0.459,39,0.212,18,0.833,15,1,240,97.6,42.5,17.0,22,94,9.3,9,0.024,2,0.5,1,53.9,41,0.506,19.8,23,83,0.458,38,0,"May 12, 1984",67.3,33,3.6,3,97.6,57.5,23.0,0.544,91,0.538,49,0.176,16,0.688,11,0,240,114.2,33.3,12.0,23,110,14.5,14,0.055,5,0.2,1,46.1,35,0.561,14.0,16,86,0.558,48,1,,PHO,Phoenix Suns,96.4,Home,LAL,Los Angeles Lakers
0,198405130BOS,50.0,18,3.5,3,126.6,54.1,20.0,0.456,79,0.456,36,0.468,37,0.865,32,4,240,108.8,29.3,12.0,27,104,7.3,7,0.038,3,0.0,0,41.0,32,0.546,12.0,13,76,0.474,36,3,"May 13, 1984",60.0,27,3.9,3,108.8,70.7,29.0,0.523,88,0.511,45,0.398,35,0.829,29,3,240,126.6,45.9,17.0,29,121,8.4,8,0.034,3,0.667,2,59.0,46,0.585,14.1,17,85,0.506,43,4,,NYK,New York Knicks,95.6,Home,BOS,Boston Celtics
0,198405150BOS,40.5,15,7.5,6,125.6,72.1,31.0,0.416,89,0.416,37,0.303,27,0.815,22,1,240,101.4,32.6,14.0,29,96,6.3,6,0.034,3,0.0,0,52.3,45,0.476,9.8,11,86,0.43,37,0,"May 15, 1984",57.1,24,4.7,4,101.4,67.4,29.0,0.519,81,0.519,42,0.506,41,0.854,35,0,240,125.6,27.9,12.0,26,119,5.3,5,0.012,1,0.0,0,47.7,41,0.601,10.0,11,80,0.525,42,1,,MIL,Milwaukee Bucks,94.7,Home,BOS,Boston Celtics
0,198405150LAL,61.0,25,3.3,3,116.9,70.6,24.0,0.5,84,0.488,41,0.298,25,0.72,18,2,240,101.0,26.1,12.0,16,102,8.9,9,0.036,3,0.667,2,45.0,36,0.537,16.7,19,81,0.481,39,0,"May 15, 1984",69.8,37,4.9,4,101.0,73.9,34.0,0.576,92,0.576,53,0.141,13,0.923,12,0,240,116.9,29.4,10.0,22,118,10.9,11,0.022,2,0.0,0,55.0,44,0.604,14.1,16,90,0.589,53,2,,PHO,Phoenix Suns,101.0,Home,LAL,Los Angeles Lakers


Finally, we would like to write this to a csv in order to have access to it in the future. 

In [16]:
# Persist the detailed box scores for later use; the row index is not written.
advanced_boxscores.to_csv(path_or_buf='../Data/advanced_boxscores.csv', index=False)