In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
import warnings

In [2]:
#get the data
data = pd.read_html('https://www.basketball-reference.com/leagues/NBA_2019_totals.html')[0]
data.head()

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Álex Abrines,SG,25,OKC,31,2,588,56,157,...,0.923,5,43,48,20,17,6,14,53,165
1,2,Quincy Acy,PF,28,PHO,10,0,123,4,18,...,0.7,3,22,25,8,1,4,4,24,17
2,3,Jaylen Adams,PG,22,ATL,34,1,428,38,110,...,0.778,11,49,60,65,14,5,28,45,108
3,4,Steven Adams,C,25,OKC,80,80,2669,481,809,...,0.5,391,369,760,124,117,76,135,204,1108
4,5,Bam Adebayo,C,21,MIA,82,28,1913,280,486,...,0.735,165,432,597,184,71,65,121,203,729


In [3]:
#check to make sure the data loaded in correctly
data.describe()

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
count,734,734,734,734,734,734,734,734,734,734,...,691,734,734,734,734,734,734,734,734,734
unique,531,531,12,23,32,83,81,586,331,469,...,273,154,308,351,246,114,97,184,215,490
top,Rk,Player,SG,23,TOT,G,0,MP,FG,FGA,...,FT%,0,DRB,TRB,0,1,0,0,0,PTS
freq,26,26,176,75,86,26,199,26,26,26,...,26,42,26,26,27,45,90,36,26,26


In [4]:
#check for team names
data.Tm.unique()

array(['OKC', 'PHO', 'ATL', 'MIA', 'CLE', 'DEN', 'SAS', 'CHI', 'UTA',
       'BRK', 'NYK', 'POR', 'MEM', 'TOT', 'IND', 'MIL', 'DAL', 'HOU',
       'Tm', 'TOR', 'WAS', 'ORL', 'CHO', 'SAC', 'LAL', 'MIN', 'BOS',
       'GSW', 'NOP', 'LAC', 'PHI', 'DET'], dtype=object)

In [5]:
#get the proper team names
rockets = data.loc[data.Tm == "HOU"]
bucks = data.loc[data.Tm == 'MIL']
rockets.shape, bucks.shape

((23, 30), (24, 30))

In [6]:
#join the two teams together
rnb = pd.concat([rockets, bucks], axis=0, join='outer', ignore_index = True)
rnb.shape

(47, 30)

In [7]:
#save the files as jsons
rockets.to_json('assets/rockets.json')
bucks.to_json('assets/bucks.json')
rnb.to_json('assets/rnb.json')

In [8]:
data.to_json('assets/nba.json')

In [9]:
#get the scores by month
months = ['october', 'november', 'december', 
          'january', 'february', 'march',
         'april', 'may']

#put all the scores into a dataframe
for month in months:
    if month == 'october':
        month_data = pd.read_html('https://www.basketball-reference.com/leagues/NBA_2019_games-'+month+ '.html')[0]
    else:
        temp = pd.read_html('https://www.basketball-reference.com/leagues/NBA_2019_games-'+month+ '.html')[0]
        month_data = pd.concat([month_data, temp], axis=0, join='outer', ignore_index = True)


In [21]:
#only keep the regular season data, playoffs start at index 1230
regular_season = month_data[:1230]

#helper method to clean
def date_formatter(elt):
    cleaned = elt.replace(',','')
    return datetime.strptime(cleaned, '%a %b %d %Y')

#hehe sorry filter the warning for aesthetic
warnings.filterwarnings('ignore')
regular_season['Date'] = regular_season['Date'].apply(lambda x: date_formatter(x))
#check to make sure method worked
regular_season.head(10)

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes
0,2018-10-16,8:00p,Philadelphia 76ers,87,Boston Celtics,105,Box Score,,18624,
1,2018-10-16,10:30p,Oklahoma City Thunder,100,Golden State Warriors,108,Box Score,,19596,
2,2018-10-17,7:00p,Milwaukee Bucks,113,Charlotte Hornets,112,Box Score,,17889,
3,2018-10-17,7:00p,Brooklyn Nets,100,Detroit Pistons,103,Box Score,,20332,
4,2018-10-17,7:00p,Memphis Grizzlies,83,Indiana Pacers,111,Box Score,,17923,
5,2018-10-17,7:00p,Miami Heat,101,Orlando Magic,104,Box Score,,19191,
6,2018-10-17,7:30p,Atlanta Hawks,107,New York Knicks,126,Box Score,,18249,
7,2018-10-17,7:30p,Cleveland Cavaliers,104,Toronto Raptors,116,Box Score,,19915,
8,2018-10-17,8:00p,New Orleans Pelicans,131,Houston Rockets,112,Box Score,,18055,
9,2018-10-17,8:30p,Minnesota Timberwolves,108,San Antonio Spurs,112,Box Score,,18354,


In [11]:
#find houston's games
hou_home = regular_season.loc[regular_season['Home/Neutral'] == 'Houston Rockets']
hou_vis = regular_season.loc[regular_season['Visitor/Neutral'] == 'Houston Rockets']
hou = pd.concat([hou_home, hou_vis], axis=0, join='outer', ignore_index = True).sort_values('Date').reset_index(drop = True)

In [12]:
#find milwaukee's games 
mil_home = regular_season.loc[regular_season['Home/Neutral'] == 'Milwaukee Bucks']
mil_vis = regular_season.loc[regular_season['Visitor/Neutral'] == 'Milwaukee Bucks']
mil = pd.concat([mil_home, mil_vis], axis=0, join='outer', ignore_index = True).sort_values('Date').reset_index(drop = True)

In [13]:
#helper method to find the winner of the game
def winner(row):
    if row['PTS'] > row['PTS.1']:
        return row['Visitor/Neutral']
    else:
        return row['Home/Neutral']

In [14]:
mil['outcome'] = mil.apply(winner, axis = 1)
mil['outcome'] = (mil['outcome'] == 'Milwaukee Bucks').astype(int)
mil

wins = pd.Series()
curr_wins = 0
for idx in mil.index.tolist():
    curr_outcome = mil.iloc[idx]['outcome']
    curr_wins+=curr_outcome
    wins.set_value(idx, curr_wins)

mil['win_ct'] = wins
mil.head(10)

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes,outcome,win_ct
0,2018-10-17,7:00p,Milwaukee Bucks,113,Charlotte Hornets,112,Box Score,,17889,,1,1
1,2018-10-19,8:30p,Indiana Pacers,101,Milwaukee Bucks,118,Box Score,,17341,,1,2
2,2018-10-22,8:00p,New York Knicks,113,Milwaukee Bucks,124,Box Score,,16228,,1,3
3,2018-10-24,9:30p,Philadelphia 76ers,108,Milwaukee Bucks,123,Box Score,,17341,,1,4
4,2018-10-26,8:00p,Milwaukee Bucks,125,Minnesota Timberwolves,95,Box Score,,16334,,1,5
5,2018-10-27,8:30p,Orlando Magic,91,Milwaukee Bucks,113,Box Score,,17341,,1,6
6,2018-10-29,8:00p,Toronto Raptors,109,Milwaukee Bucks,124,Box Score,,17341,,1,7
7,2018-11-01,8:00p,Milwaukee Bucks,113,Boston Celtics,117,Box Score,,18624,,0,7
8,2018-11-04,3:30p,Sacramento Kings,109,Milwaukee Bucks,144,Box Score,,17341,,1,8
9,2018-11-06,10:00p,Milwaukee Bucks,103,Portland Trail Blazers,118,Box Score,,19512,,0,8


In [15]:
hou['outcome'] = hou.apply(winner, axis = 1)
hou['outcome'] = (hou['outcome'] == 'Houston Rockets').astype(int)

wins = pd.Series()
curr_wins = 0
for idx in hou.index.tolist():
    curr_outcome = hou.iloc[idx]['outcome']
    curr_wins+=curr_outcome
    wins.set_value(idx, curr_wins)

hou['win_ct'] = wins
hou.head(10)

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes,outcome,win_ct
0,2018-10-17,8:00p,New Orleans Pelicans,131,Houston Rockets,112,Box Score,,18055,,0,0
1,2018-10-20,10:30p,Houston Rockets,124,Los Angeles Lakers,115,Box Score,,18997,,1,1
2,2018-10-21,9:00p,Houston Rockets,112,Los Angeles Clippers,115,Box Score,,16149,,0,1
3,2018-10-24,8:00p,Utah Jazz,100,Houston Rockets,89,Box Score,,18055,,0,1
4,2018-10-26,8:00p,Los Angeles Clippers,133,Houston Rockets,113,Box Score,,18055,,0,1
5,2018-10-30,8:00p,Portland Trail Blazers,104,Houston Rockets,85,Box Score,,18055,,0,1
6,2018-11-02,7:30p,Houston Rockets,119,Brooklyn Nets,111,Box Score,,14013,,1,2
7,2018-11-03,8:00p,Houston Rockets,96,Chicago Bulls,88,Box Score,,20505,,1,3
8,2018-11-05,7:00p,Houston Rockets,98,Indiana Pacers,94,Box Score,,14735,,1,4
9,2018-11-08,8:00p,Houston Rockets,80,Oklahoma City Thunder,98,Box Score,,18203,,0,4


In [16]:
to_json = pd.DataFrame()

In [20]:
data = pd.Series([mil.win_ct.values.tolist(), hou.win_ct.values.tolist()])
teams = pd.Series(['Milwaukee Bucks', 'Houston Rockets'])
to_json = pd.DataFrame({'team': teams, 'data': data})
to_json.to_json('season_results.json')

In [22]:
to_json

Unnamed: 0,data,team
0,"[1, 2, 3, 4, 5, 6, 7, 7, 8, 8, 9, 9, 10, 10, 1...",Milwaukee Bucks
1,"[0, 1, 1, 1, 1, 1, 2, 3, 4, 4, 4, 5, 6, 7, 8, ...",Houston Rockets


In [23]:
416160 * 60 * 1000

24969600000

In [24]:
hou

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes,outcome,win_ct
0,2018-10-17,8:00p,New Orleans Pelicans,131,Houston Rockets,112,Box Score,,18055,,0,0
1,2018-10-20,10:30p,Houston Rockets,124,Los Angeles Lakers,115,Box Score,,18997,,1,1
2,2018-10-21,9:00p,Houston Rockets,112,Los Angeles Clippers,115,Box Score,,16149,,0,1
3,2018-10-24,8:00p,Utah Jazz,100,Houston Rockets,89,Box Score,,18055,,0,1
4,2018-10-26,8:00p,Los Angeles Clippers,133,Houston Rockets,113,Box Score,,18055,,0,1
5,2018-10-30,8:00p,Portland Trail Blazers,104,Houston Rockets,85,Box Score,,18055,,0,1
6,2018-11-02,7:30p,Houston Rockets,119,Brooklyn Nets,111,Box Score,,14013,,1,2
7,2018-11-03,8:00p,Houston Rockets,96,Chicago Bulls,88,Box Score,,20505,,1,3
8,2018-11-05,7:00p,Houston Rockets,98,Indiana Pacers,94,Box Score,,14735,,1,4
9,2018-11-08,8:00p,Houston Rockets,80,Oklahoma City Thunder,98,Box Score,,18203,,0,4


In [31]:
mil.loc[mil['Visitor/Neutral'] == 'Philadelphia 76ers']

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes,outcome,win_ct
3,2018-10-24,9:30p,Philadelphia 76ers,108,Milwaukee Bucks,123,Box Score,,17341,,1,4
69,2019-03-17,3:30p,Philadelphia 76ers,130,Milwaukee Bucks,125,Box Score,,18148,,0,52


In [30]:
mil

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes,outcome,win_ct
0,2018-10-17,7:00p,Milwaukee Bucks,113,Charlotte Hornets,112,Box Score,,17889,,1,1
1,2018-10-19,8:30p,Indiana Pacers,101,Milwaukee Bucks,118,Box Score,,17341,,1,2
2,2018-10-22,8:00p,New York Knicks,113,Milwaukee Bucks,124,Box Score,,16228,,1,3
3,2018-10-24,9:30p,Philadelphia 76ers,108,Milwaukee Bucks,123,Box Score,,17341,,1,4
4,2018-10-26,8:00p,Milwaukee Bucks,125,Minnesota Timberwolves,95,Box Score,,16334,,1,5
5,2018-10-27,8:30p,Orlando Magic,91,Milwaukee Bucks,113,Box Score,,17341,,1,6
6,2018-10-29,8:00p,Toronto Raptors,109,Milwaukee Bucks,124,Box Score,,17341,,1,7
7,2018-11-01,8:00p,Milwaukee Bucks,113,Boston Celtics,117,Box Score,,18624,,0,7
8,2018-11-04,3:30p,Sacramento Kings,109,Milwaukee Bucks,144,Box Score,,17341,,1,8
9,2018-11-06,10:00p,Milwaukee Bucks,103,Portland Trail Blazers,118,Box Score,,19512,,0,8


In [34]:
mil.iloc[range(30,60)]

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes,outcome,win_ct
30,2018-12-21,8:00p,Milwaukee Bucks,120,Boston Celtics,107,Box Score,,18624,,1,22
31,2018-12-22,8:00p,Milwaukee Bucks,87,Miami Heat,94,Box Score,,19600,,0,22
32,2018-12-25,12:00p,Milwaukee Bucks,109,New York Knicks,95,Box Score,,19812,,1,23
33,2018-12-27,8:00p,New York Knicks,96,Milwaukee Bucks,112,Box Score,,18058,,1,24
34,2018-12-29,5:00p,Brooklyn Nets,115,Milwaukee Bucks,129,Box Score,,17918,,1,25
35,2019-01-01,8:00p,Detroit Pistons,98,Milwaukee Bucks,121,Box Score,,17534,,1,26
36,2019-01-04,8:30p,Atlanta Hawks,112,Milwaukee Bucks,144,Box Score,,17632,,1,27
37,2019-01-05,8:30p,Toronto Raptors,123,Milwaukee Bucks,116,Box Score,,18028,,0,27
38,2019-01-07,8:00p,Utah Jazz,102,Milwaukee Bucks,114,Box Score,,17341,,1,28
39,2019-01-09,8:00p,Milwaukee Bucks,116,Houston Rockets,109,Box Score,,18055,,1,29
