In [1]:
import pandas as pd
from pandas_datareader import data
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

import cufflinks as cf

# Prepare plotly for rendering inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead of
# embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)
# Put cufflinks in offline mode; the DataFrame/Series .iplot() calls further down
# rely on cufflinks being set up here.
cf.go_offline()

In [9]:
# Load the FiveThirtyEight NBA Elo game log (parsing date_game as a datetime) and
# set column dtypes in one pass: the four label columns become categoricals and
# is_playoffs becomes a boolean flag.
nba = pd.read_csv(
    "https://raw.githubusercontent.com/fivethirtyeight/data/master/nba-elo/nbaallelo.csv",
    parse_dates=["date_game"],
).astype({
    'game_result': 'category',
    'team_id': 'category',
    'game_location': 'category',
    'fran_id': 'category',
    'is_playoffs': 'bool',
})

In [30]:
# Split the full game log into playoff rows and regular-season rows,
# based on the is_playoffs flag.
playoff_rows = nba['is_playoffs'] == 1
regular_season_rows = nba['is_playoffs'] == 0

playoffs = nba[playoff_rows]
regular_season = nba[regular_season_rows]

In [53]:
# Mean of the 'pts' column per year_id, computed separately for the regular
# season and the playoffs.
# 'pts' is selected *before* aggregating: calling .mean() on the whole grouped
# frame also tries to average the non-numeric columns, which raises a TypeError
# on pandas >= 2.0 and does needless work on older versions.
reg_season_scoring_avg = regular_season.groupby('year_id')[['pts']].mean()
playoff_scoring_avg = playoffs.groupby('year_id')[['pts']].mean()

# Inner merge on year_id keeps only the seasons present in both frames.
reg_vs_playoff_scoring = pd.merge(reg_season_scoring_avg, playoff_scoring_avg, how='inner', on='year_id')

# Use a flat list of labels: a nested list ([[...]]) would turn the columns
# into a one-level MultiIndex instead of plain column names.
reg_vs_playoff_scoring.columns = ['Regular Season', 'Playoffs']
reg_vs_playoff_scoring

Unnamed: 0_level_0,Regular Season,Playoffs
year_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1947,67.761329,70.605263
1948,72.653646,72.195652
1949,80.009722,78.500000
1950,80.044563,80.406250
1951,84.060734,83.115385
...,...,...
2011,99.550407,94.018519
2012,96.259596,92.803571
2013,98.137917,95.176471
2014,101.008943,99.544944


In [32]:
# Plot the regular-season scoring average for each season.
reg_season_scoring_avg.iplot(
    title='Average Points Per Game, 1946-2015 - Regular Season',
    xTitle='Season',
    yTitle='Scoring Average',
    colors='blue',
)

In [33]:
# Plot the playoff scoring average for each season.
playoff_scoring_avg.iplot(
    title='Average Points Per Game, 1947-2015 - Playoffs',
    xTitle='Season',
    yTitle='Scoring Average',
    colors='red',
)

In [54]:
# Plot regular-season and playoff scoring averages on the same chart
# (one colour per column, in column order).
reg_vs_playoff_scoring.iplot(
    title='Average Points Per Game, 1947-2015',
    xTitle='Season',
    yTitle='Scoring Average',
    colors=['blue', 'red'],
)

In [13]:
# NBA Home win pct, 1946-2015
# Count results over the non-copy rows (_iscopy == 0) only.
# NOTE(review): this treats every non-copy row as the home team's view of the
# game — confirm against game_location (e.g. neutral-site games).
total_home_record = nba[nba["_iscopy"]==0].groupby(['game_result']).size().sort_values(ascending=False)

# Look wins up by label and divide by the total number of games. Positional
# [0]/[1] access only meant "wins"/"losses" because wins happened to sort
# first, and integer-as-position access on a label-indexed Series is deprecated.
home_win_pct = total_home_record.loc['W'] / total_home_record.sum() * 100
home_win_pct = round(home_win_pct,2)
print("The NBA home winning percentage from 1946-2015 was " + str(home_win_pct) + "%")

The NBA home winning percentage from 1946-2015 was 62.24%


In [14]:
# Regular Season Home win pct, 1946-2015
# Count results over the non-copy regular-season rows (_iscopy == 0) only.
# NOTE(review): this treats every non-copy row as the home team's view of the
# game — confirm against game_location (e.g. neutral-site games).
reg_season_home_record = regular_season[regular_season["_iscopy"]==0].groupby(['game_result']).size().sort_values(ascending=False)

# Look wins up by label and divide by the total number of games. Positional
# [0]/[1] access only meant "wins"/"losses" because wins happened to sort
# first, and integer-as-position access on a label-indexed Series is deprecated.
reg_season_home_win_pct = reg_season_home_record.loc['W'] / reg_season_home_record.sum() * 100
reg_season_home_win_pct = round(reg_season_home_win_pct,2)
print("The NBA Regular Season home winning percentage from 1946-2015 was " + str(reg_season_home_win_pct) + "%")

The NBA Regular Season home winning percentage from 1946-2015 was 62.01%


In [15]:
# Playoff Home win pct, 1947-2015
# Count results over the non-copy playoff rows (_iscopy == 0) only.
# NOTE(review): this treats every non-copy row as the home team's view of the
# game — confirm against game_location (e.g. neutral-site games).
playoff_home_record = playoffs[playoffs["_iscopy"]==0].groupby(['game_result']).size().sort_values(ascending=False)

# Look wins up by label and divide by the total number of games. Positional
# [0]/[1] access only meant "wins"/"losses" because wins happened to sort
# first, and integer-as-position access on a label-indexed Series is deprecated.
playoff_home_win_pct = playoff_home_record.loc['W'] / playoff_home_record.sum() * 100
playoff_home_win_pct = round(playoff_home_win_pct,2)
print("The NBA Playoff home winning percentage from 1947-2015 was " + str(playoff_home_win_pct) + "%")

The NBA Playoff home winning percentage from 1947-2015 was 65.68%


In [82]:
# NBA home record by year - regular season

# Keep only the non-copy rows (_iscopy == 0), then group them by season.
dedupe_reg = regular_season[regular_season["_iscopy"]==0]
reg_season_by_year = dedupe_reg.groupby('year_id')

# Games per season; the count column keeps the name 'game_id'.
reg_home_games_by_season = pd.DataFrame(reg_season_by_year['game_id'].size())

# Wins per season, aligned to the frame on year_id.
reg_home_wins = dedupe_reg[dedupe_reg['game_result']=='W'].groupby('year_id').size()
reg_home_games_by_season['Home Wins'] = reg_home_wins

# Wins as a percentage of games played that season.
reg_win_share = reg_home_games_by_season['Home Wins'] / reg_home_games_by_season['game_id']
reg_home_games_by_season['Home Winning %'] = reg_win_share * 100
reg_home_games_by_season

Unnamed: 0_level_0,game_id,Home Wins,Home Winning %
year_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1947,331,203,61.329305
1948,192,109,56.770833
1949,360,220,61.111111
1950,561,369,65.775401
1951,354,257,72.598870
...,...,...,...
2011,1230,742,60.325203
2012,990,580,58.585859
2013,1229,753,61.269325
2014,1230,714,58.048780


In [76]:
# NBA home record by year - playoffs

# Keep only the non-copy rows (_iscopy == 0), then group them by season.
dedupe_playoffs = playoffs[playoffs["_iscopy"]==0]
playoffs_by_year = dedupe_playoffs.groupby('year_id')

# Games per season; the count column keeps the name 'game_id'.
playoff_games_by_season = pd.DataFrame(playoffs_by_year['game_id'].size())

# Wins per season, aligned to the frame on year_id.
playoff_home_wins = dedupe_playoffs[dedupe_playoffs['game_result']=='W'].groupby('year_id').size()
playoff_games_by_season['Home Wins'] = playoff_home_wins

# Wins as a percentage of games played that season.
playoff_win_share = playoff_games_by_season['Home Wins'] / playoff_games_by_season['game_id']
playoff_games_by_season['Home Winning %'] = playoff_win_share * 100
playoff_games_by_season

Unnamed: 0_level_0,game_id,Home Wins,Home Winning %
year_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1947,19,13,68.421053
1948,23,15,65.217391
1949,20,15,75.000000
1950,32,23,71.875000
1951,26,21,80.769231
...,...,...,...
2011,81,54,66.666667
2012,84,57,67.857143
2013,85,54,63.529412
2014,89,50,56.179775


In [77]:
# Histogram of the per-season playoff 'Home Winning %' values.
playoff_games_by_season['Home Winning %'].iplot(
    kind='histogram',
    title='Playoff Win % Distribution',
    xTitle='% Bins',
    colors='blue',
)

In [83]:
# Histogram of the per-season regular-season 'Home Winning %' values.
reg_home_games_by_season['Home Winning %'].iplot(
    kind='histogram',
    title='Regular Season Win % Distribution',
    xTitle='% Bins',
    colors='green',
)