To show that home advantage exists in the NBA, we'll check the home win percentage, as well as the average points scored at home and away and their variance, since 2004.

In [2]:
from utils import *

# Percentage of rows whose HOME_TEAM_WINS flag equals 1, rounded to 3 decimals.
home_win_count = int((games_df['HOME_TEAM_WINS'] == 1).sum())
win_percentage = round(home_win_count * 100 / len(games_df), 3)

# Mean and variance (pandas default, i.e. sample variance) of the points
# columns for the home and the away side.
home_points = games_df['PTS_home']
away_points = games_df['PTS_away']
home_points_avg, home_points_var = home_points.mean(), home_points.var()
away_points_avg, away_points_var = away_points.mean(), away_points.var()

print(f"Home win percentage since 2004: {win_percentage}%")
print(f"Home points average: {round(home_points_avg, 3)}, var: {round(home_points_var, 3)}")
print(f"Away points average: {round(away_points_avg, 3)}, var: {round(away_points_var, 3)}")

Home win percentage since 2004: 58.703%
Home points average: 103.456, var: 176.448
Away points average: 100.64, var: 180.523


During the 2020-2021 season, attendance at most games was capped at 10% of arena capacity or less due to COVID-19. Let's see the home win percentage for this period.

In [3]:
# Parse the game dates in place so they can be compared against the window bounds.
games_df['GAME_DATE_EST'] = pd.to_datetime(
    games_df['GAME_DATE_EST'],
    format='mixed',
    dayfirst=True,
)

# Window of interest; both bounds are inclusive.
start_date = '2020-07-22'
end_date = '2021-02-27'
mask = games_df['GAME_DATE_EST'].between(start_date, end_date)
covid_df = games_df.loc[mask]

# Same statistics as for the full data set, restricted to the window.
covid_home_win_count = int((covid_df['HOME_TEAM_WINS'] == 1).sum())
win_percentage_covid = round(covid_home_win_count * 100 / len(covid_df), 3)

covid_home_points = covid_df['PTS_home']
covid_away_points = covid_df['PTS_away']
home_points_avg_covid, home_points_var_covid = covid_home_points.mean(), covid_home_points.var()
away_points_avg_covid, away_points_var_covid = covid_away_points.mean(), covid_away_points.var()

print(f"Home win percentage during Covid: {win_percentage_covid}%")
print(f"Home points average: {round(home_points_avg_covid, 3)}, var: {round(home_points_var_covid, 3)}")
print(f"Away points average: {round(away_points_avg_covid, 3)}, var: {round(away_points_var_covid, 3)}")

Home win percentage during Covid: 53.222%
Home points average: 112.232, var: 159.923
Away points average: 111.233, var: 153.487


We see a noticeable decrease of about 5 percentage points (from 58.7% to 53.2%).
Let's check players' home and away FT%.

In [4]:
def _ft_make_percentage(pbp, side_column):
    """Return the percentage of free throws recorded as 'make' for one side.

    Rows are first restricted to those where ``side_column`` is non-null,
    then to those with a non-null ``FreeThrowOutcome``; the latter are the
    attempts the percentage is taken over.
    """
    side_plays = pbp.loc[pbp[side_column].notna()]
    outcomes = side_plays.loc[side_plays['FreeThrowOutcome'].notna(), 'FreeThrowOutcome']
    return int((outcomes == 'make').sum()) * 100 / len(outcomes)


# Play-by-play frames keyed by the year label used in the printed output.
years_dict = {
    2015: pbp_2015,
    2016: pbp_2016,
    2017: pbp_2017,
    2018: pbp_2018,
    2019: pbp_2019,
    2020: pbp_2020,
}

for year, pbp_year in years_dict.items():
    home_ft_percentage = _ft_make_percentage(pbp_year, 'HomePlay')
    print(f"Home FT% in {year}: {round(home_ft_percentage, 3)}%")

    away_ft_percentage = _ft_make_percentage(pbp_year, 'AwayPlay')
    print(f"Away FT% in {year}: {round(away_ft_percentage, 3)}%")


Home FT% in 2015: 75.875%
Away FT% in 2015: 75.335%
Home FT% in 2016: 77.118%
Away FT% in 2016: 77.311%
Home FT% in 2017: 76.713%
Away FT% in 2017: 76.646%
Home FT% in 2018: 76.81%
Away FT% in 2018: 76.687%
Home FT% in 2019: 77.458%
Away FT% in 2019: 77.353%
Home FT% in 2020: 75.695%
Away FT% in 2020: 76.704%


We see no significant difference in FT% between home and away teams. This could imply that home advantage may be influenced by referees/coaches more than players.
Let's check the difference in FG%.

In [5]:
def _fg_make_percentage(pbp, side_column):
    """Return the percentage of shots recorded as 'make' for one side.

    Rows are first restricted to those where ``side_column`` is non-null,
    then to those with a non-null ``ShotOutcome``; the latter are the
    attempts the percentage is taken over.
    """
    side_plays = pbp.loc[pbp[side_column].notna()]
    outcomes = side_plays.loc[side_plays['ShotOutcome'].notna(), 'ShotOutcome']
    return int((outcomes == 'make').sum()) * 100 / len(outcomes)


for year, pbp_year in years_dict.items():
    home_fg_percentage = _fg_make_percentage(pbp_year, 'HomePlay')
    print(f"Home FG% in {year}: {round(home_fg_percentage, 3)}%")

    away_fg_percentage = _fg_make_percentage(pbp_year, 'AwayPlay')
    print(f"Away FG% in {year}: {round(away_fg_percentage, 3)}%")

Home FG% in 2015: 45.7%
Away FG% in 2015: 44.585%
Home FG% in 2016: 46.362%
Away FG% in 2016: 45.157%
Home FG% in 2017: 46.505%
Away FG% in 2017: 45.503%
Home FG% in 2018: 46.454%
Away FG% in 2018: 45.425%
Home FG% in 2019: 46.395%
Away FG% in 2019: 45.581%
Home FG% in 2020: 45.99%
Away FG% in 2020: 46.026%


Next, I'd like to check the number of FGA and FTA for home and away teams. More FTAs for the home team could imply crowd influence over the referee.

In [9]:
# Divisor used to turn a season total into a per-game average.
# NOTE(review): 1230 presumably stands for a full 30-team, 82-game regular
# season. Confirm that each play-by-play frame really holds exactly that many
# games -- the 2019 averages printed by this cell are visibly lower than the
# other years.
GAMES_PER_SEASON = 1230


def _count_free_throws(pbp, side_column):
    """Count rows with a non-null ``FreeThrowOutcome`` among rows where ``side_column`` is set."""
    side_plays = pbp.loc[pbp[side_column].notna()]
    return int(side_plays['FreeThrowOutcome'].notna().sum())


# 2020 is excluded (presumably because the fixed divisor above does not fit
# that season -- confirm).
ft_seasons = {year: pbp for year, pbp in years_dict.items() if year != 2020}

for year, pbp_year in ft_seasons.items():
    home_ft_total = _count_free_throws(pbp_year, 'HomePlay')
    print(f"Home FTs average in a game in {year}: {round(home_ft_total / GAMES_PER_SEASON, 3)}")

    away_ft_total = _count_free_throws(pbp_year, 'AwayPlay')
    print(f"Away FTs average in a game in {year}: {round(away_ft_total / GAMES_PER_SEASON, 3)}")

Home FTs average in a game in 2015: 25.585
Away FTs average in a game in 2015: 24.432
Home FTs average in a game in 2016: 25.194
Away FTs average in a game in 2016: 24.09
Home FTs average in a game in 2017: 23.468
Away FTs average in a game in 2017: 22.885
Home FTs average in a game in 2018: 25.063
Away FTs average in a game in 2018: 24.328
Home FTs average in a game in 2019: 21.813
Away FTs average in a game in 2019: 21.252


In [7]:
# Divisor used to turn a season total into a per-game average.
# NOTE(review): 1230 presumably stands for a full 30-team, 82-game regular
# season. Confirm that each play-by-play frame really holds exactly that many
# games -- the 2019 averages printed by this cell are visibly lower than the
# other years.
GAMES_PER_SEASON = 1230


def _count_field_goals(pbp, side_column):
    """Count rows with a non-null ``ShotOutcome`` among rows where ``side_column`` is set."""
    side_plays = pbp.loc[pbp[side_column].notna()]
    return int(side_plays['ShotOutcome'].notna().sum())


# 2020 is excluded (presumably because the fixed divisor above does not fit
# that season -- confirm).
fg_seasons = {year: pbp for year, pbp in years_dict.items() if year != 2020}

for year, pbp_year in fg_seasons.items():
    home_fg_total = _count_field_goals(pbp_year, 'HomePlay')
    print(f"Home FGs average in a game in {year}: {round(home_fg_total / GAMES_PER_SEASON, 3)}")

    away_fg_total = _count_field_goals(pbp_year, 'AwayPlay')
    print(f"Away FGs average in a game in {year}: {round(away_fg_total / GAMES_PER_SEASON, 3)}")

Home FGs average in a game in 2015: 90.402
Away FGs average in a game in 2015: 90.32
Home FGs average in a game in 2016: 90.628
Away FGs average in a game in 2016: 90.849
Home FGs average in a game in 2017: 91.712
Away FGs average in a game in 2017: 91.595
Home FGs average in a game in 2018: 94.999
Away FGs average in a game in 2018: 94.885
Home FGs average in a game in 2019: 82.266
Away FGs average in a game in 2019: 82.285
