To show that home advantage exists in the NBA, we'll check the home win percentage, point average at home and away and it's variance since 2004.

In [52]:
from utils import *
win_percentage = (len(games_df[games_df['HOME_TEAM_WINS'] == 1]) * 100 / len(games_df)).__round__(3)
home_points_avg = games_df['PTS_home'].mean()
home_points_var = games_df['PTS_home'].var()
away_points_avg = games_df['PTS_away'].mean()
away_points_var = games_df['PTS_away'].var()
print(f"Home win percentage since 2004: {win_percentage}%")
print(f"Home points average: {home_points_avg.__round__(3)}, var: {home_points_var.__round__(3)}")
print(f"Away points average: {away_points_avg.__round__(3)}, var: {away_points_var.__round__(3)}")

Home win percentage since 2004: 58.703%
Home points average: 103.456, var: 176.448
Away points average: 100.64, var: 180.523


During the 2020-2021 season, most games were capped at 10% capacity at most due to covid. Let's see the home wins percentage for this period.

In [53]:
games_df['GAME_DATE_EST'] = pd.to_datetime(games_df['GAME_DATE_EST'], format='mixed', dayfirst=True)
start_date = '2020-07-22'
end_date = '2021-02-27'

mask = (games_df['GAME_DATE_EST'] >= start_date) & (games_df['GAME_DATE_EST'] <= end_date)
covid_df = games_df[mask]
win_percentage_covid = (len(covid_df[covid_df['HOME_TEAM_WINS'] == 1]) * 100 / len(covid_df)).__round__(3)
home_points_avg_covid = covid_df['PTS_home'].mean()
home_points_var_covid = covid_df['PTS_home'].var()
away_points_avg_covid = covid_df['PTS_away'].mean()
away_points_var_covid = covid_df['PTS_away'].var()
print(f"Home win percentage during Covid: {win_percentage_covid}%")
print(f"Home points average: {home_points_avg_covid.__round__(3)}, var: {home_points_var_covid.__round__(3)}")
print(f"Away points average: {away_points_avg_covid.__round__(3)}, var: {away_points_var_covid.__round__(3)}")

Home win percentage during Covid: 53.222%
Home points average: 112.232, var: 159.923
Away points average: 111.233, var: 153.487


We see a significant decrease, about 5%. 
Let's check players' home and away FT%.

In [54]:
years_dict = {2015: pbp_2015, 2016: pbp_2016, 2017: pbp_2017, 2018: pbp_2018, 2019: pbp_2019, 2020: pbp_2020}
for year, pbp_year in years_dict.items():
    mask_home = pbp_year['HomePlay'].notna()
    home_plays = pbp_year[mask_home]

    mask_ft_home = home_plays['FreeThrowOutcome'].notna()
    home_fts = home_plays[mask_ft_home]

    home_ft_percentage = len(home_fts[home_fts['FreeThrowOutcome'] == 'make']) * 100 / len(home_fts)
    print(f"Home FT% in {year}: {home_ft_percentage.__round__(3)}%")

    mask_away = pbp_year['AwayPlay'].notna()
    away_plays = pbp_year[mask_away]

    mask_ft_away = away_plays['FreeThrowOutcome'].notna()
    away_fts = away_plays[mask_ft_away]

    away_ft_percentage = len(away_fts[away_fts['FreeThrowOutcome'] == 'make']) * 100 / len(away_fts)
    print(f"Away FT% in {year}: {away_ft_percentage.__round__(3)}%")

Home FT% in 2015: 75.875%
Away FT% in 2015: 75.335%
Home FT% in 2016: 77.118%
Away FT% in 2016: 77.311%
Home FT% in 2017: 76.713%
Away FT% in 2017: 76.646%
Home FT% in 2018: 76.81%
Away FT% in 2018: 76.687%
Home FT% in 2019: 77.458%
Away FT% in 2019: 77.353%
Home FT% in 2020: 75.695%
Away FT% in 2020: 76.704%


We see no significant difference in FT% between home and away teams. This could imply that home advantage may be influenced by referees/coaches more than players.
Let's check the difference in FG%.

In [55]:
for year, pbp_year in years_dict.items():
    mask_home = pbp_year['HomePlay'].notna()
    home_plays = pbp_year[mask_home]

    mask_fg_home = home_plays['ShotOutcome'].notna()
    home_fgs = home_plays[mask_fg_home]
    
    made_shots = home_fgs[home_fgs['ShotOutcome'] == 'make']
    home_fg_percentage = len(made_shots) * 100 / len(home_fgs)
        
    print(f"Home FG% in {year}: {home_fg_percentage.__round__(3)}%")

    mask_away = pbp_year['AwayPlay'].notna()
    away_plays = pbp_year[mask_away]

    mask_fg_away = away_plays['ShotOutcome'].notna()
    away_fgs = away_plays[mask_fg_away]

    away_fg_percentage = len(away_fgs[away_fgs['ShotOutcome'] == 'make']) * 100 / len(away_fgs)
    print(f"Away FG% in {year}: {away_fg_percentage.__round__(3)}%")

Home FG% in 2015: 45.7%
Away FG% in 2015: 44.585%
Home FG% in 2016: 46.362%
Away FG% in 2016: 45.157%
Home FG% in 2017: 46.505%
Away FG% in 2017: 45.503%
Home FG% in 2018: 46.454%
Away FG% in 2018: 45.425%
Home FG% in 2019: 46.395%
Away FG% in 2019: 45.581%
Home FG% in 2020: 45.99%
Away FG% in 2020: 46.026%


In [56]:
diff_dict = dict()
for year, pbp_year in years_dict.items():
    mask_home = pbp_year['HomePlay'].notna()
    home_plays = pbp_year[mask_home]

    mask_fg_home = home_plays['ShotOutcome'].notna()
    home_fgs = home_plays[mask_fg_home]
    
    home_2pt_fgs = home_fgs[home_fgs['ShotType'].apply(lambda x: x.startswith('2-pt'))]
    made_shots = home_2pt_fgs[home_2pt_fgs['ShotOutcome'] == 'make']
    home_2fg_percentage = len(made_shots) * 100 / len(home_2pt_fgs)
    diff_dict[year] = home_2fg_percentage.__round__(3)
    print(f"Home 2pt FG% in {year}: {home_2fg_percentage.__round__(3)}%")
    
    mask_away = pbp_year['AwayPlay'].notna()
    away_plays = pbp_year[mask_away]
    
    mask_fg_away = away_plays['ShotOutcome'].notna()
    away_fgs = away_plays[mask_fg_away]
    
    away_2pt_fgs = away_fgs[away_fgs['ShotType'].apply(lambda x: x.startswith('2-pt'))]
    made_shots = away_2pt_fgs[away_2pt_fgs['ShotOutcome'] == 'make']
    away_2fg_percentage = len(made_shots) * 100 / len(away_2pt_fgs)
    diff_dict[year] = diff_dict[year] - away_2fg_percentage.__round__(3)
    print(f"Away 2pt FG% in {year}: {away_2fg_percentage.__round__(3)}%")
    
diff = sum(diff_dict.values()) / 6
print(f"Average difference between home and away 2pt FG%: {diff.__round__(3)}")

Home 2pt FG% in 2015: 49.657%
Away 2pt FG% in 2015: 48.447%
Home 2pt FG% in 2016: 50.869%
Away 2pt FG% in 2016: 49.927%
Home 2pt FG% in 2017: 51.859%
Away 2pt FG% in 2017: 50.261%
Home 2pt FG% in 2018: 52.493%
Away 2pt FG% in 2018: 51.207%
Home 2pt FG% in 2019: 52.798%
Away 2pt FG% in 2019: 52.023%
Home 2pt FG% in 2020: 52.085%
Away 2pt FG% in 2020: 52.394%
Average difference between home and away 2pt FG%: 0.917


In [57]:
diff_dict = dict()
for year, pbp_year in years_dict.items():
    mask_home = pbp_year['HomePlay'].notna()
    home_plays = pbp_year[mask_home]

    mask_fg_home = home_plays['ShotOutcome'].notna()
    home_fgs = home_plays[mask_fg_home]
    
    home_3pt_fgs = home_fgs[home_fgs['ShotType'].apply(lambda x: x.startswith('3-pt'))]
    made_shots = home_3pt_fgs[home_3pt_fgs['ShotOutcome'] == 'make']
    home_3fg_percentage = len(made_shots) * 100 / len(home_3pt_fgs)
    diff_dict[year] = home_3fg_percentage.__round__(3)
    print(f"Home 3pt FG% in {year}: {home_3fg_percentage.__round__(3)}%")
    
    mask_away = pbp_year['AwayPlay'].notna()
    away_plays = pbp_year[mask_away]
    
    mask_fg_away = away_plays['ShotOutcome'].notna()
    away_fgs = away_plays[mask_fg_away]
    
    away_3pt_fgs = away_fgs[away_fgs['ShotType'].apply(lambda x: x.startswith('3-pt'))]
    made_shots = away_3pt_fgs[away_3pt_fgs['ShotOutcome'] == 'make']
    away_3fg_percentage = len(made_shots) * 100 / len(away_3pt_fgs)
    diff_dict[year] = diff_dict[year] - away_3fg_percentage.__round__(3)
    print(f"Away 3pt FG% in {year}: {away_3fg_percentage.__round__(3)}%")

diff = sum(diff_dict.values()) / 6
print(f"Average difference between home and away 3pt FG%: {diff.__round__(3)}")

Home 3pt FG% in 2015: 35.785%
Away 3pt FG% in 2015: 35.005%
Home 3pt FG% in 2016: 36.696%
Away 3pt FG% in 2016: 34.914%
Home 3pt FG% in 2017: 36.105%
Away 3pt FG% in 2017: 36.119%
Home 3pt FG% in 2018: 35.774%
Away 3pt FG% in 2018: 35.093%
Home 3pt FG% in 2019: 36.182%
Away 3pt FG% in 2019: 35.457%
Home 3pt FG% in 2020: 36.7%
Away 3pt FG% in 2020: 36.28%
Average difference between home and away 3pt FG%: 0.729


Next, I'd like to check the number of FGA and FTA for home and away teams. More FTAs for the home team could imply crowd influence over the referee.

In [58]:
diff_dict = dict()
for year, pbp_year in years_dict.items():
    num_games = pbp_year['URL'].nunique()
    if year == 2020:
        continue
    mask_home = pbp_year['HomePlay'].notna()
    home_plays = pbp_year[mask_home]

    mask_ft_home = home_plays['FreeThrowOutcome'].notna()
    home_fts = home_plays[mask_ft_home]
    diff_dict[year] = len(home_fts) / num_games
    
    print(f"Home FTs average in a game in {year}: {(len(home_fts) / num_games).__round__(3)}")
    mask_away = pbp_year['AwayPlay'].notna()
    away_plays = pbp_year[mask_away]

    mask_ft_away = away_plays['FreeThrowOutcome'].notna()
    away_fts = away_plays[mask_ft_away]
    diff_dict[year] = diff_dict[year] - (len(away_fts) / num_games)
    print(f"Away FTs average in a game in {year}: {(len(away_fts) / num_games).__round__(3)}")

diff = sum(diff_dict.values()) / 6
print(f"Average difference between home and away FTA: {diff.__round__(3)}")

Home FTs average in a game in 2015: 23.913
Away FTs average in a game in 2015: 22.835
Home FTs average in a game in 2016: 23.674
Away FTs average in a game in 2016: 22.636
Home FTs average in a game in 2017: 22.002
Away FTs average in a game in 2017: 21.455
Home FTs average in a game in 2018: 23.515
Away FTs average in a game in 2018: 22.825
Home FTs average in a game in 2019: 23.473
Away FTs average in a game in 2019: 22.87
Average difference between home and away FTA: 0.659


In [59]:
for year, pbp_year in years_dict.items():
    num_games = pbp_year['URL'].nunique()
    mask_home = pbp_year['HomePlay'].notna()
    home_plays = pbp_year[mask_home]

    mask_fg_home = home_plays['ShotOutcome'].notna()
    home_fgs = home_plays[mask_fg_home]
    diff_dict[year] = len(home_fgs) / num_games

    print(f"Home FGs average in a game in {year}: {(len(home_fgs) / num_games).__round__(3)}")
    mask_away = pbp_year['AwayPlay'].notna()
    away_plays = pbp_year[mask_away]

    mask_fg_away = away_plays['ShotOutcome'].notna()
    away_fgs = away_plays[mask_fg_away]
    diff_dict[year] = diff_dict[year] - (len(away_fgs) / num_games)
    print(f"Away FGs average in a game in {year}: {(len(away_fgs) / num_games).__round__(3)}")

diff = sum(diff_dict.values()) / 6
print(f"Average difference between home and away FGA: {diff.__round__(3)}")

Home FGs average in a game in 2015: 84.495
Away FGs average in a game in 2015: 84.417
Home FGs average in a game in 2016: 85.159
Away FGs average in a game in 2016: 85.366
Home FGs average in a game in 2017: 85.98
Away FGs average in a game in 2017: 85.87
Home FGs average in a game in 2018: 89.13
Away FGs average in a game in 2018: 89.022
Home FGs average in a game in 2019: 88.528
Away FGs average in a game in 2019: 88.548
Home FGs average in a game in 2020: 88.349
Away FGs average in a game in 2020: 89.038
Average difference between home and away FGA: -0.104


In [60]:
diff_dict = dict()
for year, pbp_year in years_dict.items():
    num_games = pbp_year['URL'].nunique()
    mask_home = pbp_year['HomePlay'].notna()
    home_plays = pbp_year[mask_home]

    mask_fg_home = home_plays['ShotOutcome'].notna()
    home_fgs = home_plays[mask_fg_home]
    
    home_2pt_fgs = home_fgs[home_fgs['ShotType'].apply(lambda x: x.startswith('2-pt'))]
    diff_dict[year] = (len(home_2pt_fgs) / num_games).__round__(3)
    print(f"Home 2pt FGA in {year}: {(len(home_2pt_fgs) / num_games).__round__(3)}")
    
    mask_away = pbp_year['AwayPlay'].notna()
    away_plays = pbp_year[mask_away]
    
    mask_fg_away = away_plays['ShotOutcome'].notna()
    away_fgs = away_plays[mask_fg_away]
    
    away_2pt_fgs = away_fgs[away_fgs['ShotType'].apply(lambda x: x.startswith('2-pt'))]
    diff_dict[year] = diff_dict[year] - (len(away_2pt_fgs) / num_games).__round__(3)
    print(f"Away 2pt FGA in {year}: {(len(away_2pt_fgs) / num_games).__round__(3)}")
    
diff = sum(diff_dict.values()) / 6
print(f"Average difference between home and away 2pt FGA: {diff.__round__(3)}")

Home 2pt FGA in 2015: 60.394
Away 2pt FGA in 2015: 60.163
Home 2pt FGA in 2016: 58.076
Away 2pt FGA in 2016: 58.241
Home 2pt FGA in 2017: 56.759
Away 2pt FGA in 2017: 56.982
Home 2pt FGA in 2018: 56.935
Away 2pt FGA in 2018: 57.083
Home 2pt FGA in 2019: 54.416
Away 2pt FGA in 2019: 54.116
Home 2pt FGA in 2020: 53.344
Away 2pt FGA in 2020: 53.852
Average difference between home and away 2pt FGA: -0.085


In [61]:
diff_dict = dict()
for year, pbp_year in years_dict.items():
    num_games = pbp_year['URL'].nunique()
    mask_home = pbp_year['HomePlay'].notna()
    home_plays = pbp_year[mask_home]

    mask_fg_home = home_plays['ShotOutcome'].notna()
    home_fgs = home_plays[mask_fg_home]
    
    home_3pt_fgs = home_fgs[home_fgs['ShotType'].apply(lambda x: x.startswith('3-pt'))]
    diff_dict[year] = (len(home_3pt_fgs) / num_games).__round__(3)
    print(f"Home 3pt FGA in {year}: {(len(home_3pt_fgs) / num_games).__round__(3)}")
    
    mask_away = pbp_year['AwayPlay'].notna()
    away_plays = pbp_year[mask_away]
    
    mask_fg_away = away_plays['ShotOutcome'].notna()
    away_fgs = away_plays[mask_fg_away]
    
    away_3pt_fgs = away_fgs[away_fgs['ShotType'].apply(lambda x: x.startswith('3-pt'))]
    diff_dict[year] = diff_dict[year] - (len(away_3pt_fgs) / num_games).__round__(3)
    print(f"Away 3pt FGA in {year}: {(len(away_3pt_fgs) / num_games).__round__(3)}")
    
diff = sum(diff_dict.values()) / 6
print(f"Average difference between home and away 3pt FGA: {diff.__round__(3)}")

Home 3pt FGA in 2015: 24.101
Away 3pt FGA in 2015: 24.254
Home 3pt FGA in 2016: 27.083
Away 3pt FGA in 2016: 27.125
Home 3pt FGA in 2017: 29.221
Away 3pt FGA in 2017: 28.889
Home 3pt FGA in 2018: 32.195
Away 3pt FGA in 2018: 31.939
Home 3pt FGA in 2019: 34.111
Away 3pt FGA in 2019: 34.431
Home 3pt FGA in 2020: 35.005
Away 3pt FGA in 2020: 35.187
Average difference between home and away 3pt FGA: -0.018


In [62]:
games_df['PTS_home_fixed'] = games_df['PTS_home'] - (home_points_avg.__round__(3) - away_points_avg.__round__(3))
games_df['home_win_larger_than_avg'] = games_df['PTS_home_fixed'] > games_df['PTS_away']
print(f"Win percentage of home teams after deducting the average loss from home team points: {(len(games_df[games_df['home_win_larger_than_avg'] == True]) / len(games_df) * 100).__round__(3)}%")

Win percentage of home teams after deducting the average loss from home team points: 53.885%


From this we can deduce that home teams tend to win at a higher margin than the average difference.
Next, I'd like to check what is the average margin in a home win vs. away win.

In [63]:
home_wins_df = games_df[games_df['HOME_TEAM_WINS'] == 1].copy()
away_wins_df = games_df[games_df['HOME_TEAM_WINS'] == 0].copy()

home_wins_df['point_diff'] = home_wins_df['PTS_home'] - home_wins_df['PTS_away']
home_win_avg_margin = home_wins_df['point_diff'].mean()

away_wins_df['point_diff'] = away_wins_df['PTS_away'] - away_wins_df['PTS_home']
away_win_avg_margin = away_wins_df['point_diff'].mean()

print(f"Average margin when home team wins: {home_win_avg_margin.__round__(3)}")
print(f"Average margin when away team wins: {away_win_avg_margin.__round__(3)}")

Average margin when home team wins: 11.855
Average margin when away team wins: 10.15


Next, we'll check if players' averages are better at home. If they are, we can start analyzing and understanding which players are most likely to be affected bt it.
We'll start with players who played a minimum of 82 game, which is equivalent to a single NBA season.

In [64]:
players_box_score_df = players_box_score_df[players_box_score_df['Season'] >= 2015]
min_games_played = 200
player_games_counts = players_box_score_df['PLAYER_NAME'].value_counts()
players_with_min_games = player_games_counts[player_games_counts >= min_games_played].index.tolist()
players_box_score_min_one_season = players_box_score_df[players_box_score_df['PLAYER_NAME'].isin(players_with_min_games)]
players_box_score_min_one_season['home_game'] = players_box_score_min_one_season['MATCHUP'].str.contains('vs.').astype(int)
players_box_score_home = players_box_score_min_one_season[players_box_score_min_one_season['home_game'] == 1]
players_box_score_away = players_box_score_min_one_season[players_box_score_min_one_season['home_game'] == 0]
player_fta_home = players_box_score_home.groupby('PLAYER_NAME')[['FTA', 'FTM']].sum().astype(int).reset_index()
player_fta_away = players_box_score_away.groupby('PLAYER_NAME')[['FTA', 'FTM']].sum().astype(int).reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_box_score_min_one_season['home_game'] = players_box_score_min_one_season['MATCHUP'].str.contains('vs.').astype(int)


In [65]:
player_fta_home['FTP'] = player_fta_home['FTM'] / player_fta_home['FTA']
player_fta_away['FTP'] = player_fta_away['FTM'] / player_fta_away['FTA']
ftp_diff = player_fta_home['FTP'] - player_fta_away['FTP']
print(f"The difference between players' ft% when compared to themselves: {ftp_diff.mean().__round__(4) * 100}%")

The difference between players' ft% when compared to themselves: 0.26%


Let's check the same for fg%.

In [66]:
player_fga_home = players_box_score_home.groupby('PLAYER_NAME')[['FGA', 'FGM']].sum().astype(int).reset_index()
player_fga_away = players_box_score_away.groupby('PLAYER_NAME')[['FGA', 'FGM']].sum().astype(int).reset_index()
player_fga_home['FGP'] = player_fga_home['FGM'] / player_fga_home['FGA']
player_fga_away['FGP'] = player_fga_away['FGM'] / player_fga_away['FGA']
fgp_diff = player_fga_home['FGP'] - player_fga_away['FGP']
print(f"The difference between players' fg% when compared to themselves: {(fgp_diff.mean() * 100).__round__(4)}%")

The difference between players' fg% when compared to themselves: 0.8998%


In [67]:
player_stats_home = players_box_score_home.groupby('PLAYER_NAME').agg({'PTS': 'sum', 'MIN': 'sum'})
player_stats_home['PTS_per_MIN'] = player_stats_home['PTS'] / player_stats_home['MIN']
player_stats_home.reset_index(inplace=True)
player_stats_away = players_box_score_away.groupby('PLAYER_NAME').agg({'PTS': 'sum', 'MIN': 'sum'})
player_stats_away['PTS_per_MIN'] = player_stats_away['PTS'] / player_stats_away['MIN']
player_stats_away.reset_index(inplace=True)

Next, I'd like to look at the 50 players with the largest difference in home points vs away points and understand what might be the cause of the difference.

In [68]:
scores_diff = player_stats_home['PTS_per_MIN'] - player_stats_away['PTS_per_MIN']
scores_diff_df = pd.DataFrame({'PLAYER_NAME': player_stats_home['PLAYER_NAME'],'SCORES_DIFF': scores_diff})
sorted_scores_diff_df = scores_diff_df.sort_values(by='SCORES_DIFF', ascending=False)
top_50_diff_leaders_df = sorted_scores_diff_df.head(50)
print(top_50_diff_leaders_df)

               PLAYER_NAME  SCORES_DIFF
219        Justin Anderson     0.102739
380          Terence Davis     0.076517
19             Aron Baynes     0.073178
122        Emmanuel Mudiay     0.072257
28        Boban Marjanovic     0.071667
332           Paul Millsap     0.069339
243          Klay Thompson     0.065494
148         Hamidou Diallo     0.064832
309                   Nene     0.061920
304       Montrezl Harrell     0.061143
397             Trae Young     0.061104
325          Pascal Siakam     0.060141
51          Christian Wood     0.058977
42         Cameron Johnson     0.058590
305          Moritz Wagner     0.058296
266            Luka Doncic     0.056835
62            Damian Jones     0.054449
203           John Konchar     0.053987
265          Luguentz Dort     0.051159
105           Dion Waiters     0.050899
366       Shaun Livingston     0.050720
274             Marc Gasol     0.049133
273          Manu Ginobili     0.049014
363         Shabazz Napier     0.048966


I'd like to check if the difference stems from these players' tendency to shoot more often at home than away.

In [69]:
top_50_home_fg = pd.merge(top_50_diff_leaders_df['PLAYER_NAME'], player_fga_home, on='PLAYER_NAME', how='inner')
top_50_away_fg = pd.merge(top_50_diff_leaders_df['PLAYER_NAME'], player_fga_away, on='PLAYER_NAME', how='inner')
top_50_fg_diff = ((top_50_away_fg['FGA'] / top_50_home_fg['FGA']) * 100)
print(f"The top 50 leaders in home points to away points difference shot approximately {(100 - top_50_fg_diff.mean()).__round__(3)}% more fgs at home than away.")

The top 50 leaders in home points to away points difference shot approximately 5.585% more fgs at home than away.


Next, we'd like to check the sensitivity of players and try to find out whether that metric is correlated with playing better at home.

In [77]:
sensitivity_dict = dict()
for year, pbp_year in years_dict.items():
    # Drop rows with NaN values in 'ShotType' and 'Shooter' columns
    pbp_year_cleaned = pbp_year.dropna(subset=['ShotType', 'Shooter'])
    
    three_pt_shots = pbp_year_cleaned[pbp_year_cleaned['ShotType'].str.startswith('3-pt')]
    player_counts = three_pt_shots['Shooter'].value_counts()
    players_over_200 = player_counts[player_counts > 200]
    players_names_over_200 = players_over_200.index.tolist()
    player_odds = {}
    
    # Iterate over each player whose name appears over 200 times
    for player_name in players_names_over_200:
        player_odds[player_name] = {'3pt_after_miss': 0, '3pt_after_make': 0}
        
        # Filter DataFrame for shots by this player
        player_shots = pbp_year_cleaned[pbp_year_cleaned['Shooter'] == player_name]
        
        # Iterate over each shot by the player
        num_shots = len(player_shots)
        for i in range(num_shots - 1):  # Skip the last shot as there is no "next shot" after it
            current_shot = player_shots.iloc[i]
            next_shot = player_shots.iloc[i + 1]
            
            # Check if both shots are from the same game
            if current_shot['URL'] != next_shot['URL']:
                continue
            
            # Check if the current shot was a 3-pt shot
            if current_shot['ShotType'].startswith('3-pt'):
                # Increment the counter based on the outcome of the next shot
                if next_shot['ShotType'].startswith('3-pt'):
                    if current_shot['ShotOutcome'] == 'miss':
                        player_odds[player_name]['3pt_after_miss'] += 1
                    elif current_shot['ShotOutcome'] == 'make':
                        player_odds[player_name]['3pt_after_make'] += 1
    
    # Calculate the odds for each player
    for player_name in player_odds:
        total_after_misses = player_odds[player_name]['3pt_after_miss']
        total_after_makes = player_odds[player_name]['3pt_after_make']
        
        if total_after_misses + total_after_makes > 0:
            odds_after_miss = total_after_misses / (total_after_misses + total_after_makes)
            odds_after_make = total_after_makes / (total_after_misses + total_after_makes)
            
            player_odds[player_name]['odds_after_miss'] = odds_after_miss
            player_odds[player_name]['odds_after_make'] = odds_after_make
        else:
            player_odds[player_name]['odds_after_miss'] = None
            player_odds[player_name]['odds_after_make'] = None
    sensitivity_dict[year] = player_odds


In [78]:
print(sensitivity_dict)

{2015: {'S. Curry - curryst01': {'3pt_after_miss': 267, '3pt_after_make': 282, 'odds_after_miss': 0.48633879781420764, 'odds_after_make': 0.5136612021857924}, 'K. Thompson - thompkl01': {'3pt_after_miss': 206, '3pt_after_make': 191, 'odds_after_miss': 0.5188916876574308, 'odds_after_make': 0.4811083123425693}, 'D. Lillard - lillada01': {'3pt_after_miss': 141, '3pt_after_make': 142, 'odds_after_miss': 0.49823321554770317, 'odds_after_make': 0.5017667844522968}, 'J. Harden - hardeja01': {'3pt_after_miss': 153, '3pt_after_make': 96, 'odds_after_miss': 0.6144578313253012, 'odds_after_make': 0.3855421686746988}, 'K. Lowry - lowryky01': {'3pt_after_miss': 175, '3pt_after_make': 120, 'odds_after_miss': 0.5932203389830508, 'odds_after_make': 0.4067796610169492}, 'J. Smith - smithjr01': {'3pt_after_miss': 213, '3pt_after_make': 192, 'odds_after_miss': 0.5259259259259259, 'odds_after_make': 0.4740740740740741}, 'P. George - georgpa01': {'3pt_after_miss': 133, '3pt_after_make': 86, 'odds_after_mi