# NBA 2021 Project
### Francis Peng, Annie Seo

# Introduction and Motivation

- Contract/value/net-worth
- Shot selection, 2s, 3s, midrange, layup, etc.
- Predicting outcomes/win percentage based on mid-game stats of individual players.
- ## Value of the midrange shot?
- A midrange shot is a two-point attempt taken from 10 ft out to the three-point line.

# Data and Code Setup

We will use the following API, which scrapes NBA statistics from basketball-reference.com:
https://github.com/vishaalagartha/basketball_reference_scraper

In [1]:
# !pip install basketball-reference-scraper
# !pip install unidecode

In [1]:
# Boilerplate
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
import scipy.stats as stats
from scipy.optimize import minimize

In [2]:
# Load basketball-reference libraries
from basketball_reference_scraper.teams import get_roster, get_team_stats, get_opp_stats, get_roster_stats, get_team_misc
from basketball_reference_scraper.players import get_stats, get_game_logs, get_player_headshot
from basketball_reference_scraper.seasons import get_schedule, get_standings
from basketball_reference_scraper.box_scores import get_box_scores
from basketball_reference_scraper.pbp import get_pbp
from basketball_reference_scraper.shot_charts import get_shot_chart

## First, we retrieve the players with the best midrange efficiency. To filter out small-sample anomalies, we keep only players with relatively high midrange volume (top 50%).

In [3]:
# Lookup from upper-case full team name to the three-letter abbreviation
# used for that team in the rest of this notebook.
team_abbr = {
    'ATLANTA HAWKS': 'ATL',
    'BOSTON CELTICS': 'BOS',
    'BROOKLYN NETS': 'BRK',
    'CHICAGO BULLS': 'CHI',
    'CHARLOTTE HORNETS': 'CHO',
    'CLEVELAND CAVALIERS': 'CLE',
    'DALLAS MAVERICKS': 'DAL',
    'DENVER NUGGETS': 'DEN',
    'DETROIT PISTONS': 'DET',
    'GOLDEN STATE WARRIORS': 'GSW',
    'HOUSTON ROCKETS': 'HOU',
    'INDIANA PACERS': 'IND',
    'LOS ANGELES CLIPPERS': 'LAC',
    'LOS ANGELES LAKERS': 'LAL',
    'MEMPHIS GRIZZLIES': 'MEM',
    'MIAMI HEAT': 'MIA',
    'MILWAUKEE BUCKS': 'MIL',
    'MINNESOTA TIMBERWOLVES': 'MIN',
    'NEW ORLEANS PELICANS': 'NOP',
    'NEW YORK KNICKS': 'NYK',
    'OKLAHOMA CITY THUNDER': 'OKC',
    'ORLANDO MAGIC': 'ORL',
    'PHILADELPHIA 76ERS': 'PHI',
    'PHOENIX SUNS': 'PHO',
    'PORTLAND TRAIL BLAZERS': 'POR',
    'SACRAMENTO KINGS': 'SAC',
    'SAN ANTONIO SPURS': 'SAS',
    'TORONTO RAPTORS': 'TOR',
    'UTAH JAZZ': 'UTA',
    'WASHINGTON WIZARDS': 'WAS',
}

# The same lookup as a pandas DataFrame: the index holds the full team
# names and the single column (labelled 0) holds the abbreviations.
team_abbr_df = pd.DataFrame.from_dict(team_abbr, orient='index')

In [60]:
# Retrieve the schedule for the 2021 season through the scraper's
# get_schedule helper. The cells below read its DATE, VISITOR and HOME columns.
# NOTE(review): playoffs=False presumably asks for regular-season games only,
# yet the schedule preview further down lists games dated late June 2021 —
# confirm the flag filters the way this analysis expects.
schedule_2021_orig = get_schedule(2021, playoffs=False)

In [111]:
# Replace the full team names in the schedule with their abbreviations.
#
# team_abbr_df is indexed by upper-case team name and its only column
# (labelled 0) holds the abbreviation, so that column can be used directly
# as a lookup Series. Mapping through it avoids joining the lookup table
# twice and then depending on the auto-generated '0_l' / '0_r' column names.
# A team name missing from the lookup becomes NaN, exactly as it did with
# the left joins.
abbr_lookup = team_abbr_df[0]
schedule_2021 = pd.DataFrame({
    'DATE': schedule_2021_orig['DATE'],
    # Names are upper-cased first because the lookup keys are upper-case.
    'VISITOR': schedule_2021_orig['VISITOR'].str.upper().map(abbr_lookup),
    'HOME': schedule_2021_orig['HOME'].str.upper().map(abbr_lookup),
})
schedule_2021

Unnamed: 0,DATE,VISITOR,HOME
0,2020-12-22,GSW,BRK
1,2020-12-22,LAC,LAL
2,2020-12-23,CHO,CLE
3,2020-12-23,NYK,IND
4,2020-12-23,MIA,ORL
...,...,...,...
1158,2021-06-26,PHO,LAC
1159,2021-06-27,MIL,ATL
1160,2021-06-28,LAC,PHO
1161,2021-06-29,MIL,ATL


In [124]:
# Collect every two-point attempt of every game in schedule_2021 into one
# table (visitor's shots first, then the home team's, game by game).
shot_columns = ['x','y','QUARTER','TIME_REMAINING','PLAYER','MAKE_MISS','VALUE','DISTANCE']

# Convert the dates to strings once, instead of re-casting the whole
# schedule on every loop iteration.
game_dates = schedule_2021['DATE'].astype(str)

# Start from an empty frame with the expected columns so the final column
# order is fixed even before the first chart is added.
two_point_charts = [pd.DataFrame(columns=shot_columns)]
for game_date, visitor, home in zip(game_dates, schedule_2021['VISITOR'], schedule_2021['HOME']):
    # One request per game: the returned mapping is looked up by team
    # abbreviation, so both teams' charts come from the same call.
    game_charts = get_shot_chart(game_date, visitor, home)
    for team in (visitor, home):
        chart_temp = game_charts.get(team)
        # Keep two-point attempts only.
        two_point_charts.append(chart_temp[chart_temp['VALUE'] == 2])

# A single concat at the end avoids re-copying the growing table each game.
master_shot_chart = pd.concat(two_point_charts, sort=False)
master_shot_chart

Unnamed: 0,x,y,QUARTER,TIME_REMAINING,PLAYER,MAKE_MISS,VALUE,DISTANCE
0,24.9 ft,3.08 ft,1,11:11.0,James Wiseman,MAKE,2,1 ft
4,26.8 ft,5.87 ft,1,9:38.0,Stephen Curry,MAKE,2,4 ft
5,20.5 ft,25.0 ft,1,9:18.0,James Wiseman,MISS,2,22 ft
6,26.2 ft,20.2 ft,1,9:01.0,Eric Paschall,MISS,2,17 ft
7,25.5 ft,2.88 ft,1,7:32.0,Andrew Wiggins,MISS,2,2 ft
...,...,...,...,...,...,...,...,...
65,40.1 ft,9.85 ft,4,8:04.0,Paul George,MAKE,2,17 ft
73,22.7 ft,4.87 ft,4,4:24.0,Terance Mann,MAKE,2,2 ft
76,22.1 ft,6.27 ft,4,1:39.0,Amir Coffey,MAKE,2,4 ft
77,11.2 ft,18.9 ft,4,1:08.0,Yogi Ferrell,MISS,2,20 ft


In [125]:
# Save the two-point shot table to TwoP.csv; a later cell reads this file
# back in. The DataFrame index is written as well (to_csv default).
master_shot_chart.to_csv("TwoP.csv")

In [51]:
# Load the two-point shot table from TwoP.csv.
TwoP = pd.read_csv("TwoP.csv")

In [52]:
# Reduce the two-point shots to midrange attempts (10 ft or farther).
#
# DISTANCE is stored as text such as "17 ft": pull out the leading digits
# and convert them to integers. The pattern is a raw string so the regex
# engine receives \d verbatim instead of Python seeing an invalid string
# escape, and expand=False makes extract return a Series rather than a
# one-column DataFrame.
TwoP["DISTANCE"] = TwoP["DISTANCE"].str.extract(r'(\d+)', expand=False).astype(int)
# TwoP.csv was written from shots with VALUE == 2, so a distance cut-off is
# all that is needed to isolate the midrange.
TwoP = TwoP[TwoP["DISTANCE"] >= 10]

## Next, we compare their midrange efficiency against their three-point efficiency, again taking volume into consideration.

In [None]:
# Empty table with the four per-player columns (player, team, three-pointers
# made, three-pointers attempted); the per-team roster stats are added to it
# by the loop in the following cell.
ThreeP = pd.DataFrame(columns = ['PLAYER', 'TEAM', '3P', '3PA'])

In [None]:
# Gather the 2021 season-total three-point makes and attempts for the
# roster of every team listed in team_abbr_df.
#
# DataFrame.append was removed in pandas 2.0, so the per-team tables are
# collected in a list and added onto ThreeP with a single pd.concat call
# (which also avoids re-copying the growing table for each team).
team_three_point_tables = []
for team in team_abbr_df[0]:
    roster_totals = get_roster_stats(team, 2021, data_format='TOTALS', playoffs=False)
    team_three_point_tables.append(roster_totals[['PLAYER', 'TEAM', '3P', '3PA']])
ThreeP = pd.concat([ThreeP, *team_three_point_tables])

In [None]:
# Add the three-point efficiency column '3PE': points scored per
# three-point attempt, i.e. makes * 3 / attempts.
threes_made = ThreeP['3P'].astype(float)
threes_attempted = ThreeP['3PA'].astype(float)
# '3PE' is not a valid Python identifier, so it is passed to assign via
# dictionary unpacking.
ThreeP = ThreeP.assign(**{'3PE': threes_made * 3 / threes_attempted})
# Discard rows containing missing values, then renumber the rows from zero.
ThreeP = ThreeP.dropna().reset_index(drop=True)
ThreeP

In [8]:
# Load the three-point table from ThreeP.csv and display it.
# NOTE(review): presumably a saved copy of the table built in the cells
# above — the cell that writes ThreeP.csv is not shown here.
ThreeP = pd.read_csv("ThreeP.csv")
ThreeP

Unnamed: 0.1,Unnamed: 0,PLAYER,TEAM,3P,3PA,3PE
0,0,Bogdan Bogdanović,ATL,146,333,1.315315
1,1,John Collins,ATL,83,208,1.197115
2,2,Kris Dunn,ATL,0,2,0.000000
3,3,Bruno Fernando,ATL,0,2,0.000000
4,4,Danilo Gallinari,ATL,103,254,1.216535
...,...,...,...,...,...,...
588,588,Jerome Robinson,WAS,11,42,0.785714
589,589,Ish Smith,WAS,18,49,1.102041
590,590,Moritz Wagner,WAS,13,42,0.928571
591,591,Russell Westbrook,WAS,86,273,0.945055


# Methodology and Analysis

# Conclusion