# Chapter 15: The Sports Analytics case study

In [1]:
import pandas as pd
import seaborn as sns
import json
from urllib import request

## Get the data

In [2]:
# Download the shot-chart JSON next to the notebook so it can be read locally.
shots_url = 'https://www.murach.com/python_analysis/shots.json'
# urlretrieve returns a (local_path, headers) tuple. Keep it out of `shots`,
# which is reserved for the parsed JSON data loaded from the file.
shots_path, _headers = request.urlretrieve(shots_url, filename='shots.json')

In [3]:
# Parse the downloaded file into a dict, then show its top-level keys.
with open('shots.json') as shots_file:
    shots = json.loads(shots_file.read())
shots.keys()

dict_keys(['resource', 'parameters', 'resultSets'])

In [4]:
# Take the header names of the first result set and lower-case them so they
# can be used directly as DataFrame column names.
columnHeaders = [header.lower() for header in shots['resultSets'][0]['headers']]
columnHeaders

['grid_type',
 'game_id',
 'game_event_id',
 'player_id',
 'player_name',
 'team_id',
 'team_name',
 'period',
 'minutes_remaining',
 'seconds_remaining',
 'event_type',
 'action_type',
 'shot_type',
 'shot_zone_basic',
 'shot_zone_area',
 'shot_zone_range',
 'shot_distance',
 'loc_x',
 'loc_y',
 'shot_attempted_flag',
 'shot_made_flag',
 'game_date',
 'htm',
 'vtm']

In [5]:
# Raw data rows of the first result set (the same result set the headers
# were read from).
rows = shots['resultSets'][0]['rowSet']

In [7]:
# Combine the raw rows with the lower-cased headers into a DataFrame.
df = pd.DataFrame(rows, columns=columnHeaders)
df

Unnamed: 0,grid_type,game_id,game_event_id,player_id,player_name,team_id,team_name,period,minutes_remaining,seconds_remaining,...,shot_zone_area,shot_zone_range,shot_distance,loc_x,loc_y,shot_attempted_flag,shot_made_flag,game_date,htm,vtm
0,Shot Chart Detail,0020900015,4,201939,Stephen Curry,1610612744,Golden State Warriors,1,11,25,...,Right Side Center(RC),24+ ft.,26,99,249,1,0,20091028,GSW,HOU
1,Shot Chart Detail,0020900015,17,201939,Stephen Curry,1610612744,Golden State Warriors,1,9,31,...,Left Side Center(LC),16-24 ft.,18,-122,145,1,1,20091028,GSW,HOU
2,Shot Chart Detail,0020900015,53,201939,Stephen Curry,1610612744,Golden State Warriors,1,6,2,...,Center(C),8-16 ft.,14,-60,129,1,0,20091028,GSW,HOU
3,Shot Chart Detail,0020900015,141,201939,Stephen Curry,1610612744,Golden State Warriors,2,9,49,...,Left Side(L),16-24 ft.,19,-172,82,1,0,20091028,GSW,HOU
4,Shot Chart Detail,0020900015,249,201939,Stephen Curry,1610612744,Golden State Warriors,2,2,19,...,Left Side Center(LC),16-24 ft.,16,-68,148,1,0,20091028,GSW,HOU
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11841,Shot Chart Detail,0021801205,533,201939,Stephen Curry,1610612744,Golden State Warriors,3,0,3,...,Left Side Center(LC),24+ ft.,30,-201,224,1,1,20190407,GSW,LAC
11842,Shot Chart Detail,0021801215,25,201939,Stephen Curry,1610612744,Golden State Warriors,1,9,49,...,Left Side(L),16-24 ft.,18,-180,-15,1,0,20190409,NOP,GSW
11843,Shot Chart Detail,0021801215,48,201939,Stephen Curry,1610612744,Golden State Warriors,1,8,1,...,Center(C),24+ ft.,26,73,255,1,1,20190409,NOP,GSW
11844,Shot Chart Detail,0021801215,61,201939,Stephen Curry,1610612744,Golden State Warriors,1,6,36,...,Right Side Center(RC),24+ ft.,23,132,199,1,0,20190409,NOP,GSW


## Clean the data

In [None]:
# Which period values occur in the data?
df['period'].unique()

In [None]:
# Keep only the shots whose period is below 5
# (presumably this drops overtime periods; confirm against the data).
df = df.loc[df['period'] < 5.0]

In [None]:
# Count distinct values per column, counting missing values as a value too.
df.nunique(axis=0, dropna=False)

In [None]:
# Columns that the rest of the notebook does not use.
unused_columns = [
    'grid_type', 'game_event_id', 'team_id',
    'team_name', 'player_id', 'shot_zone_range', 'shot_zone_basic',
    'shot_zone_area', 'event_type', 'action_type', 'minutes_remaining',
    'seconds_remaining', 'shot_distance', 'player_name', 'period', 'htm',
    'vtm', 'shot_attempted_flag',
]
# drop() returns a new frame, so `df` itself is left unchanged.
shots = df.drop(columns=unused_columns)

In [None]:
# game_date holds compact YYYYMMDD values (e.g. 20091028). Give the format
# explicitly so parsing does not depend on inference; without it, integer
# input would be interpreted as nanoseconds since the epoch.
shots['game_date'] = pd.to_datetime(shots['game_date'], format='%Y%m%d')

In [None]:
# Check the remaining columns and their dtypes after the cleanup steps.
shots.info()

## Prepare the data

In [None]:
# Preview the first three cleaned rows.
shots.head(n=3)

In [None]:
# Index the shots by game. Re-assign instead of mutating in place, and skip
# the step when game_id is already the index, so that re-running this cell
# does not raise a KeyError.
if shots.index.name != 'game_id':
    shots = shots.set_index('game_id')

In [None]:
def get_season(row):
    """Return the season label 'YYYY-YYYY' for a row's game_date.

    A date in July or later belongs to the season that starts in that
    calendar year; a date in June or earlier belongs to the season that
    started in the previous calendar year.

    row: any object with a `game_date` attribute exposing `.year` and `.month`.
    """
    game_year = row.game_date.year
    start_year = game_year if row.game_date.month > 6 else game_year - 1
    return f'{start_year}-{start_year + 1}'

# Label every shot with its season by applying get_season to each row.
shots['season'] = shots.apply(get_season, axis='columns')
shots.head(5)

In [None]:
# Human-readable label for the 0/1 made flag (used later as the plot hue).
result_labels = {0: 'Missed', 1: 'Made'}
shots['shot_result'] = shots['shot_made_flag'].replace(result_labels)
shots.head(5)

In [None]:
# Which shot types occur? The points calculation depends on these labels.
shots.shot_type.unique()

In [None]:
def _points_for_shot(shot):
    """Return the points scored by one shot row: 0 if missed, else 3 or 2."""
    if shot.shot_result == 'Missed':
        return 0
    if shot.shot_type == '3PT Field Goal':
        return 3
    return 2

shots['points_made'] = shots.apply(_points_for_shot, axis=1)
shots.head(5)

In [None]:
# Total points per game, repeated on every shot row of that game.
shots['points_made_game'] = shots.groupby('game_id')['points_made'].transform('sum')

In [None]:
# Number of shot rows per game (non-null made flags), repeated on every row.
shots['shots_attempted'] = shots.groupby('game_id')['shot_made_flag'].transform('count')

In [None]:
# Number of made shots per game: the sum of the 0/1 made flag.
shots['shots_made'] = shots.groupby('game_id')['shot_made_flag'].transform('sum')

In [None]:
# Inspect the derived per-shot and per-game columns side by side.
summary_columns = ['shot_type', 'points_made', 'points_made_game',
                   'shots_attempted', 'shots_made']
shots[summary_columns]

## Plot the summary data

In [None]:
# The per-game totals are repeated on every shot row, so dropping duplicate
# rows over these columns collapses the shot-level data.
game_columns = ['season', 'game_date', 'points_made_game', 'shots_made',
                'shots_attempted']
shotsSeason = shots[game_columns].drop_duplicates()

In [None]:
# Distribution of points per game for each season.
# seaborn 0.13 deprecates passing `palette` without `hue`. Assigning the x
# variable to hue keeps one colour per season; dodge=False keeps the boxes
# centred on their tick, and the legend would only repeat the x-axis labels.
sns.catplot(data=shotsSeason, kind='box', x='season', y='points_made_game',
            hue='season', dodge=False, legend=False,
            aspect=2.5, palette='deep')

In [None]:
# Preview the data behind the season plots.
shotsSeason.head(5)

In [None]:
# Average the numeric columns per season. numeric_only=True keeps the
# datetime game_date column out of the mean, where it has no useful meaning
# and is handled differently across pandas versions.
shotsSeasonAvg = shotsSeason.groupby('season').mean(numeric_only=True).reset_index()

In [None]:
# One line per averaged metric; the dict's key order fixes the plotting order.
series_colors = {'points_made_game': 'red', 'shots_made': 'blue',
                 'shots_attempted': 'green'}
shotsSeasonAvg.plot(x='season', y=list(series_colors), color=series_colors,
                    figsize=(8, 5), ylim=(0, 30))

## Plot the shots for two games

In [None]:
# Scatter the shot locations of two games with a lot of shots,
# one facet per game, coloured by made/missed.
gameIDs = ['0021800923', '0021800642']
two_games = shots.query('game_id in @gameIDs')
g = sns.relplot(data=two_games, x='loc_x', y='loc_y', hue='shot_result',
                col='game_id', kind='scatter')

In [None]:
# SOURCE: http://savvastjortjoglou.com/nba-shot-sharts.html
from matplotlib.patches import Circle, Rectangle, Arc
def draw_court(ax=None, color='black', lw=2, outer_lines=False):
    """Draw the lines of an NBA half court onto a matplotlib axes.

    The court is drawn in the coordinate system used by the shapes below:
    the hoop is centred at (0, 0) and 10 units correspond to one foot
    (e.g. the 16 ft wide paint is 160 units wide).

    Parameters
    ----------
    ax : matplotlib axes, optional
        Axes to draw on. When omitted, the current axes is used.
    color : str
        Colour passed to every court patch.
    lw : int or float
        Line width passed to every court patch.
    outer_lines : bool
        When true, also draw the rectangle formed by the half-court line,
        the baseline and the sidelines.

    Returns
    -------
    The axes the court was drawn on.
    """
    # If an axes object isn't provided to plot onto, just get the current one
    if ax is None:
        # pyplot is only needed for this fallback, so import it here;
        # it was previously referenced without ever being imported.
        import matplotlib.pyplot as plt
        ax = plt.gca()

    # Create the various parts of an NBA basketball court

    # Create the basketball hoop
    # Diameter of a hoop is 18" so it has a radius of 9", which is a value
    # 7.5 in our coordinate system
    hoop = Circle((0, 0), radius=7.5, linewidth=lw, color=color, fill=False)

    # Create backboard
    backboard = Rectangle((-30, -7.5), 60, -1, linewidth=lw, color=color)

    # The paint
    # Create the outer box of the paint, width=16ft, height=19ft
    outer_box = Rectangle((-80, -47.5), 160, 190, linewidth=lw, color=color,
                          fill=False)
    # Create the inner box of the paint, width=12ft, height=19ft
    inner_box = Rectangle((-60, -47.5), 120, 190, linewidth=lw, color=color,
                          fill=False)

    # Create free throw top arc
    top_free_throw = Arc((0, 142.5), 120, 120, theta1=0, theta2=180,
                         linewidth=lw, color=color, fill=False)
    # Create free throw bottom arc
    bottom_free_throw = Arc((0, 142.5), 120, 120, theta1=180, theta2=0,
                            linewidth=lw, color=color, linestyle='dashed')
    # Restricted Zone, it is an arc with 4ft radius from center of the hoop
    restricted = Arc((0, 0), 80, 80, theta1=0, theta2=180, linewidth=lw,
                     color=color)

    # Three point line
    # Create the side 3pt lines, they are 14ft long before they begin to arc
    corner_three_a = Rectangle((-220, -47.5), 0, 140, linewidth=lw,
                               color=color)
    corner_three_b = Rectangle((220, -47.5), 0, 140, linewidth=lw, color=color)
    # 3pt arc - center of arc will be the hoop, arc is 23'9" away from hoop
    # The theta values were tuned by hand until the arc met the corner lines
    three_arc = Arc((0, 0), 475, 475, theta1=22, theta2=158, linewidth=lw,
                    color=color)

    # Center Court
    center_outer_arc = Arc((0, 422.5), 120, 120, theta1=180, theta2=0,
                           linewidth=lw, color=color)
    center_inner_arc = Arc((0, 422.5), 40, 40, theta1=180, theta2=0,
                           linewidth=lw, color=color)

    # List of the court elements to be plotted onto the axes
    court_elements = [hoop, backboard, outer_box, inner_box, top_free_throw,
                      bottom_free_throw, restricted, corner_three_a,
                      corner_three_b, three_arc, center_outer_arc,
                      center_inner_arc]

    if outer_lines:
        # Draw the half court line, baseline and side out bound lines.
        # Use a separate name so the boolean parameter is not overwritten.
        court_boundary = Rectangle((-250, -47.5), 500, 470, linewidth=lw,
                                   color=color, fill=False)
        court_elements.append(court_boundary)

    # Add the court elements onto the axes
    for element in court_elements:
        ax.add_patch(element)

    return ax

In [None]:
# Shot locations for the two selected games, drawn on top of the court.
# col_order pins the facet order to gameIDs. Without it, seaborn orders the
# facets by first appearance in the data, so titles taken from gameIDs by
# position could be attached to the wrong game.
g = sns.relplot(data=shots.query('game_id in @gameIDs'), kind='scatter',
    x='loc_x', y='loc_y', hue='shot_result', col='game_id',
    col_order=gameIDs, s=50)

for game_id, ax in zip(gameIDs, g.axes.flat):
    ax.set_title('shots for game ' + game_id)
    draw_court(ax, outer_lines=True)
    # Same limits on every facet, leaving a margin around the court lines
    ax.set_xlim(-300, 300)
    ax.set_ylim(-100, 500)

## Plot shot data for two seasons

In [None]:
# Palette order: red first, then blue
colors = ['#FF0B04', '#4374B3']
sns.set_palette(sns.color_palette(colors))

# Compare the shot locations of two seasons, one facet per season.
seasons = ['2009-2010', '2018-2019']
season_shots = shots.query('season in @seasons')
g = sns.relplot(data=season_shots, x='loc_x', y='loc_y', hue='shot_result',
                col='season', col_wrap=1, kind='scatter')

# Overlay the court on every facet and use the same limits everywhere.
for ax in g.axes.flat:
    draw_court(ax, outer_lines=True)
    ax.set_xlim(-300, 300)
    ax.set_ylim(-100, 500)

## Plot shot density for one season

In [None]:
# Palette order: blue first, then red
colors = ['#4374B3', '#FF0B04']
sns.set_palette(sns.color_palette(colors))

# Shot-location density for one season, one facet per shot result.
one_season = shots.query('season == "2015-2016"')
g = sns.displot(data=one_season, x='loc_x', y='loc_y', kind='kde',
                col='shot_result', hue='shot_result', col_wrap=1,
                legend=False)

# Overlay the court on every facet and use the same limits everywhere.
for ax in g.axes.flat:
    draw_court(ax, outer_lines=True)
    ax.set_xlim(-300, 300)
    ax.set_ylim(-100, 500)

## Plot shot density for two seasons

In [None]:
# Palette order: red first, then blue
colors = ['#FF0B04', '#4374B3']
sns.set_palette(sns.color_palette(colors))

# Shot-location density for two seasons: one row per shot result,
# one column per season.
seasons = ['2009-2010', '2015-2016']
season_shots = shots.query('season in @seasons')
g = sns.displot(data=season_shots, x='loc_x', y='loc_y', kind='kde',
                row='shot_result', col='season', hue='shot_result',
                legend=False)

# Overlay the court on every facet and use the same limits everywhere.
for ax in g.axes.flat:
    draw_court(ax, outer_lines=True)
    ax.set_xlim(-300, 300)
    ax.set_ylim(-100, 500)