In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
sns.set()

## Previous Notes
- **91 games exist**
- **14193 plays exist**
    - 6.47 defenders in the box on average (count=11556)
    - 4.22 pass rushers per play on average (count=6710)
    - 8.27 pass length on average (count=6261)
        - 4.96 yards after catch on average (count=3937)
- **1713 players exist**
    - Remember teams have a 53 man roster
    - May want to consider converting height string to a metric height (meters)
        - The most recent NFL draft had 32 teams choosing players across 7 rounds = 224 draft picks
    - Although in this dataset the highest draft number was 256
- **91 NGS Datasets = 91 games**

### THEME #3: IDENTIFY BEST RECEIVER-ROUTE COMBINATIONS 
- **Specifics: Using the player tracking data, identify what combinations of routes work best together. This theme challenges you to consider:**
    - **Spatial characteristics of receivers and defensive backs.**
    - **Personnel groupings (both offense and defense), as well as defensive matchups and coverage type.**
    - **What constitutes a successful outcome for receivers.**
    - **Approaches for evaluating receiver traits simultaneously.**
    - **Rank (and justify) optimal combinations.**
        
        
- Given a specific play, we want to consider which matchups/formations are optimal for the offense and for the defense
    - What makes these formations more favorable to the offense or defense?
        - Are players grouped more densely, double coverage, single coverage, zone?
        - What are the distances between players and do certain distances lead to optimal outcome?
    - Define "successful" outcome for receiver: primarily distance to first down, return on points (yards gained/end of drive yards * points gained)
- How can we understand all receivers simultaneously?
- Rank the best combinations

    
- Random question: do teams have higher success at scoring between the 8 and 18 yardline or the 0 - 8 yardline?

# Course of Action
1. Filter plays for only pass plays
2. Look at formations to get a general sense of distributions
    - On what down and distance are they used?
    - Against what formations are they successful?
    - **May want to consider creating images for the start of the play to feed to clustering algorithm**
3. Consider what effect score, previous play outcome, etc. has on subsequent play. Or does the success lie within just the play?

In [3]:
# Read the three reference tables (games, plays, players) in one pass.
game_df, play_df, player_df = map(
    pd.read_csv,
    ('data/games.csv', 'data/plays.csv', 'data/players.csv'),
)

# Report the (rows, columns) of each table as a quick sanity check.
print('Game data shape:', game_df.shape)
print('Play data shape:', play_df.shape)
print('Player data shape:', player_df.shape)
# Echo the play table's shape once more (kept so the recorded output is reproduced).
print(play_df.shape)

# Show the last game row to check the column layout.
game_df.tail(1)

Game data shape: (91, 21)
Play data shape: (14193, 27)
Player data shape: (1713, 10)
(14193, 27)


Unnamed: 0,season,week,gameDate,gameId,gameTimeEastern,HomeScore,VisitorScore,homeTeamAbbr,visitorTeamAbbr,homeDisplayName,...,Stadium,Location,StadiumType,Turf,GameLength,GameWeather,Temperature,Humidity,WindSpeed,WindDirection
90,2017,6,10/15/2017,2017101501,13:00:00,24,27,BAL,CHI,Baltimore Ravens,...,M&T Stadium,"Baltimore, Md.",Outdoor,Natural,03:26:00,Partly Sunny,69.0,87.0,7,s


## Tracking Data
- Files tracking_gameId_[gameId].csv contain player tracking data from game [gameId]. Nearly all plays from [gameId] are included; certain plays with incomplete or missing data are dropped. 
- The key variables are <b>gameId, playId, and nflId</b>.
- time: Time stamp of play (time, yyyy-mm-dd, hh:mm:ss)
- x: Player position along the long axis of the field, 0 - 120 yards. See Figure 1 below. (numeric)
- y: Player position along the short axis of the field, 0 - 53.3 yards. See Figure 1 below. (numeric)
- s: Speed in yards/second (numeric)
- dis: Distance traveled from prior time point, in yards (numeric)
- dir: Angle of player motion (deg), 0 - 360 degrees (numeric)
- event: Tagged play details, including moment of ball snap, pass release, pass catch, tackle, etc (text)
- nflId: Player identification number, unique across players (numeric)
- displayName: Player name (text)
- jerseyNumber: Jersey number of player (numeric)
- team: Team (away or home) of corresponding player (text)
- frame.id: Frame identifier for each play, starting at 1 (numeric)
- gameId: Game identifier, unique (numeric)
- playId: Play identifier, not unique across games (numeric)
- <b>91 datasets = 91 games</b>

In [18]:
# Load the player-tracking feed of a single game and check its size.
tracking_path = 'data/tracking_gameId_2017090700.csv'
tracking_df = pd.read_csv(tracking_path)
print(tracking_df.shape)

# Show the last tracking row to check the column layout.
tracking_df.tail(1)

(316025, 14)


Unnamed: 0,time,x,y,s,dis,dir,event,nflId,displayName,jerseyNumber,team,frame.id,gameId,playId
316024,2017-09-08T04:16:03Z,73.27,27.03,0.22,0.07,,,,football,,ball,18,2017090700,4805


In [19]:
# Summary statistics (count, mean, std, quartiles) of the tracking columns.
tracking_df.describe()

Unnamed: 0,x,y,s,dis,dir,nflId,jerseyNumber,frame.id,gameId,playId
count,315998.0,315998.0,315998.0,315998.0,302255.0,302282.0,302282.0,316025.0,316025.0,316025.0
mean,65.829084,27.731391,2.688532,0.283424,179.422799,2368211.0,50.040793,42.112912,2017091000.0,2511.296644
std,26.233381,9.560142,2.594922,0.284264,104.155377,587844.9,27.212049,27.307785,0.0,1355.507224
min,-16.07,-6.53,0.0,0.0,0.0,2649.0,2.0,1.0,2017091000.0,44.0
25%,45.12,22.38,0.74,0.05,85.6,2507948.0,25.0,20.0,2017091000.0,1355.0
50%,66.72,27.79,1.97,0.22,180.2,2543563.0,50.0,39.0,2017091000.0,2591.0
75%,88.05,32.82,3.97,0.44,270.79,2552392.0,71.0,60.0,2017091000.0,3725.0
max,130.0,61.88,32.73,8.08,360.0,2558924.0,99.0,166.0,2017091000.0,4805.0


In [76]:
# Count the tracking rows recorded for every (gameId, playId) pair.
rows_per_play = tracking_df.groupby(['gameId', 'playId']).size()
unique_ids = rows_per_play.reset_index(name='count')
unique_ids

Unnamed: 0,gameId,playId,count
0,2017090700,44,2254
1,2017090700,68,2001
2,2017090700,94,2047
3,2017090700,118,1288
4,2017090700,139,1311
5,2017090700,160,1702
6,2017090700,189,1403
7,2017090700,210,1771
8,2017090700,309,1679
9,2017090700,345,1150


- Let's try mapping routes of players
- They have been kind enough to give us an index (frame.id) for a given play

In [74]:
# # Select a specific player by jerseynumber
# jersey_number = 50
# where_condition = (tracking_df['jerseyNumber'] == jersey_number)
# tracking_df[where_condition]

In [32]:
def get_play(df, play_id, game_id=None):
    '''Create dataframe of just a particular play.

    Parameters
    ----------
    df : DataFrame with a 'playId' column (and a 'gameId' column when
        `game_id` is given).
    play_id : value matched against df['playId'].
    game_id : optional value matched against df['gameId']. playId is not
        unique across games, so pass this whenever `df` holds more than
        one game. With the default (None) only playId is filtered.

    Returns
    -------
    An independent copy of the matching rows (empty when nothing matches);
    the original index labels are kept.
    '''
    where_condition = df['playId'] == play_id
    if game_id is not None:
        where_condition = where_condition & (df['gameId'] == game_id)
    # Copy so that later in-place edits never touch the source frame.
    return df[where_condition].copy()

In [92]:
# Pick one play and order its rows frame by frame (and by team within a
# frame) so the animation can step through them in sequence.
game_id = tracking_df.loc[0, 'gameId']
play_id = 4503
the_play = (
    get_play(tracking_df, play_id)
    .sort_values(by=['frame.id', 'team'])
    .reset_index(drop=True)
)

In [93]:
# Build the plot title "<home> - <away> (<home score> - <away score>)" for the current play.

# Score before the play, taken from the matching row of the play table
where_condition = (play_df['gameId'] == game_id) & (play_df['playId'] == play_id)
current_play = play_df[where_condition]
home_score = current_play['HomeScoreBeforePlay'].values[0]
away_score = current_play['VisitorScoreBeforePlay'].values[0]

# Team abbreviations, taken from the matching row of the game table
current_game = game_df[game_df['gameId'] == game_id]
home_team = current_game['homeTeamAbbr'].values[0]
away_team = current_game['visitorTeamAbbr'].values[0]

title = ''.join([home_team, ' - ', away_team, ' (', str(home_score), ' - ', str(away_score), ')'])
title

'NE - KC (27 - 41)'

In [97]:
'''SCRIPT WITH TEAM LABELS'''
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.animation
import numpy as np
from IPython.display import HTML

# Setup field
fig, ax = plt.subplots(figsize=(10,6))
ax.set(
    xlim=(-10, 130),
    ylim=(-10, 65),
    xlabel='yardline',
    ylabel='width of field'
)
plt.title(title, fontsize=16)
plt.xticks(np.arange(0, 130, step=10),
              ['End', 'GL', '10', '20', '30', '40', '50', '40', '30', '20', '10', 'GL', 'End'])
plt.yticks(np.arange(0, 65, 53.3), ['Sideline', 'Sideline'])
red_patch = mpatches.Patch(color='red', label='away team')
brown_patch = mpatches.Patch(color='brown', label='ball')
blue_patch = mpatches.Patch(color='blue', label='home team')
plt.legend(handles=[red_patch, blue_patch, brown_patch])

# Away Team
scat1 = ax.scatter(the_play.loc[:10, 'x'], the_play.loc[:10, 'y'], color='red', alpha=0.5)
# Ball
scat2 = ax.scatter(the_play.loc[11, 'x'], the_play.loc[11, 'y'], color='brown', alpha=0.9)
# Home team
scat3 = ax.scatter(the_play.loc[12:22, 'x'], the_play.loc[12:22, 'y'], color='blue', alpha=0.5)

def animate(i):
    if i == 0:
        return
    else:
        # Away team update
        scat1.set_offsets(np.c_[the_play.loc[(i*23):(i*23)+10, 'x'], 
                                the_play.loc[(i*23):(i*23)+10, 'y']])
        # Ball update
        scat2.set_offsets(np.c_[the_play.loc[(i*23)+11, 'x'], 
                                the_play.loc[(i*23)+11, 'y']])
        # Hom team update
        scat3.set_offsets(np.c_[the_play.loc[(i*23)+12:(i*23)+22, 'x'], 
                                the_play.loc[(i*23)+12:(i*23)+22, 'y']])

        
where_condition = ((play_df['gameId'] == game_id) &\
                   (play_df['playId'] == play_id))
print('Play Description:', play_df[where_condition]['playDescription'].values[0])
ani = matplotlib.animation.FuncAnimation(fig, animate, frames=int(len(the_play)/23), interval=100, repeat=False)
plt.close()
HTML(ani.to_jshtml())

Play Description: C.Santos extra point is GOOD, Center-J.Winchester, Holder-D.Colquitt.


In [40]:
# Peek at the first row of the play-level table to see which columns it offers.
play_df.head(1)

Unnamed: 0,gameId,playId,quarter,GameClock,down,yardsToGo,possessionTeam,yardlineSide,yardlineNumber,offenseFormation,...,VisitorScoreAfterPlay,isPenalty,isSTPlay,SpecialTeamsPlayType,KickReturnYardage,PassLength,PassResult,YardsAfterCatch,PlayResult,playDescription
0,2017091004,37,1,15:00:00,0,0,DET,DET,35.0,,...,0,False,True,Kickoff,23.0,,,,42,K.Redfern kicks 65 yards from DET 35 to ARZ 0....


In [None]:
# Map routes of each concussed player and their partner player,
# colouring every tracked position by approximate speed.
# NOTE(review): `injured_players`, `ngs_concussion` and `make_html` are not
# defined in the visible part of this notebook — confirm they are created
# before this cell runs. The loop also reassigns `play_id`, which the play
# animation cells use.


def _player_track(ngs_df, game_key, play_id, gsis_id):
    '''Return one player's tracking rows for a single play, ordered by Time.'''
    where_condition = (
        (ngs_df['GameKey'] == game_key) &
        (ngs_df['PlayID'] == play_id) &
        (ngs_df['GSISID'] == gsis_id))
    return (ngs_df[where_condition]
            .sort_values(by=['Time'])
            .reset_index(drop=True))


# Plot styling and colour map do not change between plays, so set them once.
sns.set()
cmap = plt.get_cmap('coolwarm')

for i in range(len(injured_players)):
    # Get necessary values for query of NGS data
    game_key = injured_players.loc[i, 'GameKey']
    play_id = injured_players.loc[i, 'PlayID']
    concussed_id = injured_players.loc[i, 'GSISID']
    partner_id = injured_players.loc[i, 'Primary_Partner_GSISID']
    print('GameKey:', game_key, 'PlayID:', play_id)
    print('Play Description:', injured_players.loc[i,'PlayDescription'])
    print('Primary Impact Type:', injured_players.loc[i, 'Primary_Impact_Type'])
    print('Concussed:', concussed_id, 'Role:', injured_players.loc[i, 'Role'])
    print('Partner:', partner_id)
    # Visualizing play with .gif file
    display(HTML(''.join(make_html(game_key, play_id))))

    # Tracking rows of the concussed player and of the partner player
    concussion = _player_track(ngs_concussion, game_key, play_id, concussed_id)
    partner = _player_track(ngs_concussion, game_key, play_id, partner_id)

    # Approximate speed = distance covered per sample / sample interval.
    # Dividing by 0.1 assumes one sample every 0.1 s — TODO confirm.
    speed1 = concussion['dis'] / 0.1
    speed2 = partner['dis'] / 0.1

    # A partner is only drawn when an id is present (neither a real NaN nor
    # the literal string 'NaN') and tracking rows were found for it.
    has_partner = not (pd.isna(partner_id) or partner_id == 'NaN') and not partner.empty

    # Mapping of play
    plt.figure(figsize=(10,5))
    # vmin/vmax are passed to every scatter so both players share the 0-12
    # colour scale; plt.clim would only affect the most recent scatter.
    plt.scatter(concussion['x'], concussion['y'], c=speed1, cmap=cmap,
                vmin=0, vmax=12, alpha=0.5)
    if has_partner:
        plt.scatter(partner['x'], partner['y'], c=speed2, cmap=cmap,
                    vmin=0, vmax=12, alpha=0.5)
    plt.colorbar(label='yards/sec')
    # Normal length of field is 120 yards
    plt.xlim(-10, 130)
    plt.xticks(np.arange(0, 130, step=10),
               ['End', 'Goal Line', '10', '20', '30', '40', '50', '40', '30', '20', '10', 'Goal Line', 'End'])
    # Normal width is 53.3 yards
    plt.ylim(-10, 65)
    plt.yticks(np.arange(0, 65, 53.3), ['Sideline', 'Sideline'])
    plt.title('Playing Field')
    plt.xlabel('yardline')
    plt.ylabel('width of field')
    plt.show()
    print('---')