In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt
import os
import psutil

# import lightgbm as lgb
from sklearn.model_selection import train_test_split
import random
import sklearn
from itertools import cycle, islice

import warnings
warnings.filterwarnings("ignore")

from tqdm.notebook import tqdm
# Show up to 500 columns when displaying wide frames. The fully qualified key is
# used because the bare 'max_columns' pattern is ambiguous on pandas versions that
# also define 'styler.render.max_columns' and raises OptionError there.
pd.set_option('display.max_columns', 500)
# Apply the FiveThirtyEight style sheet to the figures drawn below.
plt.style.use('fivethirtyeight')

In [None]:
# Load the three input tables (paths are relative to the notebook).
# 'time' is read as float64; the remaining tracking columns are read as float16,
# which keeps the frame small at reduced numeric precision.
track_dtypes = {'time': 'float64'}
track_dtypes.update({col: 'float16' for col in ('x', 'y', 'dir', 'dis', 'o', 's')})

tracks = pd.read_csv('../in/PlayerTrackData.csv', dtype=track_dtypes)
plays = pd.read_csv('../in/PlayList.csv')
injury = pd.read_csv('../in/InjuryRecord.csv')

# One row per distinct (PlayerKey, RosterPosition) pair, with the position renamed so
# it does not collide with the RosterPosition column that comes from the play merge.
player_roster = (
    plays[['PlayerKey', 'RosterPosition']]
    .drop_duplicates()
    .rename(columns={'RosterPosition': 'RosterPosition_notplay'})
)

# Injury records enriched with play details (left join on the shared columns), then
# joined to the per-player roster position (default inner join).
injury_detailed = injury.merge(plays, how='left').merge(player_roster)

# Fold the two secondary positions into a single "Defensive Back" label.
injury_detailed['RosterPosition_notplay'] = injury_detailed['RosterPosition_notplay'].replace(
    {'Safety': 'Defensive Back', 'Cornerback': 'Defensive Back'}
)

In [None]:
plt.style.use('fivethirtyeight')

# Flag every play whose PlayKey appears in the injury table.
injury_playkeys = injury['PlayKey'].unique()
plays['counter'] = 1  # constant column, usable for counting in group-bys
plays['isInjuryPlay'] = plays['PlayKey'].isin(injury_playkeys)

# Mean of the flag per surface, scaled to injuries per 100,000 plays.
surface_rate = plays.groupby('FieldType')[['isInjuryPlay']].mean() * 100000

fig, ax = plt.subplots(1, 1, figsize=(8, 6))
surface_rate.plot.bar(ax=ax)
ax.get_legend().remove()
ax.set_title('Lower Body Injury Rate by Surface')
ax.set_xlabel('')
ax.set_ylabel('Injury per \n 100,000 plays', rotation=0, fontsize=13, color='darkgrey')
ax.yaxis.set_label_coords(-0.12, 0.85)

# Print the (truncated) rate in white just below the top of each bar.
for bar in ax.patches:
    bar_top = bar.get_height()
    label_xy = (bar.get_x() + 0.20, bar_top - 3)
    ax.annotate(f'{int(bar_top)}', label_xy, fontsize=20, color='white')

ax.axhline(0, color='k')
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(0)
plt.show()

In [None]:
def _highlight_bars_after(target_ax, last_plain_index, color='orange'):
    """Recolor every bar of ``target_ax`` whose position in ``patches`` exceeds ``last_plain_index``."""
    for idx, bar in enumerate(target_ax.patches):
        if idx > last_plain_index:
            bar.set_color(color)


def _label_barh_counts(target_ax, fontsize=14, ha='left', offset_pts=-20):
    """Write each horizontal bar's width, formatted with no decimals, in white next to the bar end.

    Parameters
    ----------
    target_ax : Axes whose ``patches`` are the bars to label.
    fontsize : font size of the labels.
    ha : horizontal alignment used for bars with a non-negative width.
    offset_pts : horizontal shift from the bar end, in points; a negative value
        moves the label back inside a non-negative bar.
    """
    for bar in target_ax.patches:
        width = bar.get_width()
        y_mid = bar.get_y() + bar.get_height() / 2
        shift, align = offset_pts, ha
        if width < 0:
            # Mirror the placement for a bar that extends to the left.
            shift, align = -offset_pts, 'right'
        target_ax.annotate(
            "{:.0f}".format(width),
            (width, y_mid),              # anchor at the end of the bar
            xytext=(shift, 0),           # shift horizontally only
            textcoords="offset points",
            va='center',
            ha=align,
            fontsize=fontsize,
            color='white')


fig, axes = plt.subplots(1, 2, figsize=(15, 5))
ax, ax2 = axes

# Left panel: injury records per roster position, smallest count at the bottom.
injury_detailed.groupby('RosterPosition_notplay')['PlayerKey'].count().sort_values() \
    .plot(kind='barh', ax=ax)
_highlight_bars_after(ax, 3)  # bars from the fifth (index 4) onwards turn orange
ax.set_title('All Non-Contact Injuries', fontsize=15)
# Hide the grid lines of the category axis explicitly. The former `b=` keyword was
# renamed to `visible` in Matplotlib 3.5 and is rejected by recent releases.
ax.grid(False, axis='y')
ax.set_ylabel('')
ax.set_xlabel('Injury Count', fontsize=15)
ax.axvline(0.1, color='black')

# Right panel: same breakdown restricted to records with DM_M7 == 1.
injury_detailed.query('DM_M7 == 1').groupby('RosterPosition_notplay')['PlayerKey'] \
    .count() \
    .sort_values() \
    .plot(kind='barh', ax=ax2)
_highlight_bars_after(ax2, 3)
ax2.set_title('Non-Contact Injury > 1 week missed', fontsize=15)
ax2.grid(False, axis='y')
ax2.set_xlabel('Injury Count', fontsize=15)
ax2.set_ylabel('')

fig.subplots_adjust(wspace=0.4, top=0.83)
# NOTE(review): the "3 positions" / "70%" wording is hard-coded, not computed from the data.
fig.suptitle('3 Positions account for 70% of all injuries', fontsize=20)

_label_barh_counts(ax)
_label_barh_counts(ax2)

# The legend is created before the reference line is added, so the single label is
# attached to the bar container rather than to the line.
ax2.legend(['Excluded from study'])
ax2.axvline(0.05, color='black')
plt.show()

In [None]:
# Collapse the returned / not-returned variants of kickoffs and punts.
injury_detailed['PlayType_simple'] = injury_detailed['PlayType'] \
    .replace({'Kickoff Not Returned' : 'Kickoff',
              'Kickoff Returned' : 'Kickoff',
              'Punt Not Returned' : 'Punt',
              'Punt Returned' : 'Punt'})

# Count only the PlayKey column instead of counting every column and then selecting one.
play_type_counts = injury_detailed.groupby('PlayType_simple')['PlayKey'].count().sort_values()
ax = play_type_counts.plot(kind='barh',
                           figsize=(8, 4),
                           title='Non-Contact Injury count by Play Type')

# Bars from the third (index 2) onwards are drawn in orange.
for idx, bar in enumerate(ax.patches):
    if idx > 1:
        bar.set_color('orange')

# Hide the grid lines of the category axis explicitly. The former `b=` keyword was
# renamed to `visible` in Matplotlib 3.5 and is rejected by recent releases.
ax.grid(False, axis='y')
ax.set_ylabel('')
ax.set_xlabel('Injury Count', fontsize=15)

# Label every bar with its count, centred 20 points left of the bar end.
# Counts are never negative, so no mirrored placement is needed.
for bar in ax.patches:
    bar_width = bar.get_width()
    ax.annotate(
        "{:.0f}".format(bar_width),
        (bar_width, bar.get_y() + bar.get_height() / 2),
        xytext=(-20, 0),
        textcoords="offset points",
        va='center',
        ha='center',
        fontsize=15,
        color='white')

# The legend is created before the reference line is added, so the single label is
# attached to the bar container rather than to the line.
ax.legend(['Excluded from study'])
ax.axvline(0.05, color='black')
plt.show()

In [1]:
# Drop tracking rows recorded more than 0.1 s before the snap of their play.
# Step 1: position and time of each "ball_snap" row, renamed with a _snap suffix.
is_snap_row = tracks['event'] == 'ball_snap'
tracks_snap = tracks.loc[is_snap_row, ['PlayKey', 'x', 'y', 'time']].rename(
    columns={'x': 'x_snap', 'y': 'y_snap', 'time': 'time_snap'}
)

# Step 2: attach the snap values to every row of the same play.
# NOTE(review): a play with more than one "ball_snap" row would have its rows
# duplicated by this join - confirm each PlayKey has a single snap row.
tracks = tracks.merge(tracks_snap, how='left', on='PlayKey')

# Step 3: keep rows from 0.1 s before the snap onwards (rows whose play has no
# snap time compare as False and are dropped).
tracks = tracks[tracks['time'] >= (tracks['time_snap'] - 0.1)]

# Drop tracking rows recorded more than 0.1 s after the last event of their play.
has_event = tracks['event'].notna()

# Latest timestamp that carries an event, per play.
tracks_max_event = (
    tracks.loc[has_event]
    .groupby('PlayKey')['time']
    .max()
    .reset_index()
    .rename(columns={'time': 'time_last_event'})
)
# Default inner join on PlayKey: plays without any event row are dropped here.
tracks = tracks.merge(tracks_max_event)

# Name of each play's final event. It is looked up per PlayKey: taking a single
# `.values[0]` and assigning it to the column would stamp one play's event on every row.
at_last_event = tracks['event'].notna() & (tracks['time'] == tracks['time_last_event'])
last_event_by_play = (
    tracks.loc[at_last_event, ['PlayKey', 'event']]
    .drop_duplicates('PlayKey', keep='last')
    .set_index('PlayKey')['event']
)
tracks['max_event'] = tracks['PlayKey'].map(last_event_by_play)

tracks = tracks.query('time <= (time_last_event + 0.1)')

# Orientation correction.
# Reference: https://www.kaggle.com/jpmiller/how-to-adjust-orientation

# Players who have at least one play on PlayerDay >= 350, and all of their plays.
late_day_rows = plays['PlayerDay'] >= 350
tough_guys = plays.loc[late_day_rows, 'PlayerKey'].unique()
playlist_tough = plays.loc[plays['PlayerKey'].isin(tough_guys)].copy()
days = playlist_tough.groupby('PlayerDay')['PlayerGamePlay'].mean()

# For those players, plays before day 350 are labelled season 1, the rest season 2.
playlist_tough['Season'] = np.where(playlist_tough['PlayerDay'] < 350, 1, 2)
games = playlist_tough.drop_duplicates('GameID')[['GameID', 'Season']]

# Bring GameID, then the per-game Season, onto the tracking rows.
tracks = (
    tracks
    .merge(playlist_tough[['GameID', 'PlayKey']], how='left', on='PlayKey')
    .merge(games, how='left', on='GameID')
)
tracks['Season'] = tracks['Season'].fillna(-999)  # -999 marks an unknown season

# For plays with unknown season, guess it from the orientation at the snap:
# snaps whose `o` falls in one of the three ranges below are treated as season 1.
snaps_unknown = tracks.query('event == "ball_snap" and Season < 0')
s1 = snaps_unknown.query('o < 50')['PlayKey'].unique().tolist()
s2 = snaps_unknown.query('o > 325')['PlayKey'].unique().tolist()
s3 = snaps_unknown.query('o < 225 and o > 125')['PlayKey'].unique().tolist()

season_unknown = tracks['Season'] < 0
in_shift_range = tracks['PlayKey'].isin(s1 + s2 + s3)
tracks.loc[season_unknown & in_shift_range, 'Season'] = 1
tracks.loc[season_unknown & ~in_shift_range, 'Season'] = 2

# Season-1 orientations are rotated by 90 degrees (modulo 360); others are kept.
rotated_o = np.mod(tracks['o'] + 90, 360)
tracks['o'] = np.where(tracks['Season'] == 1, rotated_o, tracks['o'])

# Speed of the previous row within the same play, the row-to-row change in speed
# (stored as 'a'), and the magnitude of that change.
speed_by_play = tracks.groupby('PlayKey')['s']
tracks['s_prev1'] = speed_by_play.shift(1)
tracks['a'] = tracks['s'].sub(tracks['s_prev1'])
tracks['a_abs'] = tracks['a'].abs()

# Attach the player key (inner join: tracking rows whose PlayKey is not in `plays`
# are dropped), then the play-level descriptors.
tracks = tracks.merge(plays[['PlayKey', 'PlayerKey']], on='PlayKey')
tracks = tracks.merge(
    plays[['PlayKey', 'RosterPosition', 'PositionGroup', 'FieldType', 'PlayType']],
    on='PlayKey', how='left')

# Boolean flags on every tracking row. Each flag is built directly from `isin`, so
# all four are proper True/False columns; setting only the True rows would leave
# NaN (and an object dtype) everywhere else.
injury_prone_groups = ['DB', 'WR', 'LB']
tracks['isInjuryPronePos'] = tracks['PositionGroup'].isin(injury_prone_groups)
tracks['isInjuryPlay'] = tracks['PlayKey'].isin(injury['PlayKey'].unique())
tracks['isRushPass'] = tracks['PlayType'].isin(['Rush', 'Pass'])
tracks['isInjuredPlayer'] = tracks['PlayerKey'].isin(injury['PlayerKey'].unique())

# Readable position label: the three highlighted groups keep their own name,
# every other (or missing) group becomes "Other Positions".
position_labels = {'LB': 'Linebacker',
                   'WR': 'Wide Receiver',
                   'DB': 'Defensive Back'}
tracks['Position_inj'] = tracks['PositionGroup'].map(position_labels).fillna('Other Positions')

# Seconds elapsed since the snap, rounded to two decimals.
tracks['time_since_snap'] = (tracks['time'] - tracks['time_snap']).round(2)

# Constant True column used when aggregating counts. It is added before the row
# filter so that no column is assigned on a filtered slice of the frame.
tracks['counter'] = True

# Keep only rows earlier than 25 s after the snap.
tracks = tracks.loc[tracks['time_since_snap'] < 25]

NameError: name 'tracks' is not defined

In [2]:
# Histogram of the orientation value `o` on the "ball_snap" rows.
snap_orientation = tracks.loc[tracks['event'] == 'ball_snap', 'o']
snap_orientation.plot.hist(
    bins=50,
    figsize=(15, 5),
    title='Distribution of Orientation during Snap after Data Cleaning',
)
plt.show()

NameError: name 'tracks' is not defined

In [3]:
# Smallest absolute gap between `o` and `dir`, also trying `dir` shifted by
# -360 and +360 so that values on either side of the wrap-around compare as close.
angle_gaps = pd.concat(
    [
        (tracks['o'] - tracks['dir']).abs(),
        (tracks['o'] - (tracks['dir'] - 360)).abs(),
        (tracks['o'] - (tracks['dir'] + 360)).abs(),
    ],
    axis=1,
)
tracks['o_dir_diff'] = angle_gaps.min(axis=1)

# Movement category from the gap: below 75 -> Forward, 75 up to 105 -> Lateral,
# 105 and above -> Backpedal. Rows with a missing gap fall through to Forward.
gap = tracks['o_dir_diff']
tracks['OffsetAngleGroup'] = np.select(
    [gap >= 105, gap >= 75],
    ['Backpedal', 'Lateral'],
    default='Forward',
)
tracks['isLateralMovement'] = tracks['OffsetAngleGroup'] == 'Lateral'
NameError: name 'np' is not defined

In [None]:
def _plot_movement_share(track_frame, panel_ax, position, show_legend=False):
    """Draw a stacked area chart of movement-category share over time for one position.

    Rows of ``track_frame`` are restricted to ``isRushPass`` plays of the given
    ``Position_inj`` with ``time_since_snap < 5`` and ``s > 0``. For every
    ``time_since_snap`` value the rows per ``OffsetAngleGroup`` are counted and
    converted to a percentage of that time step's total.

    Parameters
    ----------
    track_frame : DataFrame with the columns named above.
    panel_ax : Axes to draw on; its width is shrunk to 80%.
    position : value of ``Position_inj`` to plot; also used as the panel title.
    show_legend : if True the legend is placed to the right of the panel,
        otherwise the legend is removed.
    """
    group_counts = (
        track_frame
        .query(f'isRushPass and Position_inj == "{position}" and time_since_snap < 5 and s > 0')
        .groupby(['time_since_snap', 'OffsetAngleGroup'])['OffsetAngleGroup']
        .count()
        .unstack('OffsetAngleGroup')
    )
    # Row-wise percentages: 100 * count / total of the same time step.
    group_share = group_counts.mul(100).div(group_counts.sum(axis=1), axis=0)
    group_share.plot(kind='area', stacked=True, alpha=0.5, ax=panel_ax, title=position)

    for tick in panel_ax.get_xticklabels():
        tick.set_rotation(0)

    # Shrink the panel to 80% of its width (leaves room for the legend on the right).
    box = panel_ax.get_position()
    panel_ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

    panel_ax.set_xlabel('seconds after snap', fontsize=14)
    panel_ax.set_ylabel('% time in \n movement \n category', rotation=0, fontsize=10, color='darkgrey')
    panel_ax.yaxis.set_label_coords(-0.25, 0.82)

    if show_legend:
        panel_ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    else:
        panel_ax.get_legend().remove()


fig, axes = plt.subplots(1, 3, figsize=(15, 4))
panel_positions = ['Linebacker', 'Defensive Back', 'Wide Receiver']
for ax, position in zip(axes, panel_positions):
    # Only the last panel keeps a legend; it serves all three.
    _plot_movement_share(tracks, ax, position, show_legend=(position == panel_positions[-1]))
plt.show()

In [None]:
# Rows of rush/pass plays by the flagged positions. `.eq(True)` turns any missing
# flag into False explicitly instead of relying on how `&` treats NaN.
rush_pass_prone = tracks['isRushPass'] & tracks['isInjuryPronePos'].eq(True)

# Row counts per (isInjuryPlay, OffsetAngleGroup): one row per isInjuryPlay value,
# one column per movement category.
t_group = (
    tracks.loc[rush_pass_prone, ['PlayKey', 'OffsetAngleGroup', 'isInjuryPlay']]
    .groupby(['OffsetAngleGroup', 'isInjuryPlay'])['PlayKey']
    .count()
    .unstack('OffsetAngleGroup')
)
# Percentage of each row's total that falls in the Lateral category.
lateral_share = t_group.mul(100).div(t_group.sum(axis=1), axis=0)['Lateral']

fig, ax = plt.subplots(1, 1, figsize=(10, 4))
lateral_share.plot(kind='bar',
                   title='Time spent in Lateral Movement',
                   ax=ax)
ax.set_ylabel('% of \n play time', rotation=0, color='darkgrey', fontsize=12)
ax.yaxis.set_label_coords(-0.08, 0.85)
ax.set_xlabel('')

# Value label in white inside the top of each bar.
for bar in ax.patches:
    ax.annotate(f'{round(bar.get_height(),1)}%',
                (bar.get_x() + 0.2, bar.get_height() - 1.5),
                color='white')

# Assumes the bars are ordered isInjuryPlay False, then True.
ax.set_xticklabels(['Non Injury Play', 'Injury Play'], rotation=0)
ax.axhline(0, color='black')
# NOTE(review): the 10.1 reference line, the brace position and the "40% increase"
# text are hard-coded - presumably taken from an earlier run; verify against the data.
ax.axhline(10.1, linestyle='--', linewidth=2, color='orange')

fig.text(0.99, 0.01,
         '*LB, DB, and WRs not including special teams',
         fontsize=6,
         horizontalalignment='right')
ax.annotate(r"$\{$", fontsize=60,
            xy=(0.58, 0.7), xycoords='figure fraction'
            )
ax.annotate('40% increase', xy=(0.3, 12))
# Hide the grid lines of the category axis explicitly. The former `b=` keyword was
# renamed to `visible` in Matplotlib 3.5 and is rejected by recent releases.
ax.grid(False, axis='x')

In [None]:
injury_prone_pos = ['Wide Receiver', 'Linebacker', 'Defensive Back']

# Percentage of rows flagged as lateral movement, per position and per
# isInjuryPlay value, on rush/pass plays of the listed positions.
lateral_pct = (
    tracks.query('Position_inj in @injury_prone_pos and isRushPass')
    .groupby(['Position_inj', 'isInjuryPlay'])['isLateralMovement']
    .mean() * 100
)
ax = lateral_pct.unstack('isInjuryPlay').plot(kind='barh', figsize=(10, 5),
                                              title='Time Spent in Lateral Movement')

# Write the truncated percentage in white near the end of each bar.
for bar in ax.patches:
    ax.text(bar.get_width() - 0.7, bar.get_y() + .08,
            str(int(bar.get_width())) + '%', fontsize=10,
            color='white')

# Assumes the two bar series are ordered isInjuryPlay False, then True.
ax.legend(['Non-Injury Play', 'Injury Play'])
ax.set_xlabel('% of Play')
ax.set_ylabel('')
# Hide the grid lines of the category axis explicitly. The former `b=` keyword was
# renamed to `visible` in Matplotlib 3.5 and is rejected by recent releases.
ax.grid(False, axis='y')

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(15, 5))

# One kernel density curve of `o_dir_diff` per field type. `sns.kdeplot` replaces
# the deprecated `sns.distplot(..., hist=False)`, and the target Axes is passed
# explicitly instead of relying on the current-axes state.
for surface, line_color in (('Natural', 'darkgreen'), ('Synthetic', 'mediumseagreen')):
    surface_angles = tracks.loc[tracks['FieldType'] == surface, 'o_dir_diff'].dropna()
    sns.kdeplot(surface_angles, label=surface, color=line_color, ax=ax)

# NOTE(review): the curves are density estimates, so the '% of play time' label
# is approximate wording - confirm the intended axis label.
ax.set_ylabel('% of play time')
ax.set_xlabel('Orientation-Movement Angle')
ax.set_title('Player movement angle by Turf Type')
ax.legend(['Natural Turf', 'Synthetic Turf'])
plt.show()

In [None]:
from scipy.stats import ks_2samp

# Two-sample Kolmogorov-Smirnov statistic comparing the `o_dir_diff` values
# recorded on natural and on synthetic fields (missing values removed).
natural_angles = tracks.loc[tracks['FieldType'] == 'Natural', 'o_dir_diff'].dropna()
synthetic_angles = tracks.loc[tracks['FieldType'] == 'Synthetic', 'o_dir_diff'].dropna()

ks_stat = ks_2samp(natural_angles, synthetic_angles).statistic
print(f'The Kolmogorov-Smirnov statistic on 2 samples is {ks_stat:0.4f}')

In [None]:
injury_prone_pos = ['Wide Receiver', 'Linebacker', 'Defensive Back']
# Three colors taken by cycling through the two greens.
my_colors = list(islice(cycle(['darkgreen', 'mediumseagreen']), 3))

# Percentage of rows flagged as lateral movement, per position and field type,
# on rush/pass plays of the listed positions.
lateral_by_surface = (
    tracks.query('Position_inj in @injury_prone_pos and isRushPass')
    .groupby(['Position_inj', 'FieldType'])['isLateralMovement']
    .mean() * 100
).unstack('FieldType')

ax = lateral_by_surface.plot(
    kind='barh',
    figsize=(10, 5),
    title='Time Spent in Lateral Movement',
    color=my_colors,
)

# Write the percentage (two decimals) in white near the end of each bar.
for bar in ax.patches:
    bar_len = bar.get_width()
    ax.text(bar_len - 0.8, bar.get_y() + .08,
            str(round(bar_len, 2)) + '%',
            fontsize=10, color='white')

plt.xlabel('% of Play')
plt.ylabel('')
plt.show()