In [None]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import math

In [None]:
# Join a week of tracking data to plays with preprocessing criteria
def get_tracking_with_plays(filepath, plays_path="plays.csv"):
    """Load one week of tracking data, normalise it and join it to the plays table.

    Processing steps, in order:

    1. Drop every (gameId, playId) in which a ``'fumble'`` event was recorded.
    2. Keep only plays whose ``playNullifiedByPenalty`` is ``'N'``.
    3. For rows with ``playDirection == 'left'``, mirror ``x`` about 120 and
       ``y`` about 160/3 and rotate ``dir`` / ``o`` by 180 degrees, so all
       plays share one direction.
    4. Join tracking rows to their play and add the boolean flags
       ``is_on_offense``, ``is_on_defence`` and ``is_ballcarrier``.
    5. Attach the ball carrier's coordinates for the same frame as
       ``bc_x`` / ``bc_y``.
    6. Keep only frames between the first start event (run, lateral,
       run_pass_option, handoff, pass_arrived) and the first end event
       (tackle, out_of_bounds), inclusive. Plays missing either event are
       dropped because the comparison against NaN is False.

    Parameters
    ----------
    filepath : str or path-like
        CSV file holding one week of tracking rows.
    plays_path : str or path-like, default "plays.csv"
        CSV file holding the plays table.

    Returns
    -------
    pandas.DataFrame
        Tracking rows joined to play columns, plus ``is_on_offense``,
        ``is_on_defence``, ``is_ballcarrier``, ``bc_x``, ``bc_y``,
        ``frameId_start`` and ``frameId_end``.
    """
    play_keys = ['gameId', 'playId']
    frame_keys = play_keys + ['frameId']
    start_events = ['run', 'lateral', 'run_pass_option', 'handoff', 'pass_arrived']
    end_events = ['tackle', 'out_of_bounds']

    tracking = pd.read_csv(filepath)
    plays = pd.read_csv(plays_path)

    # Remove whole plays that contain a fumble. The flag is computed per
    # (gameId, playId) because playId alone is not unique across games, and
    # it is "any frame is a fumble" rather than "any frame is not a fumble"
    # (the latter is true for practically every play and filters nothing).
    has_fumble = (
        tracking['event'].eq('fumble')
        .groupby([tracking['gameId'], tracking['playId']])
        .transform('any')
    )
    # .copy() so the in-place coordinate edits below act on an independent frame.
    tracking = tracking[~has_fumble].copy()

    plays = plays[plays['playNullifiedByPenalty'] == 'N']

    # Standardise direction: mirror positions and rotate angles of left-moving plays.
    moving_left = tracking['playDirection'] == 'left'
    tracking.loc[moving_left, 'x'] = 120 - tracking.loc[moving_left, 'x']
    tracking.loc[moving_left, 'y'] = (160 / 3) - tracking.loc[moving_left, 'y']
    for angle in ('dir', 'o'):
        tracking.loc[moving_left, angle] += 180
        # Wrap angles pushed past a full turn back into range.
        tracking.loc[tracking[angle] > 360, angle] -= 360

    # Inner join: a left join would keep the tracking rows of plays removed
    # by the nullified-penalty filter, only with empty play columns.
    tracking_with_plays = tracking.merge(plays, on=play_keys, how='inner')

    club = tracking_with_plays['club']
    tracking_with_plays['is_on_offense'] = club == tracking_with_plays['possessionTeam']
    tracking_with_plays['is_on_defence'] = club == tracking_with_plays['defensiveTeam']
    tracking_with_plays['is_ballcarrier'] = (
        tracking_with_plays['ballCarrierId'] == tracking_with_plays['nflId']
    )

    # Ball-carrier position per frame, built with select + rename so no
    # column is ever assigned on a slice (avoids SettingWithCopyWarning).
    bc_coords = (
        tracking_with_plays
        .loc[tracking_with_plays['is_ballcarrier'], frame_keys + ['x', 'y']]
        .rename(columns={'x': 'bc_x', 'y': 'bc_y'})
    )
    tracking_with_plays = tracking_with_plays.merge(bc_coords, on=frame_keys, how='left')

    # First frame of any start / end event for each play.
    event = tracking_with_plays['event']
    start_frame = (
        tracking_with_plays[event.isin(start_events)]
        .groupby(play_keys)['frameId'].min()
        .rename('frameId_start')
        .reset_index()
    )
    end_frame = (
        tracking_with_plays[event.isin(end_events)]
        .groupby(play_keys)['frameId'].min()
        .rename('frameId_end')
        .reset_index()
    )
    tracking_with_plays = tracking_with_plays.merge(start_frame, on=play_keys, how='left')
    tracking_with_plays = tracking_with_plays.merge(end_frame, on=play_keys, how='left')

    in_window = (
        (tracking_with_plays['frameId'] <= tracking_with_plays['frameId_end'])
        & (tracking_with_plays['frameId'] >= tracking_with_plays['frameId_start'])
    )
    return tracking_with_plays[in_window]

In [None]:
# Load in required data
plays = pd.read_csv('plays.csv')
players = pd.read_csv('players.csv')

# Preprocess tracking weeks 1-9 with a single loop instead of nine
# copy-pasted calls; `weeks` holds the frames in week order.
weeks = [
    get_tracking_with_plays(f'tracking_week_{week_number}.csv')
    for week_number in range(1, 10)
]

# Keep the per-week names available for any cell that refers to them directly.
(tracking1, tracking2, tracking3,
 tracking4, tracking5, tracking6,
 tracking7, tracking8, tracking9) = weeks

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bc_coords['bc_x']=bc_coords['x']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bc_coords['bc_y']=bc_coords['y']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bc_coords['bc_x']=bc_coords['x']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer]

In [None]:
# Calibrate expected yards
# Read the frame-level predictions and order them so that the first row of
# every (gameId, playId) is that play's earliest frame.
prediction_df = pd.read_csv('ExpectedYards_v1.csv').sort_values(by=['gameId', 'playId', 'frameId'])

# One row per play: the earliest frame.
is_first_frame = ~prediction_df.duplicated(subset=['gameId', 'playId'], keep='first')
first_frames = prediction_df[is_first_frame]

# Single global scale factor that makes the first-frame expected yards sum
# to the first-frame actual yards.
total_yards = first_frames['ActualYards'].sum()
total_expected_yards = first_frames['ExpectedYards'].sum()
calibration_factor = total_yards / total_expected_yards

# Calibrated expectation for every frame.
prediction_df = prediction_df.assign(
    cExpectedYards=prediction_df['ExpectedYards'] * calibration_factor
)

In [None]:
# Identify an Evasion Play if there's a frame with 2 or less expected yards and 5 or more yards remaining
def weekly_evasion(tracking_week, predictions=None, max_expected_yards=2, min_actual_yards=5):
    """Find the evasion plays in one week of preprocessed tracking data.

    A play counts as an evasion play when at least one ball-carrier frame has
    ``cExpectedYards <= max_expected_yards`` while ``ActualYards >=
    min_actual_yards``. The first such row of each (gameId, playId), in the
    row order of ``tracking_week``, represents the play.

    Parameters
    ----------
    tracking_week : pandas.DataFrame
        Tracking rows with at least ``gameId``, ``playId``, ``frameId``,
        ``club`` and a boolean ``is_ballcarrier`` column.
    predictions : pandas.DataFrame, optional
        Frame-level predictions keyed by ``gameId``, ``playId``, ``frameId``
        with ``cExpectedYards`` and ``ActualYards`` columns. Defaults to the
        notebook-level ``prediction_df``.
    max_expected_yards : float, default 2
        Upper bound on calibrated expected yards for an evasion frame.
    min_actual_yards : float, default 5
        Lower bound on actual yards for an evasion frame.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``evasion_df`` has one row per evasion play, with the added columns
        ``'Evasion YAx'`` (``ActualYards - cExpectedYards``) and ``'defense'``
        (the first club seen in that game's rows that is neither
        ``'football'`` nor the ball carrier's club).
        The second element is an empty DataFrame: no after-contact analysis is
        computed here, and the slot is kept only so that callers unpacking two
        values continue to work. The previous code returned an undefined name,
        which raised ``NameError`` on a fresh kernel.
    """
    if predictions is None:
        # Fall back to the calibrated predictions defined at notebook level.
        predictions = prediction_df

    tracking_with_pred = pd.merge(
        tracking_week, predictions, on=['gameId', 'playId', 'frameId'], how='left'
    )

    is_evasion_frame = (
        tracking_with_pred['is_ballcarrier']
        & (tracking_with_pred['cExpectedYards'] <= max_expected_yards)
        & (tracking_with_pred['ActualYards'] >= min_actual_yards)
    )
    # .copy() gives an independent frame, so the column assignments below do
    # not trigger SettingWithCopyWarning.
    evasion_df = (
        tracking_with_pred[is_evasion_frame]
        .drop_duplicates(subset=['gameId', 'playId'])
        .copy()
    )
    evasion_df['Evasion YAx'] = evasion_df['ActualYards'] - evasion_df['cExpectedYards']

    # Clubs appearing in each game's rows (includes the 'football' pseudo-club).
    teams = tracking_with_pred.groupby('gameId')['club'].unique()
    # Built as a plain list rather than DataFrame.apply(axis=1): apply returns
    # a DataFrame for an empty frame, which cannot be assigned to one column.
    evasion_df['defense'] = [
        [team for team in teams.loc[game_id] if team != 'football' and team != club][0]
        for game_id, club in zip(evasion_df['gameId'], evasion_df['club'])
    ]

    after_contact_df = pd.DataFrame()
    return evasion_df, after_contact_df

# Collect the weekly evasion plays and concatenate them once. Growing a frame
# with pd.concat inside the loop re-copies all earlier rows on every pass, and
# seeding it with an empty DataFrame can disturb the resulting dtypes.
weekly_evasion_frames = []
for week in weeks:
    evasion_df, after_contact_df = weekly_evasion(week)
    weekly_evasion_frames.append(evasion_df)

evasion_dfs = pd.concat(weekly_evasion_frames) if weekly_evasion_frames else pd.DataFrame()
# NOTE(review): the after-contact results were never accumulated in this cell;
# the name is kept (empty) in case another cell refers to it.
after_contact_dfs = pd.DataFrame()

# Attach each ball carrier's position and build a key unique per play across games.
evasion_dfs_pos = pd.merge(evasion_dfs, players[['nflId', 'position']], on='nflId', how='left')
evasion_dfs_pos['unique_play'] = (
    evasion_dfs_pos['gameId'].astype(str) + '_' + evasion_dfs_pos['playId'].astype(str)
)

# Aggregate TEP ranking table: one row per (player, position)
player_evasion_rankings = evasion_dfs_pos.groupby(['nflId', 'position']).agg(
    Name=('displayName', 'first'),
    Team=('club', 'first'),
    Evasion_Plays=('unique_play', 'nunique'),
    Evasion_YAx=('Evasion YAx', 'sum'),
    Evasion_Yards=('ActualYards', 'sum')).reset_index()

# Per-play averages
player_evasion_rankings['Evasion YAx/play'] = (
    player_evasion_rankings['Evasion_YAx'] / player_evasion_rankings['Evasion_Plays']
)
player_evasion_rankings['Evasion Yards/play'] = (
    player_evasion_rankings['Evasion_Yards'] / player_evasion_rankings['Evasion_Plays']
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Write top 10 Evasion Plays to csv. If there's a tie in Evasion Plays, choose higher Evasion YAx
# Rank from the aggregated player table. The previous code sorted
# `top10_evasion_plays` itself, a name that does not exist until this line
# has run, so the cell raised NameError on a fresh kernel.
top10_evasion_plays = (
    player_evasion_rankings
    .sort_values(by=['Evasion_Plays', 'Evasion_YAx'], ascending=[False, False])
    .head(10)
)
top10_evasion_plays.to_csv('top10_evasion_plays.csv')