In [None]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import math

In [None]:
# Join a week of tracking data to plays with preprocessing criteria
def _first_event_frame(frames, events, column_name):
    """Return the earliest ``frameId`` per (gameId, playId) whose event is in ``events``.

    The result has columns ``gameId``, ``playId`` and ``column_name``.
    """
    matching = frames[frames['event'].isin(events)]
    return (
        matching.groupby(['gameId', 'playId'])['frameId']
        .min()
        .rename(column_name)
        .reset_index()
    )


def get_tracking_with_plays(filepath, plays_filepath="plays.csv"):
    """Load one tracking CSV, join it to the plays table and trim each play.

    Steps, in order:
      1. Drop every play (keyed by gameId + playId) that contains a
         ``'fumble'`` event in any of its frames.
      2. Keep only plays whose ``playNullifiedByPenalty`` is ``'N'``.
      3. Mirror plays with ``playDirection == 'left'`` so all plays share one
         direction (x, y, dir and o are adjusted).
      4. Add ``is_on_offense``, ``is_on_defence``, ``is_ballcarrier`` flags and
         the ball carrier's per-frame position as ``bc_x`` / ``bc_y``.
      5. Keep only frames between the first start event and the first end
         event of each play (both inclusive). Plays missing either event are
         dropped entirely.

    Parameters
    ----------
    filepath : str
        Path of the tracking CSV to load.
    plays_filepath : str, default "plays.csv"
        Path of the plays CSV to join on ``gameId`` / ``playId``.

    Returns
    -------
    pandas.DataFrame
        Tracking rows joined with play columns, plus ``is_on_offense``,
        ``is_on_defence``, ``is_ballcarrier``, ``bc_x``, ``bc_y``,
        ``frameId_start`` and ``frameId_end``.
    """
    play_keys = ['gameId', 'playId']
    frame_keys = play_keys + ['frameId']

    tracking = pd.read_csv(filepath)
    plays = pd.read_csv(plays_filepath)

    # Flag every row of a play that has at least one 'fumble' frame. The play
    # is keyed by gameId AND playId because playId alone repeats across games.
    has_fumble = (
        tracking['event'].eq('fumble')
        .groupby([tracking['gameId'], tracking['playId']])
        .transform('any')
        .astype(bool)
    )
    tracking = tracking[~has_fumble].copy()
    plays = plays[plays['playNullifiedByPenalty'] == 'N']

    # Mirror left-moving plays. 120 and 160/3 are presumably the field length
    # (including end zones) and width in yards -- TODO confirm against the data
    # dictionary.
    moving_left = tracking['playDirection'] == 'left'
    tracking.loc[moving_left, 'x'] = 120 - tracking.loc[moving_left, 'x']
    tracking.loc[moving_left, 'y'] = (160 / 3) - tracking.loc[moving_left, 'y']
    for angle_col in ('dir', 'o'):
        # Rotate by half a turn, then wrap values that overflowed past 360.
        tracking.loc[moving_left, angle_col] += 180
        tracking.loc[tracking[angle_col] > 360, angle_col] -= 360

    # Inner join so tracking rows of plays filtered out of `plays` (nullified
    # by penalty) are removed instead of surviving with NaN play columns.
    tracking_with_plays = tracking.merge(plays, on=play_keys, how='inner')
    tracking_with_plays['is_on_offense'] = tracking_with_plays['club'] == tracking_with_plays['possessionTeam']
    tracking_with_plays['is_on_defence'] = tracking_with_plays['club'] == tracking_with_plays['defensiveTeam']
    tracking_with_plays['is_ballcarrier'] = tracking_with_plays['ballCarrierId'] == tracking_with_plays['nflId']

    # Broadcast the ball carrier's position to every row of the same frame.
    bc_coords = (
        tracking_with_plays.loc[tracking_with_plays['is_ballcarrier'], frame_keys + ['x', 'y']]
        .rename(columns={'x': 'bc_x', 'y': 'bc_y'})
    )
    tracking_with_plays = tracking_with_plays.merge(bc_coords, on=frame_keys, how='left')

    end_frame = _first_event_frame(
        tracking_with_plays, ['tackle', 'out_of_bounds'], 'frameId_end')
    start_frame = _first_event_frame(
        tracking_with_plays,
        ['run', 'lateral', 'run_pass_option', 'handoff', 'pass_arrived'],
        'frameId_start')
    tracking_with_plays = tracking_with_plays.merge(start_frame, on=play_keys, how='left')
    tracking_with_plays = tracking_with_plays.merge(end_frame, on=play_keys, how='left')

    # Comparisons against NaN are False, so plays without a start or end event
    # are removed here.
    in_window = (
        (tracking_with_plays['frameId'] <= tracking_with_plays['frameId_end'])
        & (tracking_with_plays['frameId'] >= tracking_with_plays['frameId_start'])
    )
    return tracking_with_plays[in_window]

In [None]:
# Load in required data
players = pd.read_csv('players.csv')

# The tracking data is split into one CSV per week, numbered 1 through 9.
NUM_WEEKS = 9
weeks = [
    get_tracking_with_plays(f'tracking_week_{week_number}.csv')
    for week_number in range(1, NUM_WEEKS + 1)
]

# Per-week names kept so any cell that refers to a single week still works.
(tracking1, tracking2, tracking3, tracking4, tracking5,
 tracking6, tracking7, tracking8, tracking9) = weeks

In [None]:
# Calibrate expected yards
# Scale the model output so that, summed over the first frame of every play,
# expected yards equal actual yards.
prediction_df = (
    pd.read_csv('ExpectedYards_v1.csv')
    .sort_values(by=['gameId', 'playId', 'frameId'])
)

# After sorting, the first row kept for each (gameId, playId) is its lowest frameId.
first_frames = prediction_df.drop_duplicates(subset=['gameId', 'playId'])

total_yards = first_frames['ActualYards'].sum()
total_expected_yards = first_frames['ExpectedYards'].sum()
calibration_factor = total_yards / total_expected_yards

prediction_df['cExpectedYards'] = calibration_factor * prediction_df['ExpectedYards']

In [None]:
# Calculate YAx from first frame of ball carrier
def weekly_YAx(tracking_week):
    """Compute yards above expected (YAx) for each play of one week.

    The week's tracking rows are joined to the module-level ``prediction_df``
    on gameId / playId / frameId. For every play, the ball carrier's row with
    the lowest ``frameId`` is kept and
    ``YAx = ActualYards - cExpectedYards`` is computed on it.

    Parameters
    ----------
    tracking_week : pandas.DataFrame
        One week of tracking rows; must provide ``gameId``, ``playId``,
        ``frameId``, ``club`` and a boolean ``is_ballcarrier`` column.

    Returns
    -------
    pandas.DataFrame
        One row per play (the ball carrier's first frame) with the added
        columns ``YAx`` and ``defense``. ``defense`` is the first club seen in
        the same game that is neither ``'football'`` nor the ball carrier's
        own club.

    Raises
    ------
    IndexError
        If a game contains no club other than the ball carrier's.
    """
    tracking_with_pred = pd.merge(tracking_week, prediction_df, on=['gameId', 'playId', 'frameId'], how='left')

    ballcarrier_rows = tracking_with_pred[tracking_with_pred['is_ballcarrier']]
    # Pick the lowest frameId per play explicitly instead of relying on the
    # incoming row order, then restore the original row order with sort_index.
    start_frame_df = (
        ballcarrier_rows
        .sort_values('frameId', kind='mergesort')
        .drop_duplicates(subset=['gameId', 'playId'])
        .sort_index()
        .copy()
    )
    start_frame_df['YAx'] = start_frame_df['ActualYards'] - start_frame_df['cExpectedYards']

    # A list comprehension (rather than a row-wise apply) also works when the
    # week has no ball-carrier rows: apply(axis=1) on an empty frame returns a
    # DataFrame, which cannot be assigned to a single column.
    clubs_by_game = tracking_with_pred.groupby('gameId')['club'].unique()
    start_frame_df['defense'] = [
        [team for team in clubs_by_game[game_id] if team != 'football' and team != club][0]
        for game_id, club in zip(start_frame_df['gameId'], start_frame_df['club'])
    ]
    return start_frame_df

# Stack the per-play YAx rows of every week with a single concat (growing a
# frame inside a loop re-copies all earlier weeks on each iteration).
start_frame_dfs = pd.concat([weekly_YAx(week) for week in weeks])

# `evasion_dfs_with_position` is a second name for the same frame; it is kept
# in case other cells reference it -- TODO confirm and remove if unused.
start_frame_dfs_pos = evasion_dfs_with_position = pd.merge(start_frame_dfs, players[['nflId', 'position']], on='nflId', how='left')
# playId values repeat across games, so a play is identified by gameId + playId.
start_frame_dfs_pos['unique_play'] = start_frame_dfs_pos['gameId'].astype(str) + '_' + start_frame_dfs_pos['playId'].astype(str)

# Aggregate YAx ranking table for players, team offense, team defense
player_rankings = start_frame_dfs_pos.groupby(['nflId', 'position']).agg(
    Name=('displayName', 'first'),
    Team=('club', 'first'),
    Plays=('unique_play', 'nunique'),
    YAx=('YAx', 'sum'),
    Yards_From_Touch=('ActualYards', 'sum')).reset_index()
player_rankings['YAx/play'] = player_rankings['YAx'] / player_rankings['Plays']
player_rankings['Yards From Touch/play'] = player_rankings['Yards_From_Touch'] / player_rankings['Plays']

# Team tables count plays with the same gameId + playId key as the player
# table; counting distinct playId alone would merge plays from different games
# that share a playId and understate the denominator.
offense_rankings = start_frame_dfs_pos.groupby('club').agg(
    Plays=('unique_play', 'nunique'),
    YAx=('YAx', 'sum'),
    Yards_From_Touch=('ActualYards', 'sum')).reset_index()
offense_rankings['YAx/play'] = offense_rankings['YAx'] / offense_rankings['Plays']
offense_rankings['Yards From Touch/play'] = offense_rankings['Yards_From_Touch'] / offense_rankings['Plays']

defense_rankings = start_frame_dfs_pos.groupby('defense').agg(
    Plays=('unique_play', 'nunique'),
    YAx_Allowed=('YAx', 'sum'),
    Yards_From_Touch_Allowed=('ActualYards', 'sum')).reset_index()
defense_rankings['YAx Allowed/play'] = defense_rankings['YAx_Allowed'] / defense_rankings['Plays']
defense_rankings['Yards From Touch Allowed/play'] = defense_rankings['Yards_From_Touch_Allowed'] / defense_rankings['Plays']
# Duplicate the key under 'club' so the defense table can be joined to the offense table.
defense_rankings['club'] = defense_rankings['defense']

In [None]:
# Write top 10 YAx rankings to csv
players_by_yax = player_rankings.sort_values(by='YAx', ascending=False)
top10_YAx = players_by_yax.head(10)
top10_YAx.to_csv('top10_YAx.csv')

# Write team YAx of offense and defense to csv
# Left join: every club in the offense table gets one row, with its
# defensive columns attached where the same club appears in the defense table.
full_yax_df = offense_rankings.merge(defense_rankings, on=['club'], how='left')
full_yax_df.to_csv('full_yax_df.csv')