In [None]:
import pandas as pd
import numpy as np
import math
import torch
from tqdm import tqdm
import pandas as pd
import warnings
import os

In [None]:
from model import load_expected_yards_model, PREPROCESS_MEAN, PREPROCESS_STD
# Build the expected-yards network, then move it to the GPU and call .eval() on it.
# Each step rebinds `model`, so the name ends up bound to the result of the last call.
model = load_expected_yards_model()
model = model.to('cuda')
model = model.eval()

In [None]:
def get_tracking_with_plays(filepath, plays_path=None):
    '''
    Load one tracking file, orient every play in the same direction, attach
    play-level information and keep only the frames between the start and the
    end of each carry.

    Parameters
    ----------
    filepath : str
        Path to a tracking CSV. Columns read here: gameId, playId, frameId,
        nflId, club, event, playDirection, x, y, dir, o.
    plays_path : str, optional
        Path to the plays CSV (columns read: gameId, playId, ballCarrierId,
        possessionTeam, defensiveTeam, playNullifiedByPenalty). Defaults to
        os.path.join("Data", "2024", "plays.csv").

    Returns
    -------
    pandas.DataFrame
        Tracking rows merged with their play row, plus the columns
        is_on_offense, is_on_defense, is_ballcarrier, bc_x, bc_y,
        frameId_start and frameId_end. Plays containing a 'fumble' event,
        plays nullified by penalty and plays lacking a start or end event
        are not present in the result.
    '''
    if plays_path is None:
        plays_path = os.path.join("Data", str(2024), "plays.csv")
    play_keys = ['gameId', 'playId']
    frame_keys = play_keys + ['frameId']

    #Load in tracking week and plays
    tracking = pd.read_csv(filepath)
    plays = pd.read_csv(plays_path)

    #Remove every play that contains a fumble event. The key is (gameId, playId)
    #because playId alone is not unique across games.
    fumble_plays = tracking.loc[tracking['event'] == 'fumble', play_keys].drop_duplicates()
    tracking = tracking.merge(fumble_plays, on=play_keys, how='left', indicator='_fumble')
    tracking = tracking[tracking['_fumble'] == 'left_only'].drop(columns='_fumble')
    #Keep only plays that were not nullified by a penalty
    plays = plays[plays['playNullifiedByPenalty'] == 'N']

    #Orient plays to be all in one direction: mirror the coordinates of
    #left-moving plays (x -> 120 - x, y -> 160/3 - y) and rotate angles by 180
    moving_left = tracking['playDirection'] == 'left'
    tracking.loc[moving_left, 'x'] = 120 - tracking.loc[moving_left, 'x']
    tracking.loc[moving_left, 'y'] = (160/3) - tracking.loc[moving_left, 'y']
    for angle_col in ('dir', 'o'):
        tracking.loc[moving_left, angle_col] += 180
        tracking.loc[tracking[angle_col] > 360, angle_col] -= 360

    #Merge with plays to get ball carrier info. The inner join is what actually
    #drops tracking rows of penalty-nullified plays (a left join would keep them
    #with empty play columns).
    tracking_with_plays = tracking.merge(plays, on=play_keys, how='inner')
    tracking_with_plays['is_on_offense'] = tracking_with_plays['club'] == tracking_with_plays['possessionTeam']
    tracking_with_plays['is_on_defense'] = tracking_with_plays['club'] == tracking_with_plays['defensiveTeam']
    tracking_with_plays['is_ballcarrier'] = tracking_with_plays['ballCarrierId'] == tracking_with_plays['nflId']

    #Attach the ball carrier's coordinates to every row of the same frame
    bc_coords = (tracking_with_plays
                 .loc[tracking_with_plays['is_ballcarrier'], frame_keys + ['x', 'y']]
                 .rename(columns={'x': 'bc_x', 'y': 'bc_y'}))
    tracking_with_plays = tracking_with_plays.merge(bc_coords, on=frame_keys, how='left')

    def _first_frame_with(events, column_name):
        #Earliest frameId per play whose event is one of `events`
        hits = tracking_with_plays[tracking_with_plays['event'].isin(events)]
        return hits.groupby(play_keys)['frameId'].min().rename(column_name).reset_index()

    #Get start and end frame for the play
    start_frame = _first_frame_with(['run', 'lateral', 'run_pass_option', 'handoff', 'pass_arrived'],
                                    'frameId_start')
    end_frame = _first_frame_with(['tackle', 'out_of_bounds'], 'frameId_end')
    tracking_with_plays = tracking_with_plays.merge(start_frame, on=play_keys, how='left')
    tracking_with_plays = tracking_with_plays.merge(end_frame, on=play_keys, how='left')

    #Keep frames inside [frameId_start, frameId_end]; plays missing either bound
    #compare against NaN and are therefore dropped entirely
    in_window = ((tracking_with_plays['frameId'] <= tracking_with_plays['frameId_end']) &
                 (tracking_with_plays['frameId'] >= tracking_with_plays['frameId_start']))
    return tracking_with_plays[in_window]

In [None]:
def recreate_feature_tensor(feature_df, nflid, input_tensor):
    '''
    Overwrite, in place, the slice of input_tensor belonging to one defender so
    that it reflects that defender's (updated) row in feature_df.

    input_tensor is indexed as [frame, channel, defender, offensive player].
    Frames are visited per gameId (order of appearance), then by ascending
    playId and frameId; the n-th visited frame writes to input_tensor[n].
    The defender index is the player's position among the first 11 defensive
    rows of the frame and the offensive index is the position among the first
    10 non-ballcarrier offensive rows, so feature_df must keep the row order
    that was used when input_tensor was first built.

    Channels written for the target defender:
        0, 1  offensive player position minus defender position (x, y)
        2, 3  defender velocity components (x, y)
        4, 5  defender velocity minus ball carrier velocity (x, y)
        6, 7  defender position minus ball carrier position (x, y)
        8, 9  offensive player velocity minus defender velocity (x, y)

    Parameters
    ----------
    feature_df : pandas.DataFrame
        Needs gameId, playId, frameId, nflId, x, y, s, dir, is_on_offense,
        is_on_defense and is_ballcarrier.
    nflid : number
        nflId of the defender whose slice is rewritten.
    input_tensor : array supporting item assignment
        Modified in place and also returned.

    Raises
    ------
    ValueError
        If a frame that has to be rewritten does not contain exactly one
        ball carrier row.
    '''
    def _speed_components(speed, direction_deg):
        #Split a speed at a heading given in degrees into its two components
        angle = np.deg2rad(direction_deg)
        return speed * np.cos(angle), speed * np.sin(angle)

    cur_count = 0
    #Iterate over gameIds in order of appearance
    for gid in feature_df['gameId'].unique():
        game = feature_df[feature_df['gameId'] == gid]
        #One group per (play, frame), ascending by playId then frameId
        for _, frame_group in game.groupby(['playId', 'frameId']):
            offense_players = frame_group[(frame_group['is_on_offense']) & (~frame_group['is_ballcarrier'])].head(10)
            defense_players = frame_group[frame_group['is_on_defense']].head(11)
            #Slot of the target defender in this frame (first match only)
            target_slots = np.flatnonzero(defense_players['nflId'].to_numpy() == nflid)
            if target_slots.size and len(offense_players):
                i = int(target_slots[0])
                ballcarrier = frame_group[frame_group['is_ballcarrier']]
                if len(ballcarrier) != 1:
                    raise ValueError(
                        'expected exactly one ball carrier row per frame, found %d' % len(ballcarrier))
                #Work with plain scalars: writing a one-element Series into a single
                #tensor cell relies on an implicit conversion NumPy has deprecated
                bc = ballcarrier.iloc[0]
                bc_sx, bc_sy = _speed_components(bc['s'], bc['dir'])
                target = defense_players.iloc[i]
                def_sx, def_sy = _speed_components(target['s'], target['dir'])
                #All offensive players of the frame at once
                off_x = offense_players['x'].to_numpy(dtype=float)
                off_y = offense_players['y'].to_numpy(dtype=float)
                off_sx, off_sy = _speed_components(offense_players['s'].to_numpy(dtype=float),
                                                   offense_players['dir'].to_numpy(dtype=float))
                cols = slice(0, len(offense_players))
                input_tensor[cur_count, 0, i, cols] = off_x - target['x']
                input_tensor[cur_count, 1, i, cols] = off_y - target['y']
                input_tensor[cur_count, 2, i, cols] = def_sx
                input_tensor[cur_count, 3, i, cols] = def_sy
                input_tensor[cur_count, 4, i, cols] = def_sx - bc_sx
                input_tensor[cur_count, 5, i, cols] = def_sy - bc_sy
                input_tensor[cur_count, 6, i, cols] = target['x'] - bc['x']
                input_tensor[cur_count, 7, i, cols] = target['y'] - bc['y']
                input_tensor[cur_count, 8, i, cols] = off_sx - def_sx
                input_tensor[cur_count, 9, i, cols] = off_sy - def_sy
            #Every frame consumes one tensor row, whether or not it was rewritten
            cur_count += 1

    return input_tensor

In [None]:
def project_location(df, nflId):
    '''
    Move one player to the corner opposite the ball carrier in every frame.

    For each row of the given player, x becomes 0 when bc_x >= 60 and 120
    otherwise, and y becomes 0 when bc_y >= 53.3/2 and 53.3 otherwise. A
    missing bc_x / bc_y compares as False, so the row gets 120 / 53.3.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs nflId, x, y, s, dir, bc_x and bc_y. It is modified in place, so
        pass a copy if the original must be preserved.
    nflId : number
        Player whose location is overwritten.

    Returns
    -------
    pandas.DataFrame
        The same df object, with the player's x / y replaced and with the
        velocity component columns sx and sy added for every row.
    '''
    #Get x and y components of speed (kept as columns on the returned frame)
    df['sx'] = df['s'] * np.cos(np.deg2rad(df['dir']))
    df['sy'] = df['s'] * np.sin(np.deg2rad(df['dir']))
    #Rows belonging to the player being relocated
    is_target = df['nflId'] == nflId
    #Send the player to the far corner relative to the ball carrier
    df.loc[is_target, 'x'] = np.where(df.loc[is_target, 'bc_x'] >= 60, 0, 120)
    df.loc[is_target, 'y'] = np.where(df.loc[is_target, 'bc_y'] >= 53.3/2, 0, 53.3)
    return df

In [None]:
def player_counterfactual(df, nflId, input_tensor,prediction_df):
  '''
  Returns Dataframe of counterfactual predictions for target player

  The player is moved to a corner with project_location, his slice of
  input_tensor is rebuilt with recreate_feature_tensor, and the model is run
  on the normalised result. The new predictions are compared, frame by frame,
  with the stored ExpectedYards values of the same play.

  Parameters
  ----------
  df : pandas.DataFrame
      Tracking rows of a single play (the first row supplies gameId / playId).
  nflId : number
      Defender to remove from the play.
  input_tensor : numpy.ndarray
      Model input for the frames of this play. It is modified in place by
      recreate_feature_tensor, so callers should pass a copy.
  prediction_df : pandas.DataFrame
      Needs gameId, playId, frameId and ExpectedYards.

  Returns
  -------
  pandas.DataFrame
      One row per frame (ascending frameId) with nflId, gameId, playId,
      frameId, 'Yards Remaining', 'Original Prediction',
      'Projected Prediction' and 'Prediction Difference'.
  '''
  game_id = df['gameId'].iloc[0]
  play_id = df['playId'].iloc[0]
  #Sorted frame ids, so every per-frame column below shares one ordering
  frame_ids = np.unique(df['frameId'])
  #Get Expected yards values
  sorted_predictions = prediction_df[(prediction_df['gameId'] == game_id) &
                                     (prediction_df['playId'] == play_id) &
                                     prediction_df['frameId'].isin(frame_ids.tolist())].sort_values(by='frameId')
  original_prediction = sorted_predictions['ExpectedYards'].to_numpy()
  #Project player to corner and calculate new prediction
  projected_df = project_location(df.copy(), nflId)
  features = torch.tensor(recreate_feature_tensor(projected_df, nflId, input_tensor), dtype=torch.float).to('cuda')
  #Inference only: no autograd graph is needed
  with torch.no_grad():
    projected_prediction2 = model((features - PREPROCESS_MEAN) / PREPROCESS_STD)
  #Flatten before subtracting: an (N, 1) output minus an (N,) array would
  #otherwise broadcast to an (N, N) matrix
  projected_prediction2 = projected_prediction2.detach().to('cpu').numpy().reshape(-1)
  #Calculate Counter Factual Difference
  prediction_difference = projected_prediction2 - original_prediction
  #Calculate Yards remaining: ball carrier x in the last frame minus x in each frame
  max_frame = df['frameId'].max()
  ball_end_x = df.loc[(df['frameId'] == max_frame) & (df['is_ballcarrier']), 'x'].values[0]
  ballcarrier_rows = df[df['is_ballcarrier']].sort_values(by='frameId')
  yards_remaining = (ball_end_x - ballcarrier_rows['x']).to_numpy()
  #Set Dataframe for input player
  player_counterfactual_df = pd.DataFrame({'nflId': nflId,
                          'gameId': game_id,
                          'playId': play_id,
                          'frameId': frame_ids,
                          'Yards Remaining': yards_remaining,
                          'Original Prediction': original_prediction,
                          'Projected Prediction': projected_prediction2,
                          'Prediction Difference': prediction_difference})
  #Return player's counterfactual dataframe
  return player_counterfactual_df

In [None]:
#Load in plays, games and all tracking.
#Every path is built with os.path.join so the cell also runs where the path
#separator is not a backslash.
plays = pd.read_csv(os.path.join("Data", str(2024), "plays.csv"))
games = pd.read_csv(os.path.join("Data", str(2024), "games.csv"))
#One processed tracking frame per week, bound to tracking1 ... tracking9
#(these are the names get_tracking looks up)
(tracking1, tracking2, tracking3,
 tracking4, tracking5, tracking6,
 tracking7, tracking8, tracking9) = [
    get_tracking_with_plays(os.path.join("Data", str(2024), f"tracking_week_{week}.csv"))
    for week in range(1, 10)
]

In [None]:
def get_tracking(week_num):
    '''
    Look up the preloaded tracking frame for a week.

    week_num values equal to 1 through 8 return tracking1 ... tracking8.
    Anything else, including 9, returns tracking9.
    '''
    if week_num == 1:
        return tracking1
    if week_num == 2:
        return tracking2
    if week_num == 3:
        return tracking3
    if week_num == 4:
        return tracking4
    if week_num == 5:
        return tracking5
    if week_num == 6:
        return tracking6
    if week_num == 7:
        return tracking7
    if week_num == 8:
        return tracking8
    #No earlier guard matched: fall through to the last week
    return tracking9

In [None]:
def full_counterfactuals(week_number):
  '''
  Compute the counterfactual predictions of every defender on every play of
  one week.

  Reads the plays and games CSVs, 'ExpectedYards_v1.csv' and the week's
  'week_{n}_x_new.npy' / 'week_{n}_r_new.npy' arrays. The r array is used as
  (gameId, playId, frameId) in its first three columns to select the rows of
  the x array that belong to a play.

  Parameters
  ----------
  week_number : int
      Week to process.

  Returns
  -------
  (pandas.DataFrame, list)
      The concatenated per-player counterfactual frames (an empty DataFrame
      when nothing could be computed) and a list of [gameId, playId] pairs
      for the plays that raised an error and were skipped.
  '''
  #Read in plays and game csvs
  plays = pd.read_csv(os.path.join("Data", str(2024), "plays.csv"))
  games = pd.read_csv(os.path.join("Data", str(2024), "games.csv"))
  #Read in expected yards csv
  prediction_df = pd.read_csv('ExpectedYards_v1.csv')
  #Read in tracking, input tensors and reference tensors for input week
  tracking = get_tracking(week_number)
  cur_x = np.load(f'week_{week_number}_x_new.npy')
  cur_r = np.load(f'week_{week_number}_r_new.npy').astype(np.int64)
  #Restrict to the plays of this week once, instead of looking up the week of
  #every play inside the loop
  week_game_ids = games.loc[games['week'] == week_number, 'gameId']
  week_plays = (plays.loc[plays['gameId'].isin(week_game_ids), ['gameId', 'playId']]
                .sort_values(by=['gameId', 'playId']))
  counterfactual_dfs = []
  ls = []
  #Iterate through the plays of the week
  for game_id, play_id in tqdm(week_plays.itertuples(index=False, name=None), total=len(week_plays)):
    try:
      #Rows of the input tensor that belong to this play, and their frame ids
      play_rows = (cur_r[:, 0] == game_id) & (cur_r[:, 1] == play_id)
      input_tensor = cur_x[play_rows, :]
      frame_ids = cur_r[play_rows][:, 2].tolist()
      in_play = (tracking['gameId'] == game_id) & (tracking['playId'] == play_id)
      #Get List of defenders in play
      players = tracking.loc[in_play & (tracking['is_on_defense'] == True), 'nflId'].unique()
      #Tracking rows of the play, limited to the frames present in the tensor
      play_tracking = tracking.loc[in_play & tracking['frameId'].isin(frame_ids)]
      #Iterate over players to do counterfactuals for each; each call gets its
      #own copy of the tensor because it is modified in place downstream
      for player_id in players:
        counterfactual_dfs.append(
            player_counterfactual(play_tracking, player_id, input_tensor.copy(), prediction_df))
    except Exception:
      #Best effort: remember the play and keep going. Exception (not a bare
      #except) so that an interrupt still stops the loop.
      ls.append([game_id, play_id])
  #Cat full list of counterfactuals together
  if not counterfactual_dfs:
    return pd.DataFrame(), ls
  full_counterfactual_df = pd.concat(counterfactual_dfs, ignore_index=True)
  return full_counterfactual_df, ls

In [None]:
#Silence all warnings from here on (the filter stays installed after this cell)
warnings.filterwarnings('ignore')
#Iterate over weeks
for week_number in range(1,10):
    #Get full counterfactuals for week, save full set of counterfactuals to a csv
    df, ls = full_counterfactuals(week_number)
    df.to_csv('EndZoneCFS_week'+ str(week_number) + '.csv')
    #ls holds the plays that raised an error; report the count so skipped
    #plays do not go unnoticed
    print(f'week {week_number}: {len(ls)} plays skipped')