In [7]:
import pandas as pd
import numpy as np
import math
import torch
from tqdm import tqdm
import pandas as pd
import warnings
import os

In [2]:
from model import load_expected_yards_model, PREPROCESS_MEAN, PREPROCESS_STD
model = load_expected_yards_model().to('cuda').eval()

In [3]:
def get_tracking_with_plays(filepath):
    tracking = pd.read_csv(filepath)
    plays = pd.read_csv("plays.csv")
    tracking = tracking[tracking['playId'].isin(tracking[tracking['event'] != 'fumble']['playId'].unique())]
    plays = plays[plays['playNullifiedByPenalty'] == 'N']
    tracking.loc[tracking['playDirection'] == 'left', 'x'] = 120 - tracking.loc[tracking['playDirection'] == 'left', 'x']
    tracking.loc[tracking['playDirection'] == 'left', 'y'] = (160/3) - tracking.loc[tracking['playDirection'] == 'left', 'y']
    tracking.loc[tracking['playDirection'] == 'left', 'dir'] += 180
    tracking.loc[tracking['dir'] > 360, 'dir'] -= 360
    tracking.loc[tracking['playDirection'] == 'left', 'o'] += 180
    tracking.loc[tracking['o'] > 360, 'o'] -= 360
    tracking_with_plays = tracking.merge(plays, on=['gameId', 'playId'], how='left')
    tracking_with_plays['is_on_offense'] = tracking_with_plays['club'] == tracking_with_plays['possessionTeam']
    tracking_with_plays['is_on_defense'] = tracking_with_plays['club'] == tracking_with_plays['defensiveTeam']
    tracking_with_plays['is_ballcarrier'] = tracking_with_plays['ballCarrierId'] == tracking_with_plays['nflId']
    bc_coords=tracking_with_plays.loc[tracking_with_plays['is_ballcarrier']]
    bc_coords['bc_x']=bc_coords['x']
    bc_coords['bc_y']=bc_coords['y']
    bc_coords=bc_coords[['gameId', 'playId', 'frameId', 'bc_x', 'bc_y']]
    tracking_with_plays=tracking_with_plays.merge(bc_coords, on=['gameId', 'playId', 'frameId'], how='left')
    end_frame = tracking_with_plays[tracking_with_plays['event'].isin(['tackle', 'out_of_bounds'])].groupby(['gameId', 'playId'])['frameId'].min().reset_index()
    end_frame.rename(columns={'frameId': 'frameId_end'}, inplace=True)
    start_frame = tracking_with_plays[tracking_with_plays['event'].isin(['run', 'lateral', 'run_pass_option', 'handoff', 'pass_arrived'])].groupby(['gameId', 'playId'])['frameId'].min().reset_index()
    start_frame.rename(columns={'frameId': 'frameId_start'}, inplace=True)
    tracking_with_plays = tracking_with_plays.merge(start_frame, on=['gameId', 'playId'], how='left')
    tracking_with_plays = tracking_with_plays.merge(end_frame, on=['gameId', 'playId'], how='left')
    tracking_with_plays = tracking_with_plays[(tracking_with_plays['frameId'] <= tracking_with_plays['frameId_end']) &
                                              (tracking_with_plays['frameId'] >= tracking_with_plays['frameId_start'])]
    return tracking_with_plays

In [4]:
def recreate_feature_tensor(feature_df, nflid, input_tensor):
    cur_count = 0
    for gid in feature_df['gameId'].unique():
        game = feature_df[feature_df['gameId'] == gid]
        for play_id, play_group in game.groupby('playId'):
            for frame_id, frame_group in play_group.groupby('frameId'):
                offense_players = frame_group[(frame_group['is_on_offense']) & (~frame_group['is_ballcarrier'])].head(10)
                defense_players = frame_group[frame_group['is_on_defense']].head(11)
                ballcarrier = frame_group[frame_group['is_ballcarrier']]
                ballcarrier_sx = ballcarrier.s * np.cos(np.deg2rad(ballcarrier.dir))
                ballcarrier_sy = ballcarrier.s * np.sin(np.deg2rad(ballcarrier.dir))
                for i, def_player in enumerate(defense_players.itertuples()):
                    
                    if def_player.nflId == nflid:
                        def_player_sx = def_player.s * np.cos(np.deg2rad(def_player.dir))
                        def_player_sy = def_player.s * np.sin(np.deg2rad(def_player.dir))
                        for j, off_player in enumerate(offense_players.itertuples()):
                            off_player_sx = off_player.s * np.cos(np.deg2rad(off_player.dir))
                            off_player_sy = off_player.s * np.sin(np.deg2rad(off_player.dir))
                            input_tensor[cur_count, 0, i, j] = off_player.x - def_player.x
                            input_tensor[cur_count, 1, i, j] = off_player.y - def_player.y
                            input_tensor[cur_count, 2, i, j] = def_player_sx
                            input_tensor[cur_count, 3, i, j] = def_player_sy
                            input_tensor[cur_count, 4, i, j] = def_player_sx - ballcarrier_sx
                            input_tensor[cur_count, 5, i, j] = def_player_sy - ballcarrier_sy
                            input_tensor[cur_count, 6, i, j] = def_player.x - ballcarrier.x
                            input_tensor[cur_count, 7, i, j] = def_player.y - ballcarrier.y
                            input_tensor[cur_count, 8, i, j] = off_player_sx - def_player_sx
                            input_tensor[cur_count, 9, i, j] = off_player_sy - def_player_sy
                        break
                cur_count += 1
        

    return input_tensor

In [5]:
def project_location(df, nflId):
    df['sx'] = df['s'] * np.cos(np.deg2rad(df['dir']))
    df['sy'] = df['s'] * np.sin(np.deg2rad(df['dir']))
    filtered_df = df[df['nflId'] == nflId].copy()
    filtered_df['prior_x'] = filtered_df['x'].shift(1)
    filtered_df['prior_sx'] = filtered_df['sx'].shift(1)
    filtered_df['prior_y'] = filtered_df['y'].shift(1)
    filtered_df['prior_sy'] = filtered_df['sy'].shift(1)
    for index, row in filtered_df.iterrows():
        if row['bc_x']>=60:
            df.at[index, 'x'] = 0

        else:
            df.at[index, 'x'] = 120
               
        if row['bc_y']>=53.3/2:
            df.at[index, 'y'] = 0

        else:
            df.at[index, 'y'] = 53.3
            
    return df

In [6]:
def player_counterfactual(df, nflId, input_tensor,prediction_df):
  sorted_predictions = prediction_df[(prediction_df['gameId'] == df['gameId'].iloc[0]) &
                                         (prediction_df['playId'] == df['playId'].iloc[0]) & prediction_df['frameId'].isin(np.unique(df['frameId']).tolist())].sort_values(by='frameId')
  original_prediction = sorted_predictions['ExpectedYards'].values
  projected_df = project_location(df.copy(), nflId)
  projected_prediction2 = model(((torch.tensor(recreate_feature_tensor(projected_df,nflId,input_tensor), dtype=torch.float).to('cuda') - PREPROCESS_MEAN) / PREPROCESS_STD)).detach().to('cpu').numpy()
  prediction_difference = projected_prediction2 - original_prediction
  max_frame = df['frameId'].max()
  ball_end_x = df.loc[(df['frameId'] == max_frame) & (df['is_ballcarrier']), 'x'].values[0]
  yards_remaining = ball_end_x - df[df['is_ballcarrier']]['x']
  player_counterfactual_df = pd.DataFrame({'nflId': nflId,
                          'gameId': df['gameId'].iloc[0],
                          'playId': df['playId'].iloc[0],
                          'frameId': df['frameId'].unique(),
                          'Yards Remaining': yards_remaining,
                          'Original Prediction': original_prediction,
                          'Projected Prediction': projected_prediction2.flatten(),
                          'Prediction Difference': prediction_difference.flatten()})
  return player_counterfactual_df

In [4]:
plays=pd.read_csv(os.path.join(os.path.join("Data", str(2024)),"plays.csv"))
games=pd.read_csv(r'Data\2024\games.csv')
tracking1 = get_tracking_with_plays(os.path.join(os.path.join("Data", str(2024)),"tracking_week_"+str(1) +".csv"))
tracking2 = get_tracking_with_plays(os.path.join(os.path.join("Data", str(2024)),"tracking_week_"+str(2) +".csv"))
tracking3 = get_tracking_with_plays(os.path.join(os.path.join("Data", str(2024)),"tracking_week_"+str(3) +".csv"))
tracking4 = get_tracking_with_plays(os.path.join(os.path.join("Data", str(2024)),"tracking_week_"+str(4) +".csv"))
tracking5 = get_tracking_with_plays(os.path.join(os.path.join("Data", str(2024)),"tracking_week_"+str(5) +".csv"))
tracking6 = get_tracking_with_plays(os.path.join(os.path.join("Data", str(2024)),"tracking_week_"+str(6) +".csv"))
tracking7 = get_tracking_with_plays(os.path.join(os.path.join("Data", str(2024)),"tracking_week_"+str(7) +".csv"))
tracking8 = get_tracking_with_plays(os.path.join(os.path.join("Data", str(2024)),"tracking_week_"+str(8) +".csv"))
tracking9 = get_tracking_with_plays(os.path.join(os.path.join("Data", str(2024)),"tracking_week_"+str(9) +".csv"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bc_coords['bc_x']=bc_coords['x']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bc_coords['bc_y']=bc_coords['y']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bc_coords['bc_x']=bc_coords['x']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer]

In [5]:
def get_tracking(week_num):
    if week_num==1:
        return tracking1
    elif week_num==2:
        return tracking2
    elif week_num==3:
        return tracking3
    elif week_num==4:
        return tracking4
    elif week_num==5:
        return tracking5
    elif week_num==6:
        return tracking6
    elif week_num==7:
        return tracking7
    elif week_num==8:
        return tracking8
    else:
        return tracking9

In [9]:
def full_counterfactuals(week_number):
  full_counterfactual_df = pd.DataFrame()
  prediction_df = pd.read_csv('ExpectedYards_v1.csv')
  counterfactual_dfs = []
  tracking = get_tracking(week_number)
  cur_x = np.load(f'week_{week_number}_x_new.npy')
  cur_r = torch.tensor(np.load(f'week_{week_number}_r_new.npy'),dtype= torch.int64)
  distinct_plays = plays.sort_values(by=['gameId', 'playId'])
  count = 0
  ls = []
  for i, row in tqdm(distinct_plays.iterrows()):
    week_numbertemp = int(games.loc[games['gameId']==row['gameId']]['week'])
    if week_numbertemp == week_number:
      try:
        players = tracking.loc[(tracking['gameId']==row['gameId']) & (tracking['playId']==row['playId']) & (tracking['is_on_defense']==True)]['nflId'].unique()
        inds = (cur_r[:,0] == row['gameId']) & (cur_r[:,1] == row['playId'])
        input_tensor = cur_x[inds,:]
        for j in players:
          player_counterfactual_df = player_counterfactual(tracking.loc[(tracking['playId'] == int(row['playId'])) & (tracking['gameId'] == int(row['gameId'])) & (tracking['frameId'].isin(cur_r[inds][:,2].tolist()))], j,input_tensor.copy(),prediction_df)
          counterfactual_dfs.append(player_counterfactual_df)
        count += 1
      except:
        ls += [[row['gameId'],row['playId']]]
    else:
      pass
  full_counterfactual_df = pd.concat(counterfactual_dfs, ignore_index=True)
  return full_counterfactual_df, ls

In [10]:
warnings.filterwarnings('ignore')
plays=pd.read_csv(os.path.join(os.path.join("Data", str(2024)),"plays.csv"))
games=pd.read_csv(r'Data\2024\games.csv')
for week_number in range(1,10):
    df, ls= full_counterfactuals(week_number)
    df.to_csv('EndZoneCFS_week'+ str(week_number) + '.csv')
    torch.save(ls,'EndZoneRemovedPlays_week' + str(week_number) + '.pt')
    print(week_number,len(ls))

12486it [1:20:53,  2.57it/s]  


1 0


12486it [1:19:41,  2.61it/s]  


2 0


12486it [1:23:05,  2.50it/s]  


3 0


12486it [1:21:34,  2.55it/s]  


4 0


12486it [1:27:28,  2.38it/s]  


5 0


12486it [1:16:16,  2.73it/s]  


6 0


12486it [1:17:05,  2.70it/s] 


7 0


12486it [1:18:56,  2.64it/s] 


8 0


12486it [59:45,  3.48it/s]  


9 0
