In [1]:
import pandas as pd
import os
import numpy as np
from tqdm import tqdm
from functools import reduce

In [None]:
#Load in all 9 weeks of counterfactuals (EndZoneCFS_week1.csv ... EndZoneCFS_week9.csv)
weekly_counterfactuals = [
    pd.read_csv(f'EndZoneCFS_week{week}.csv', index_col=None)
    for week in range(1, 10)
]
#Keep the per-week names df1..df9, since the concatenation cell refers to them
df1, df2, df3, df4, df5, df6, df7, df8, df9 = weekly_counterfactuals

In [None]:
#Stack all 9 weeks of counterfactuals row-wise into one DataFrame.
#Each week keeps its own row labels, so index labels repeat across weeks.
weekly_dfs = [df1, df2, df3, df4, df5, df6, df7, df8, df9]
df_counterfactuals = pd.concat(weekly_dfs)

In [None]:
#Calibrate the model predictions such that the sum of Yards Saved will come out to zero
#NOTE: the columns are rescaled in place, so running this cell twice applies the factor twice
CALIBRATION_FACTOR = 1.558
for prediction_col in ('Prediction Difference', 'Original Prediction', 'Projected Prediction'):
    df_counterfactuals[prediction_col] = df_counterfactuals[prediction_col] * CALIBRATION_FACTOR

In [None]:
#One group per player on a play. The shifts below move values down by one row inside each group,
#so "previous frame" means "previous row of that group".
#NOTE(review): nothing here sorts by frameId - confirm the weekly CSVs are already in frame order
by_player_play = df_counterfactuals.groupby(['gameId', 'playId', 'nflId'])
#Get prediction of previous frame, and Yards remaining on previous frame
#(built-in GroupBy.shift instead of a Python lambda per group)
df_counterfactuals['PrevPrediction'] = by_player_play['Original Prediction'].shift(1)
df_counterfactuals['PrevYR'] = by_player_play['Yards Remaining'].shift(1)
#Shift Prediction Differences by one frame
df_counterfactuals['Prediction Difference-1'] = by_player_play['Prediction Difference'].shift(1)
#Calculate Yards saved as previous prediction - change in yards - current prediction
change_in_yards = df_counterfactuals['PrevYR'] - df_counterfactuals['Yards Remaining']
df_counterfactuals['YardsSaved'] = df_counterfactuals['PrevPrediction'] - change_in_yards - df_counterfactuals['Original Prediction']
#Remove nan from df: the first row of every group has no previous row, so its shifted values are NaN
has_previous_frame = df_counterfactuals['YardsSaved'].notna() & df_counterfactuals['Prediction Difference-1'].notna()
df_counterfactuals = df_counterfactuals.loc[has_previous_frame]

In [None]:
#Group df_counterfactuals into individual frames
frames = df_counterfactuals.groupby(['gameId', 'playId', 'frameId'])
framelist1 = []

for _, frame_rows in tqdm(frames):
    #Work on an explicit copy so the assignments below never write into a slice of df_counterfactuals
    frame_rows = frame_rows.copy()

    #Set negative counterfactuals to zero
    frame_rows.loc[frame_rows['Prediction Difference-1'] <= 0, 'Prediction Difference-1'] = 0
    #If all counterfactuals are zero, assign credit to whole team (equal weight for every row of the frame).
    if frame_rows['Prediction Difference-1'].sum() == 0:
        frame_rows.loc[:, 'Prediction Difference-1'] = 1
    #Set sum of normalized YSAX to be equal to yards saved.
    #The frame's YardsSaved is taken from its first row and split in proportion to each row's weight.
    total_weight = frame_rows['Prediction Difference-1'].sum()
    yards_per_weight = frame_rows['YardsSaved'].iloc[0] / total_weight
    frame_rows['NormalizedYSAX'] = frame_rows['Prediction Difference-1'] * yards_per_weight
    #Add frame to list of frames, to be catted together later
    framelist1.append(frame_rows)

In [None]:
#Cat all frames back together, save into a csv
results1 = pd.concat(framelist1, ignore_index=True)
#index=False: after ignore_index=True the index is only 0..n-1, and writing it would add a
#nameless extra column that shows up as 'Unnamed: 0' when the file is read back
results1.to_csv('finalrankings.csv', index=False)

In [2]:
#Silence every Python warning raised from this point on in the kernel.
#NOTE(review): this blanket filter also hides pandas warnings - consider narrowing it to specific categories
import warnings
warnings.filterwarnings('ignore')

In [6]:
#Read in results of normalizing ysax
results = pd.read_csv('finalrankings.csv')

#Fill in any potential nans with zeros
results['NormalizedYSAX'] = results['NormalizedYSAX'].fillna(0)

#Read in tackle csv, set tackle value to be tackles + 0.5*assists - missed tackles
tackles = pd.read_csv(os.path.join("Data", "2024", "tackles.csv"))
#Per-player totals of the three tackle columns, computed in a single groupby
totaltackles = tackles.groupby('nflId')[['tackle', 'assist', 'pff_missedTackle']].sum().reset_index()
totaltackles['TackleValue'] = totaltackles['tackle'] + 0.5*totaltackles['assist'] - totaltackles['pff_missedTackle']

#Sum normalized YSAX over frames on plays.
#gameId is part of the key, matching how the earlier cells identify a play (gameId + playId);
#grouping on playId alone would merge same-numbered plays from different games into one snap.
play_totals = results.groupby(['gameId', 'nflId', 'playId'], as_index=False)['NormalizedYSAX'].sum()
#Sum results over plays and count the number of snaps (one row of play_totals = one snap) per player
results = (
    play_totals.groupby('nflId')
    .agg(NormalizedYSAX=('NormalizedYSAX', 'sum'), SnapCount=('NormalizedYSAX', 'size'))
    .sort_values(by='NormalizedYSAX', ascending=False)
    .reset_index()
)
results = results[['NormalizedYSAX','nflId','SnapCount']]

#Read in players csv
players = pd.read_csv(os.path.join("Data", "2024", "players.csv"))
#Merge players, tackle value, and results into final csv
rankings = players.merge(totaltackles[['nflId','TackleValue']], how='left', on=['nflId'])
rankings = rankings.merge(results[['nflId','NormalizedYSAX','SnapCount']], how='left', on=['nflId'])
#Players absent from the tackle log or from the results get zeros. Only the merged-in metric
#columns are filled, so missing values in the players.csv columns are left untouched.
metric_cols = ['TackleValue', 'NormalizedYSAX', 'SnapCount']
rankings[metric_cols] = rankings[metric_cols].fillna(0)
#Filter out offensive players (copy so the per-snap columns are added to an independent frame)
defensive_positions = ['CB', 'DB', 'DE', 'DT', 'FS', 'ILB', 'MLB', 'NT', 'OLB', 'SS']
rankings = rankings[rankings['position'].isin(defensive_positions)].copy()
#Filter by Snap Count if needed
#rankings = rankings[rankings['SnapCount'] > 0]
#Calculate Values per snap. Players with zero snaps get NaN rather than the inf that a division by zero produces.
snaps_played = rankings['SnapCount'].where(rankings['SnapCount'] > 0)
rankings['TackleValue/Snap'] = rankings['TackleValue']/snaps_played
rankings['NormalizedYSAX/Snap'] = rankings['NormalizedYSAX']/snaps_played
#Remove extra columns
rankings = rankings[['displayName','nflId','position','SnapCount','NormalizedYSAX','NormalizedYSAX/Snap','TackleValue','TackleValue/Snap']]
#Sort by YSAX
rankings = rankings.sort_values(by='NormalizedYSAX', ascending=False)


In [None]:
#Get LineBacker Rankings: inside, outside and middle linebackers, in the order rankings is already sorted
linebacker_positions = ['ILB', 'OLB', 'MLB']
LB = rankings.loc[rankings['position'].isin(linebacker_positions)]
LB.head(10)

In [None]:
#Get Defensive Back Rankings: free safeties, strong safeties and cornerbacks
#NOTE(review): the position filter on rankings also keeps 'DB', which is not selected here - confirm that is intended
defensive_back_positions = ['FS', 'SS', 'CB']
S = rankings.loc[rankings['position'].isin(defensive_back_positions)]
S.head(10)

In [None]:
#Get Defensive Linemen Rankings: defensive tackles, nose tackles and defensive ends.
#DE holds the whole position group, like LB and S; only the displayed table is cut to the top 10.
defensive_line_positions = ['DT', 'NT', 'DE']
DE = rankings.loc[rankings['position'].isin(defensive_line_positions)]
DE.head(10)