In [3]:
import math
import numpy as np
import pandas as pd
from pandas import Series
pd.set_option('max_columns', None)
pd.set_option('display.max_colwidth', None)

from matplotlib import pyplot as plt
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import LabelEncoder

from xgboost import XGBClassifier

In [4]:
# loading in data
weeks = []
for i in range(8):
    weeks.append(pd.read_csv('/Users/alexchoi/Downloads/nfl-big-data-bowl-2023/week' + str(i + 1) + '.csv'))
tracking = pd.concat(weeks)

In [4]:
players_data = pd.read_csv('/Users/alexchoi/Downloads/nfl-big-data-bowl-2023/players.csv')
scouting_data = pd.read_csv('/Users/alexchoi/Downloads/nfl-big-data-bowl-2023/pffScoutingData.csv')

#initializes the elo_dict
elo_dict = {}
for index, player in players_data.iterrows():
    elo_dict[player["nflId"]] = {"elo" : 1000, 
    "position": player["officialPosition"], 
    "displayName": player["displayName"], 
    "snaps": 0}

sacks = 0
hits = 0
hurry = 0
snap = 0

#calculates and updates the elo
def calculateElo(offense,  play):
    global sacks
    global hits
    global hurry
    global snap

    #goes through all the blockers and creates dict of who is being blocked
    blockedPlayers = {}
    for index, row in offense.iterrows():
        blockedPlayer = row['pff_nflIdBlockedPlayer']
        if blockedPlayer not in blockedPlayers:
            blockedPlayers[blockedPlayer] = [row['nflId']]
        else:
            blockedPlayers[blockedPlayer] += [row['nflId']]

    #iterates thought every blocked player
    for blockedPlayer in blockedPlayers.keys():
        player = play[play['nflId'] == blockedPlayer]
        numBlockers = len(blockedPlayers[blockedPlayer])
        blockerElo = 0

        #cumulates the total elo of blockers against defender
        for blockerId in blockedPlayers[blockedPlayer]:
            elo_dict[blockerId]["snaps"] += 1
            if numBlockers > 1:
                blockerElo += elo_dict[blockerId]["elo"] 
            else:
                blockerElo = elo_dict[blockerId]["elo"]


        if blockedPlayer not in elo_dict.keys():
            elo_dict[blockedPlayer] = {"elo" : 1000, "position": "N/A", "snaps": 1, "displayName": "N/A"}
        else:
            elo_dict[blockedPlayer]["snaps"] = elo_dict[blockedPlayer]["snaps"] + 1


        #calculates expected score
        blockedPlayerExpected = 1 / (1 + pow(10, ((blockerElo - elo_dict[blockedPlayer]["elo"]) / 400)))
        blockingPlayerExpected = 1 / (1 + pow(10, ((elo_dict[blockedPlayer]["elo"] - blockerElo) / 400)))

        snap += 1
        #cases and updates elo dictionary
        if (player['pff_sack'] == 1.0).all():
            elo_dict[blockedPlayer]["elo"] = elo_dict[blockedPlayer]["elo"] + (40 * (0.94 - blockedPlayerExpected) * numBlockers)
            sacks += 1
            for blockerId in blockedPlayers[blockedPlayer]:
                elo_dict[blockerId]["elo"] = elo_dict[blockerId]["elo"] + 40 * (0.06 - blockingPlayerExpected) 

        elif ((player['pff_hit'] == 1.0).all() or (player['pff_hurry'] == 1.0).all()):
            bt = elo_dict[blockedPlayer]["elo"] / (elo_dict[blockedPlayer]["elo"] + blockerElo)

            elo_dict[blockedPlayer]["elo"] = elo_dict[blockedPlayer]["elo"] + (40 * (0.91 - blockedPlayerExpected) * numBlockers)
            hits += 1
            for blockerId in blockedPlayers[blockedPlayer]:
                elo_dict[blockerId]["elo"] = elo_dict[blockerId]["elo"] + 40 * (0.09 - blockingPlayerExpected) 

        else:
            elo_dict[blockedPlayer]["elo"] = elo_dict[blockedPlayer]["elo"] + (40 * (0.14 - blockedPlayerExpected) * numBlockers)
            for blockerId in blockedPlayers[blockedPlayer]:
                elo_dict[blockerId]["elo"] = elo_dict[blockerId]["elo"] + 40 * (0.86 - blockingPlayerExpected)

        
#filters game id's
gameIds = np.array(scouting_data['gameId'])
gameIds = np.unique(gameIds)



for gameId in gameIds:
    game = scouting_data[scouting_data['gameId'] == gameId]

    #filters play id's
    playIds = np.array(game['playId'])
    playIds = np.unique(playIds)

    for playId in playIds:
        play = game[game['playId'] == playId]
        #gets the offensive players
        Offense = play[play['pff_role'] == "Pass Block"]
        calculateElo(Offense, play)

0.0571762625830553
0.08617788804288933
0.0
0.8566458493740554
0.9428237374169447
0.9138221119571107
1.0
0.1433541506259446


In [17]:
# Filters to get the n best players WITH position

filtered_data = {k: v for k, v in elo_dict.items() if (v["position"] == "DE" or v["position"] == "DT") and v["snaps"] > 50}
n = 10000  # number of keys to get
sorted_keys = sorted(filtered_data, key=lambda x: filtered_data[x]["elo"], reverse=True)[:n]
positions = []
elo = []
for key in sorted_keys:
    print(elo_dict[key]["displayName"])
    # print(elo_dict[key]["position"])
    # print(elo_dict[key]["elo"])
    elo.append(elo_dict[key]["elo"])
    positions.append(elo_dict[key]["position"])
    print(elo_dict[key]["snaps"])

Aaron Donald
244
B.J. Hill
135
Javon Hargrave
172
Osa Odighizuwa
171
Shelby Harris
158
Jeffery Simmons
266
Christian Barmore
182
Star Lotulelei
108
Solomon Thomas
126
Akiem Hicks
97
Dexter Lawrence
173
Leonard Williams
233
Jonathan Allen
217
J.J. Watt
178
Quinton Jefferson
159
Justin Zimmer
94
Mario Edwards
62
Grady Jarrett
183
DeMarcus Walker
87
DeForest Buckner
213
William Gholston
117
Jarran Reed
169
Lawrence Guy
82
Teair Tart
91
Tanoh Kpassagnon
107
Derrick Brown
158
Marcus Davenport
52
DaQuan Jones
130
Larry Ogunjobi
208
Sheldon Richardson
126
Nathan Shepherd
72
Folorunso Fatukasi
121
Maliek Collins
137
Malcom Brown
115
Daron Payne
227
Dre'Mont Jones
185
Christian Wilkins
153
Al Woods
126
Carlos Watkins
83
Myles Garrett
191
Fletcher Cox
175
Darius Philon
75
Adam Gotsis
89
Nick Bosa
146
Chris Jones
141
Matt Ioannidis
145
Cameron Heyward
194
Bravvion Roy
53
Roy Robertson-Harris
75
Grover Stewart
137
Jerry Tillery
174
Tyquan Lewis
163
Malcolm Roach
90
Arik Armstead
158
Jerry Hughes
1

In [6]:
# Filters to get the n best players regardless of position

filter_snaps = {k: v for k, v in elo_dict.items() if v["snaps"] > 50}
n = 50  # number of keys to get
sorted_keys = sorted(filter_snaps, key=lambda x: filter_snaps[x]["elo"], reverse=True)[:n]
for key in sorted_keys:
    print(elo_dict[key]["displayName"])
    print(elo_dict[key]["position"])
    print(elo_dict[key]["elo"])
    # print(elo_dict[key]["snaps"])

Aaron Donald
DT
1448.8901623218408
Kenny Clark
NT
1415.4793469228857
Greg Gaines
NT
1301.2274590516715
B.J. Hill
DT
1290.328355646834
Javon Hargrave
DT
1287.222187055183
Osa Odighizuwa
DT
1260.632868770668
Shelby Harris
DE
1251.683160016249
Jeffery Simmons
DT
1250.7862066440684
Christian Barmore
DT
1246.5568849505619
Star Lotulelei
DT
1243.2905967984116
Solomon Thomas
DT
1242.9684072038392
Tristan Wirfs
T
1230.5603220526523
Akiem Hicks
DT
1227.7264460985837
Dexter Lawrence
DE
1224.2540672435644
Leonard Williams
DT
1214.1729974388106
Jonathan Allen
DT
1210.4373771809871
J.J. Watt
DE
1210.2193548018965
Quinton Jefferson
DT
1199.3457977945866
Justin Zimmer
DT
1186.7630017270135
Mario Edwards
DT
1177.7635315017224
Grady Jarrett
DT
1177.230182131733
Josh Jones
G
1176.4061479340214
Rob Havenstein
T
1176.1380113216126
Andrew Whitworth
T
1173.2827553434158
Austin Johnson
NT
1168.86606134487
Terence Steele
T
1168.147224404362
Oday Aboushi
G
1160.7945792023622
Braden Smith
T
1159.3687441714396
D