In [2]:
import pandas as pd
import numpy as np
import scipy as sp
import time
import os
import json
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.path as mpltPath
from matplotlib.collections import PatchCollection
from matplotlib.animation import FuncAnimation
from matplotlib.lines import Line2D
from shapely.geometry import Polygon, box, LineString, Point
from shapely.ops import unary_union
from scipy.spatial import Voronoi, cKDTree, ConvexHull, Delaunay
from concurrent.futures import ProcessPoolExecutor, as_completed
import utils as NFLUtils

### Format

In [3]:
expected_yards = pd.read_csv('./data/eval_frame_df.csv')
expected_yards.head()

Unnamed: 0,gameId,playId,frameId,frameId_start,frameId_end,startFootballX,endFootballX,currFootballX,yardsByCarrier,gainedYardsByCarrier,remainingYardsByCarrier,expectedRemainingYardsByCarrier,expectedYardsByCarrier
0,2022092200,56,20,20.0,47.0,29.5,36.400002,29.5,6.900002,0.0,6.900002,4.692643,4.692643
1,2022092200,56,21,20.0,47.0,29.5,36.400002,29.550003,6.900002,0.050003,6.849998,4.394129,4.444132
2,2022092200,56,22,20.0,47.0,29.5,36.400002,29.709999,6.900002,0.209999,6.690002,4.390025,4.600024
3,2022092200,56,23,20.0,47.0,29.5,36.400002,29.959999,6.900002,0.459999,6.440002,4.142516,4.602515
4,2022092200,56,24,20.0,47.0,29.5,36.400002,30.279999,6.900002,0.779999,6.120003,4.147831,4.927829


In [5]:
play = pd.read_csv('./games/2022090800_LA_BUF/56/tpc_per_frame_unweighted.csv', index_col=0)
play.head()

Unnamed: 0,38577,41239,42816,43294,43298,43335,47844,47917,48026,52607,53532
6,1.8407,0.0,0.0,197.6369,3.7853,0.0,0.0,246.0608,0.0,0.0,7.184
7,2.6687,0.0,0.0,197.7528,3.1367,0.0,0.0,227.5581,0.0,0.0,6.718
8,3.5493,-0.0,0.0,197.1863,2.4883,0.0,0.0,207.6185,-0.0,0.0,5.5805
9,4.4418,0.0,0.0,196.1758,1.8154,0.0,0.0,185.4501,0.0,0.0,4.2674
10,5.1009,0.0,0.0,194.1843,1.2621,0.0,0.0,159.3285,0.0,0.0,3.075


### Unweighted Results

In [27]:
######## This iterates through the tpc files, normalizes each frame to sum to one, and multiplies each row by the expected yards_{frame+1} - expected_yards_{frame}
# Define the path to the 'games' folder
games_folder = './games'

# Iterate over each game folder
for game_folder in os.listdir(games_folder):
    game_path = os.path.join(games_folder, game_folder)
    if os.path.isdir(game_path):
        # Extract gameId from the folder name
        gameId = int(game_folder.split('_')[0])
        print(gameId)

        # Iterate over each play folder within the game folder
        for play_folder in os.listdir(game_path):
            play_path = os.path.join(game_path, play_folder)
            if os.path.isdir(play_path):
                # Extract playId from the folder name
                playId = int(play_folder)

                # Define the path to the tpc_per_frame_unweighted.csv file
                tpc_file_path = os.path.join(play_path, 'tpc_per_frame_unweighted.csv')
                if os.path.exists(tpc_file_path):
                    # Load the tpc_per_frame_unweighted DataFrame
                    tpc_df = pd.read_csv(tpc_file_path, index_col=0)

                    # Normalize each row so that it sums to 1
                    tpc_df = tpc_df.div(tpc_df.sum(axis=1), axis=0)

                    # Filter the expected_yards DataFrame for the current gameId and playId
                    filtered_expected_yards = expected_yards[(expected_yards['gameId'] == gameId) & 
                                                             (expected_yards['playId'] == playId)]

                    # Initialize a DataFrame to store the results
                    constrictive_presence_ratio_df = pd.DataFrame(index=tpc_df.index, columns=tpc_df.columns)

                    # Iterate through the rows
                    for frame in tpc_df.index:
                        # Ensure the frame exists in the filtered_expected_yards
                        if frame in filtered_expected_yards['frameId'].values:
                            # Calculate the difference in expected yards for each frame
                            current_frame_row = filtered_expected_yards[filtered_expected_yards['frameId'] == frame]
                            next_frame_row = filtered_expected_yards[filtered_expected_yards['frameId'] == frame + 1]
                            if not next_frame_row.empty:
                                delta_expected_yards = next_frame_row['expectedRemainingYardsByCarrier'].values[0] - \
                                                       current_frame_row['expectedRemainingYardsByCarrier'].values[0]
                                # Multiply the percentages for each player by the delta
                                # print(tpc_df.loc[frame])
                                # print(delta_expected_yards)
                                # print(tpc_df.loc[frame] * delta_expected_yards)
                                constrictive_presence_ratio_df.loc[frame,:] = tpc_df.loc[frame,:] * delta_expected_yards

                    # Save the resulting DataFrame
                    # print(constrictive_presence_ratio_df)
                    constrictive_presence_ratio_df.to_csv(os.path.join(play_path, 'constrictive_presence_ratio_unweighted.csv'))

2022110604
2022100204
2022092501
2022100900
2022091812
2022103008
2022091105
2022100910
2022102303
2022110700
2022110607
2022092507
2022102400
2022100908
2022092900
2022091107
2022091500
2022102310
2022091803
2022100205
2022092513
2022101700
2022102700
2022092512
2022100905
2022091802
2022091113
2022091901
2022102304
2022100212
2022101602
2022103100
2022103009
2022100903
2022110603
2022110600
2022091200
2022090800
2022100600
2022091100
2022091101
2022091800
2022092500
2022102308
2022103001
2022092600
2022110601
2022100202
2022100210
2022091805
2022100206
2022091110
2022100211
2022091804
2022103007
2022110605
2022091806
2022103002
2022100208
2022101603
2022091809
2022091108
2022091104
2022110606
2022100904
2022092504
2022092510
2022100209
2022092509
2022091900
2022103004
2022101601
2022102307
2022102309
2022110608
2022092511
2022100911
2022092505
2022102305
2022091103
2022092502
2022092200
2022091811
2022101600
2022101611
2022101609
2022102302
2022102311
2022110300
2022110610
2022092508

In [32]:
###### aggregate all players into one df

import os
import csv

# Define the path to the 'games' folder and the output file
games_folder = './games'
output_file = './data/constrictive_presence_ratio_unweighted.csv'
counter = 0
# Ensure the output directory exists
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Open the CSV file for writing
with open(output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Write the header row
    writer.writerow(['gameId', 'playId', 'nflId', 'constrictivePresenceSum'])

    # Iterate over each game folder
    for game_folder in os.listdir(games_folder):
        game_path = os.path.join(games_folder, game_folder)
        if os.path.isdir(game_path):
            # Extract gameId from the folder name
            gameId = int(game_folder.split('_')[0])
            print(f'Game: {gameId}, counter: {counter}')
            counter+=1

            # Iterate over each play folder within the game folder
            for play_folder in os.listdir(game_path):
                play_path = os.path.join(game_path, play_folder)
                if os.path.isdir(play_path):
                    # Extract playId from the folder name
                    playId = int(play_folder)

                    # Define the path to the constrictive_presence_ratio_unweighted.csv file
                    constrictive_file_path = os.path.join(play_path, 'constrictive_presence_ratio_unweighted.csv')
                    if os.path.exists(constrictive_file_path):
                    
                        # Load the constrictive_presence_ratio_unweighted DataFrame
                        constrictive_df = pd.read_csv(constrictive_file_path, index_col=0)

                        # Calculate the sum of each column (player)
                        player_sums = constrictive_df.sum()

                        # Write each player's sum to the CSV file
                        for playerId, sum_value in player_sums.items():
                            writer.writerow([gameId, playId, playerId, sum_value])

print("Aggregation complete. Data saved to", output_file)


Game: 2022110604, counter: 0
Game: 2022100204, counter: 1
Game: 2022092501, counter: 2
Game: 2022100900, counter: 3
Game: 2022091812, counter: 4
Game: 2022103008, counter: 5
Game: 2022091105, counter: 6
Game: 2022100910, counter: 7
Game: 2022102303, counter: 8
Game: 2022110700, counter: 9
Game: 2022110607, counter: 10
Game: 2022092507, counter: 11
Game: 2022102400, counter: 12
Game: 2022100908, counter: 13
Game: 2022092900, counter: 14
Game: 2022091107, counter: 15
Game: 2022091500, counter: 16
Game: 2022102310, counter: 17
Game: 2022091803, counter: 18
Game: 2022100205, counter: 19
Game: 2022092513, counter: 20
Game: 2022101700, counter: 21
Game: 2022102700, counter: 22
Game: 2022092512, counter: 23
Game: 2022100905, counter: 24
Game: 2022091802, counter: 25
Game: 2022091113, counter: 26
Game: 2022091901, counter: 27
Game: 2022102304, counter: 28
Game: 2022100212, counter: 29
Game: 2022101602, counter: 30
Game: 2022103100, counter: 31
Game: 2022103009, counter: 32
Game: 2022100903, co

In [41]:
# augment with additional data like position and name

# Load the players DataFrame
players_file = './data/players.csv'
players_df = pd.read_csv(players_file)

# Load the constrictive presence ratio DataFrame
constrictive_file = './data/constrictive_presence_ratio_unweighted.csv'
constrictive_df = pd.read_csv(constrictive_file)

# Merge the DataFrames on nflId
augmented_df = pd.merge(constrictive_df, players_df[['nflId', 'position', 'displayName']], 
                        on='nflId', how='left')

# Save the augmented DataFrame
augmented_df.to_csv('./data/constrictive_presence_ratio_unweighted.csv', index=False)

print("Augmentation complete. Data saved to './data/constrictive_presence_ratio_unweighted.csv'")


Augmentation complete. Data saved to './data/constrictive_presence_ratio_unweighted.csv'


In [43]:
constrictive_presence_ratio_all = pd.read_csv('./data/constrictive_presence_ratio_unweighted.csv')
grouped_by_player = constrictive_presence_ratio_all[['displayName', 'constrictivePresenceSum']].groupby('displayName').sum()

In [45]:
ranked_players = grouped_by_player.sort_values(by='constrictivePresenceSum', ascending=False)

# Print the names of the players in order
print("Ranked Players from Greatest to Lowest based on Constrictive Presence Sum:")
for name, score in ranked_players.itertuples():
    print(name, score)

Ranked Players from Greatest to Lowest based on Constrictive Presence Sum:
Matt Dickerson 1.8849376336780983
Sam Eguavoen 0.5595819881752866
Wyatt Ray 0.40930969160823144
Jalyn Armour-Davis 0.23352217337413173
Brandon Smith 0.08837568189628131
D'Anthony Bell 0.0737161452246625
Christian Matthew 0.0218081950586163
Michael Hoecht 0.0091007117452202
Chase Winovich 0.0
Cobie Durant 0.0
Lewis Cine 0.0
Christian Ringo 0.0
Joseph Jones 0.0
Chris Jackson 0.0
Quincy Roche 0.0
Steven Means 0.0
Tarell Basham 0.0
C.J. Brewer 0.0
Bryce Thompson 0.0
Bryce Hall 0.0
Justin Coleman 0.0
T.J. Watt 0.0
Daryl Worley 0.0
DaMarcus Mitchell 0.0
Nakobe Dean 0.0
Jamal Adams 0.0
Matthias Farley 0.0
Micah Hyde 0.0
Michael Ojemudia 0.0
Isaac Yiadom 0.0
Marquise Blair 0.0
Erik Harris 0.0
Troy Reeder 0.0
Elijah Lee 0.0
Olasunkanmi Adeniyi 0.0
Donovan Jeter 0.0
Phidarian Mathis 0.0
Derek Barnett 0.0
Deionte Thompson 0.0
Kyle Fuller 0.0
Da'Shawn Hand 0.0
Keir Thomas 0.0
Kevin Pierre-Louis 0.0
Aaron Patrick 0.0
Ugochuk