# NFL Big Data Bowl 2024 - Tackling

Introduction

## Cleaning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [2]:
# Read the weekly tracking CSVs (weeks 1 through 9) and stack them into a
# single frame with a fresh 0..n-1 index.
weekly_frames = [pd.read_csv(f"data/tracking_week_{week}.csv") for week in range(1, 10)]
tracking = pd.concat(weekly_frames).reset_index(drop=True)

In [3]:
from Cleaning import (
    clean_games_data,
    clean_plays_data,
    clean_players_data,
    clean_tackles_data,
    clean_tracking_data,
)

# Pass each raw table through its project-specific cleaner. The CSV-backed
# tables are read inline; `tracking` was already assembled in the previous cell.
games = clean_games_data(pd.read_csv("data/games.csv"))
players = clean_players_data(pd.read_csv("data/players.csv"))
plays = clean_plays_data(pd.read_csv("data/plays.csv"))
tackles = clean_tackles_data(pd.read_csv("data/tackles.csv"))
tracking = clean_tracking_data(tracking)

Games data has been cleaned and memory has been reduced by 2.65625 bytes.
Players data has been cleaned and memory has been reduced by 13.1484375 bytes.
Plays data has been cleaned and memory has been reduced by 1041.80859375 bytes.
Tackles data has been cleaned and memory has been reduced by 476.4921875 bytes.
Tracking data has been cleaned and memory has been reduced by 476070.234375 bytes.


In [None]:
from Cleaning import check_for_snap, check_for_end

# Apply the snap check first, then the end-of-play check, to the cleaned plays.
# (Presumably these drop plays whose tracking lacks those events — confirm in Cleaning.)
# NOTE(review): the Feature Engineering cell below assigns `full_plays` again,
# starting from `plays`, so this result is overwritten there — confirm that is intended.
full_plays = check_for_end(check_for_snap(plays, tracking), tracking)

## Feature Engineering

In [4]:
from Preprocessing import (
    all_plays_left_to_right,
    create_acceleration_vectors,
    create_player_influence,
    create_velocity_vectors,
)

# Make all the plays go from left to right to remove the variability that
# play direction may cause.
# NOTE(review): this starts from `plays`, not from the `full_plays` produced by
# the snap/end checks above — confirm which input is intended.
full_plays, full_tracking = all_plays_left_to_right(plays, tracking)

# Add the derived tracking features one after another, in the same order as before:
# acceleration vectors, then velocity vectors, then player influence.
for add_feature in (
    create_acceleration_vectors,
    create_velocity_vectors,
    create_player_influence,
):
    full_tracking = add_feature(full_tracking)

In [None]:
# Straight-line (Euclidean) distance between each player row and the football.
# NOTE(review): `football_and_player_tracking` is not created in any cell visible
# in this notebook — confirm where it comes from.
dx = football_and_player_tracking['x_player'] - football_and_player_tracking['x_football']
dy = football_and_player_tracking['y_player'] - football_and_player_tracking['y_football']
football_and_player_tracking['player_to_football_distance'] = np.sqrt(dx**2 + dy**2)


In [None]:
# NOTE(review): `df` is first assigned in a later cell (the gameId/playId/frameId
# query), so on a fresh Restart & Run All this cell raises NameError.
df

In [None]:
import numpy as np
import pandas as pd

# Constants
# NOTE(review): max_speed is not referenced by any cell visible in this notebook;
# its units and intended use are unclear — confirm or remove.
max_speed = 18

def create_scaling_matrix(row):
    """Build the 2x2 diagonal scaling matrix for one row.

    Reads ``row['sx_player']`` and ``row['sy_player']`` and places them on the
    diagonal; the off-diagonal entries are zero.
    """
    scale_x, scale_y = row['sx_player'], row['sy_player']
    return np.array([
        [scale_x, 0],
        [0, scale_y],
    ])

def create_rotation_matrix(row):
    """Build the 2x2 rotation matrix for the angle in ``row['dir_rad_player']``.

    The angle is passed straight to ``np.cos`` / ``np.sin``, so it is treated
    as radians.
    """
    theta = row['dir_rad_player']
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    return np.array([
        [cos_t, -sin_t],
        [sin_t, cos_t],
    ])

def create_cov_matrix(row):
    """Combine the row's rotation and scaling matrices into a covariance matrix.

    Computes ``R @ S @ S @ R.T`` with ``R = row['rotation_player']`` and
    ``S = row['scaling_player']``.
    """
    rotation = row['rotation_player']
    scaling = row['scaling_player']
    # Keep the original left-to-right evaluation order of the product.
    return rotation @ scaling @ scaling @ rotation.T

def create_mean_matrix(row):
    """Return the centre of the player's Gaussian as a length-2 array ``[x, y]``.

    Despite the name this is a vector, not a matrix: the player's position
    shifted along ``dir_rad_player`` by half of ``s_player``.
    """
    heading = row['dir_rad_player']
    speed = row['s_player']
    shifted_x = row['x_player'] + (np.cos(heading) * speed * 0.5)
    shifted_y = row['y_player'] + (np.sin(heading) * speed * 0.5)
    return np.array([shifted_x, shifted_y])

def create_inv_cov_matrix(row):
    """Return the matrix inverse of ``row['cov_player']``."""
    covariance = row['cov_player']
    return np.linalg.inv(covariance)

def gaussian_pdf(p, mu, cov_inv, det_cov):
    """Evaluate a multivariate Gaussian density at point ``p``.

    Args:
        p: Point at which to evaluate the density.
        mu: Mean of the distribution; its length sets the dimensionality.
        cov_inv: Inverse of the covariance matrix.
        det_cov: Determinant of the covariance matrix.

    Returns:
        The density value at ``p``.
    """
    dim = len(mu)
    offset = p - mu
    mahalanobis_sq = offset.T @ cov_inv @ offset
    scale = 1 / (np.sqrt((2 * np.pi) ** dim * det_cov))
    return scale * np.exp(-0.5 * (mahalanobis_sq))

# Build the per-row matrix columns. Order matters: the covariance needs the
# scaling and rotation columns, and the inverse needs the covariance column.
for column, builder in (
    ('scaling_player', create_scaling_matrix),
    ('rotation_player', create_rotation_matrix),
    ('cov_player', create_cov_matrix),
    ('mean_player', create_mean_matrix),
    ('cov_inv_player', create_inv_cov_matrix),
):
    football_and_player_tracking[column] = football_and_player_tracking.apply(builder, axis=1)

def calculate_influence_degree(row):
    """Return the player's influence at the football's location for one row.

    The value is the player's Gaussian density evaluated at
    ``(x_football, y_football)`` divided by the same density evaluated at the
    player's own ``(x_player, y_player)`` position.
    """
    centre = row['mean_player']
    inverse_cov = row['cov_inv_player']
    cov_determinant = np.linalg.det(row['cov_player'])

    # Density at the football's coordinates.
    football_position = np.array([row['x_football'], row['y_football']])
    density_at_football = gaussian_pdf(football_position, centre, inverse_cov, cov_determinant)

    # Density at the player's own coordinates, used as the normalising baseline.
    own_position = np.array([row['x_player'], row['y_player']])
    density_at_player = gaussian_pdf(own_position, centre, inverse_cov, cov_determinant)

    return density_at_football / density_at_player

# Score every row with its influence degree and store it as a new column.
influence_scores = football_and_player_tracking.apply(calculate_influence_degree, axis=1)
football_and_player_tracking['influence_degree'] = influence_scores

In [None]:
# Select a single frame (frame 20) of one play to inspect.
in_example_frame = (
    (football_and_player_tracking['gameId'] == 2022090800)
    & (football_and_player_tracking['playId'] == 56)
    & (football_and_player_tracking['frameId'] == 20)
)
df = football_and_player_tracking[in_example_frame]

In [None]:
# Influence degree (scaled by 10) for every row that is not the football itself.
df.loc[df['displayName_player'] != 'football', 'influence_degree'] * 10

In [None]:

# Create a grid of points covering the field: 200 x-values on [0, 100] and
# 100 y-values on [0, 50] (adjust to the field size). `grid_points` lists the
# grid row by row as (x, y) pairs.
x_grid, y_grid = np.meshgrid(
    np.linspace(0, 100, 200),
    np.linspace(0, 50, 100),
)
grid_points = np.column_stack((x_grid.ravel(), y_grid.ravel()))

def gaussian_pdf(p, mu, cov_inv, det_cov):
    """Evaluate a multivariate Gaussian density at ``p``.

    NOTE(review): this repeats the ``gaussian_pdf`` defined in an earlier cell
    and rebinds the name; consider keeping a single definition.

    Args:
        p: Point at which to evaluate the density.
        mu: Mean of the distribution; its length sets the dimensionality.
        cov_inv: Inverse of the covariance matrix.
        det_cov: Determinant of the covariance matrix.
    """
    n_dims = len(mu)
    delta = p - mu
    normaliser = 1 / (np.sqrt((2 * np.pi) ** n_dims * det_cov))
    quadratic_form = delta.T @ cov_inv @ delta
    return normaliser * np.exp(-0.5 * (quadratic_form))

# Compute the influence degree at each grid point.
# For every row of `df`, evaluate that player's Gaussian density at each grid
# point and divide by the density at the player's own position (the same
# normalisation calculate_influence_degree uses at the football's location).
# The original loop computed the density but then added the row's single
# `influence_degree` value to every cell, which produced a flat map.
influence_map = np.zeros(x_grid.shape)
for _, row in df.iterrows():
    mean = np.array(row['mean_player'])
    cov_inv = row['cov_inv_player']
    det_cov = np.linalg.det(row['cov_player'])

    # Baseline: density at the player's own coordinates.
    player_position = np.array([row['x_player'], row['y_player']])
    baseline_pdf_value = gaussian_pdf(player_position, mean, cov_inv, det_cov)

    pdf_values = np.array([
        gaussian_pdf(point, mean, cov_inv, det_cov) for point in grid_points
    ])
    # grid_points was built by ravelling x_grid/y_grid row by row, so reshaping
    # back to x_grid.shape puts each value in its own grid cell.
    influence_map += (pdf_values / baseline_pdf_value).reshape(x_grid.shape)

# `go` was used below without ever being imported, which raises NameError on a
# fresh kernel; import plotly's graph objects here so the cell runs on its own.
import plotly.graph_objects as go

# Create the plot
fig = go.Figure()

# Add the contour map of influence degrees (scaled by 10, matching the scaling
# used when the per-player influence values were inspected).
fig.add_trace(go.Contour(
    z=10*influence_map,
    x=np.linspace(0, 100, 200),
    y=np.linspace(0, 50, 100),
    colorscale='Viridis',
    colorbar=dict(title='Influence Degree'),
    contours=dict(
        showlines=True,
        coloring='fill',
        labelfont=dict(size=12, color='white')
    ),
    name='Influence Contours'
))

# # Add player positions
# fig.add_trace(go.Scatter(
#     x=df['x_player'],
#     y=df['y_player'],
#     mode='markers+text',
#     text=df['influence_degree'].apply(lambda x: f"{x:.5f}"),
#     textposition='top center',
#     marker=dict(size=10, color='red'),
#     name='Players'
# ))

# Layout settings
fig.update_layout(
    title='Player Influence on the Field',
    xaxis_title='X Position',
    yaxis_title='Y Position',
    xaxis=dict(range=[0, 100]),
    yaxis=dict(range=[0, 50]),
    autosize=True
)

# Show the plot
fig.show()

In [None]:
from Visualizations import animate_play

# Animate play 56 of game 2022090800 with both the acceleration and velocity
# options switched on.
animate_play(
    games,
    full_plays,
    full_tracking,
    gameId=2022090800,
    playId=56,
    acceleration=True,
    velocity=True,
)

In [None]:
# Attach the play-level columns to every tracking row of the same game and play.
data = plays.merge(tracking, on=["gameId", "playId"])

Finding who made the tackle on this play

Show the movement of the players during the play

In [None]:
# Make a more customized version in plotly

In [None]:
# Frame 20 of play 3126 in game 2022091103.
in_example_frame = (
    (data['gameId'] == 2022091103)
    & (data['playId'] == 3126)
    & (data['frameId'] == 20)
)
exampleFrame = data[in_example_frame]

In [None]:
from Visualizations import animate_play

# Animate play 3126 of game 2022091103 with the velocity option switched on.
animate_play(
    games,
    full_plays,
    full_tracking,
    gameId=2022091103,
    playId=3126,
    velocity=True,
)

In [None]:
# All merged play/tracking rows for play 80 of game 2022090800.
play = data.query('gameId == 2022090800 and playId == 80')

In [None]:
# Rows for a single player (nflId 35472) during play 56 of game 2022090800.
playerData = data.query('gameId == 2022090800 and playId == 56 and nflId == 35472')

Find the relationship between the player and the ball at all times.

In [None]:
# Rows of the selected play that belong to the football itself.
football = play[play['club'] == 'football']

# Columns that must match between the football row and each row of the play.
# Every other column appears twice in the result, suffixed with `_football`
# (left side) or `_player` (right side).
shared_keys = [
    'gameId', 'playId', 'ballCarrierId', 'ballCarrierDisplayName',
    'playDescription', 'quarter', 'down', 'yardsToGo', 'possessionTeam',
    'defensiveTeam', 'yardlineSide', 'yardlineNumber', 'gameClock',
    'preSnapHomeScore', 'preSnapVisitorScore', 'passResult', 'passLength',
    'penaltyYards', 'prePenaltyPlayResult', 'playResult',
    'playNullifiedByPenalty', 'absoluteYardlineNumber', 'offenseFormation',
    'defendersInTheBox', 'passProbability', 'preSnapHomeTeamWinProbability',
    'preSnapVisitorTeamWinProbability', 'homeTeamWinProbabilityAdded',
    'visitorTeamWinProbilityAdded', 'expectedPoints', 'expectedPointsAdded',
    'foulName1', 'foulName2', 'foulNFLId1', 'foulNFLId2',
    'frameId', 'time',
    'playDirection', 'event',
]
footballAndPlayer = football.merge(play, on=shared_keys, suffixes=('_football', '_player'))

In [None]:
# Preview the first rows of the merged frame rather than dumping the whole table.
footballAndPlayer.head()

In [None]:
# Euclidean distance between each row's player and the football.
# The merge that builds `footballAndPlayer` uses suffixes ('_football', '_player'),
# so the coordinate columns are x_football / x_player and y_football / y_player;
# the pandas default names x_x / x_y do not exist and would raise KeyError.
dx = footballAndPlayer['x_football'] - footballAndPlayer['x_player']
dy = footballAndPlayer['y_football'] - footballAndPlayer['y_player']
footballAndPlayer['distanceFromBall'] = np.sqrt(dx**2 + dy**2)

Find the distance between the players and the ball at all times

In [None]:
# Keep only rows whose player belongs to the defensive team of this play.
# The player-side club column is `club_player`, because the merge uses the
# '_player' suffix; `club_y` does not exist and would raise KeyError.
defensive_team = footballAndPlayer.iloc[0]['defensiveTeam']
defense = footballAndPlayer[footballAndPlayer['club_player'] == defensive_team]

Plot the distance between each player and the ball at all times during the play

In [None]:
# One line per defender: distance from the football at every frame.
# Player-side columns carry the '_player' suffix from the merge, so the name
# column is `displayName_player` (`displayName_y` does not exist).
fig, ax = plt.subplots()
for player in defense['displayName_player'].unique():
    player_frames = defense[defense['displayName_player'] == player]
    ax.plot(player_frames['frameId'], player_frames['distanceFromBall'], label=player)
ax.axhline(0)

# Mark the first frame tagged with a "tackle" event, if this play has one.
tackle_frames = defense.loc[defense['event'] == "tackle", 'frameId']
if not tackle_frames.empty:
    ax.axvline(tackle_frames.iloc[0])

ax.set(xlabel='Frame', ylabel='Distance from ball', title='Defender distance from the ball')
ax.legend()
plt.show()

Plot the speed of the player at all times

In [None]:
# One line per defender: speed at every frame.
# Player-side columns carry the '_player' suffix from the merge, so the columns
# are `displayName_player` and `s_player` (`displayName_y` / `s_y` do not exist).
fig, ax = plt.subplots()
for player in defense['displayName_player'].unique():
    player_frames = defense[defense['displayName_player'] == player]
    ax.plot(player_frames['frameId'], player_frames['s_player'], label=player)

# Mark the first frame tagged with a "tackle" event, if this play has one.
tackle_frames = defense.loc[defense['event'] == "tackle", 'frameId']
if not tackle_frames.empty:
    ax.axvline(tackle_frames.iloc[0])

ax.set(xlabel='Frame', ylabel='Speed', title='Defender speed')
ax.legend()
plt.show()

Plot the acceleration of the player at all times

In [None]:
# One line per defender: acceleration at every frame.
# Player-side columns carry the '_player' suffix from the merge, so the columns
# are `displayName_player` and `a_player` (`displayName_y` / `a_y` do not exist).
fig, ax = plt.subplots()
for player in defense['displayName_player'].unique():
    player_frames = defense[defense['displayName_player'] == player]
    ax.plot(player_frames['frameId'], player_frames['a_player'], label=player)

# Mark the first frame tagged with a "tackle" event, if this play has one.
tackle_frames = defense.loc[defense['event'] == "tackle", 'frameId']
if not tackle_frames.empty:
    ax.axvline(tackle_frames.iloc[0])

ax.set(xlabel='Frame', ylabel='Acceleration', title='Defender acceleration')
ax.legend()
plt.show()

# Classify each tackle as open-field, sideline, in the hole, solo, or gang.

Criteria for each tackle:
- open field
    - The defender is … (TODO: criterion not yet defined)
    
ToDo:
- Use a semi-supervised algorithm to classify all tackles.
- Use this classification to rank players on their ability to make each specific type of tackle.
- Create a reinforcement learning agent to make each type of tackle and see what the computer can do to make each tackle.