# Data Exploration

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Let pandas show up to 100 columns and 100 rows before it truncates the display.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [2]:
# Read the weekly tracking files for weeks 1 through 9 and stack them into one
# frame; ignore_index=True gives the result a fresh 0..n-1 index.
tracking = pd.concat(
    [pd.read_csv(f"data/tracking_week_{week}.csv") for week in range(1, 10)],
    ignore_index=True,
)

In [3]:
from Cleaning import clean_games_data, clean_plays_data, clean_players_data, \
    clean_tackles_data, clean_tracking_data
# Load each raw CSV and pass it through its matching cleaner from Cleaning.
# Tables are processed in the same order as before (games, players, plays,
# tackles, tracking) so the per-table summary lines in the cell output keep
# their order.
games_raw = pd.read_csv("data/games.csv")
games = clean_games_data(games_raw)

players_raw = pd.read_csv("data/players.csv")
players = clean_players_data(players_raw)

plays_raw = pd.read_csv("data/plays.csv")
plays = clean_plays_data(plays_raw)

tackles_raw = pd.read_csv("data/tackles.csv")
tackles = clean_tackles_data(tackles_raw)

# `tracking` was assembled from the weekly files in an earlier cell.
tracking = clean_tracking_data(tracking)

Games data has been cleaned and memory has been reduced by 2.65625 bytes.
Players data has been cleaned and memory has been reduced by 13.1484375 bytes.
Plays data has been cleaned and memory has been reduced by 1041.80859375 bytes.
Tackles data has been cleaned and memory has been reduced by 476.4921875 bytes.
Tracking data has been cleaned and memory has been reduced by 476070.234375 bytes.


In [5]:
from Cleaning import check_for_snap, check_for_end
# Apply the two tracking-completeness filters in sequence: the snap check runs
# first on `plays`, and its result is handed to the end-of-play check.
full_plays = check_for_end(check_for_snap(plays, tracking), tracking)

100%|█████████████████████████████████████████| 136/136 [02:46<00:00,  1.23s/it]


Removed 57 plays that do not have ball snap.


100%|█████████████████████████████████████████| 136/136 [02:49<00:00,  1.25s/it]

Removed 136 plays that do not have tracking for the end of the play.





In [7]:
# Number of plays left after the check_for_snap / check_for_end filtering.
len(full_plays)

11954

In [8]:
# Number of plays in the cleaned plays table, for comparison with len(full_plays).
len(plays)

12147

In [None]:
# Show the tracking rows of one play (game 2022110609, play 3483) that carry an
# event label.
#
# The previous expression had two problems:
#   * `series != None` is evaluated elementwise and is True for every row,
#     missing values included, so it never filtered anything;
#   * the resulting mask only covered that play's rows, so it could not be
#     aligned with the full `tracking` index when used as `tracking[mask]`.
# Filtering the queried subset directly avoids both.
# NOTE(review): assumes a missing event is stored as NaN/None — confirm against
# clean_tracking_data.
tracking.query('playId == 3483 and gameId == 2022110609').dropna(subset=['event'])

In [None]:
# Print the number of rows in the cleaned plays table.
print(len(plays))

In [None]:
# Display the pff_missedTackle column of the cleaned tackles table.
tackles['pff_missedTackle']

In [None]:
# Attach the play-level columns to every tracking row of the same play.
# The default inner join keeps only (gameId, playId) pairs present in both tables.
data = plays.merge(tracking, on=["gameId", "playId"])

Finding who made the tackle on this play

Show the movement of the players during the play

In [None]:
# TODO: make a more customized version of the player-movement visualization in plotly

In [None]:
# Pull one frame (frameId 20) of play 3126 in game 2022091103 out of the merged data.
exampleFrame = data[
    (data['gameId'] == 2022091103)
    & (data['playId'] == 3126)
    & (data['frameId'] == 20)
]

In [None]:
from Visualizations import animatePlay
from Preprocessing import create_acceleration_vectors, create_velocity_vectors

# Run the tracking data through create_acceleration_vectors and then
# create_velocity_vectors, and pass the result to animatePlay for play 3126 of
# game 2022091103 with velocity=True.
tracking = create_velocity_vectors(create_acceleration_vectors(tracking))
animatePlay(
    games,
    full_plays,
    tracking,
    gameId=2022091103,
    playId=3126,
    velocity=True,
)


In [None]:
# All merged rows that belong to play 80 of game 2022090800.
play = data.query('gameId == 2022090800 and playId == 80')

In [None]:
# Merged rows for a single player (nflId 35472) in play 56 of game 2022090800.
# NOTE(review): this selects playId 56 while the `play` frame uses playId 80 —
# confirm the difference is intended.
playerData = data.loc[
    data['gameId'].eq(2022090800)
    & data['playId'].eq(56)
    & data['nflId'].eq(35472)
]

Pair each player's tracking rows with the football's rows from the same frame, so the relationship between every player and the ball is available at all times.

In [None]:
# Separate the football's rows from the play, then pair each of them with every
# row of the play that shares the same join keys (which include frameId).
# Columns that are not join keys appear twice in the result: suffixed `_x` for
# the football (left side) and `_y` for the paired row (right side).
football = play.loc[play['club'].eq('football')]

shared_columns = [
    'gameId', 'playId', 'ballCarrierId', 'ballCarrierDisplayName',
    'playDescription', 'quarter', 'down', 'yardsToGo', 'possessionTeam',
    'defensiveTeam', 'yardlineSide', 'yardlineNumber', 'gameClock',
    'preSnapHomeScore', 'preSnapVisitorScore', 'passResult', 'passLength',
    'penaltyYards', 'prePenaltyPlayResult', 'playResult',
    'playNullifiedByPenalty', 'absoluteYardlineNumber', 'offenseFormation',
    'defendersInTheBox', 'passProbability', 'preSnapHomeTeamWinProbability',
    'preSnapVisitorTeamWinProbability', 'homeTeamWinProbabilityAdded',
    'visitorTeamWinProbilityAdded', 'expectedPoints', 'expectedPointsAdded',
    'foulName1', 'foulName2', 'foulNFLId1', 'foulNFLId2',
    'frameId', 'time',
    'playDirection', 'event',
]
footballAndPlayer = football.merge(play, on=shared_columns)

In [None]:
# Straight-line distance between the football (`_x` columns) and the paired row
# (`_y` columns) for every row of the merged frame.
x_gap = footballAndPlayer['x_x'] - footballAndPlayer['x_y']
y_gap = footballAndPlayer['y_x'] - footballAndPlayer['y_y']
footballAndPlayer['distanceFromBall'] = np.sqrt(x_gap**2 + y_gap**2)

Keep only the defensive team's rows, giving the distance between each defender and the ball at all times

In [None]:
# Keep only the rows whose paired player (`club_y`) is on the defensive team;
# the team is read from the first row of the merged frame.
defensive_team = footballAndPlayer['defensiveTeam'].iloc[0]
defense = footballAndPlayer[footballAndPlayer['club_y'] == defensive_team]

Plot the distance between each defender and the ball at all times during the play

In [None]:
# Plot each defender's distance from the ball against frameId, one line per
# defender, with a horizontal reference line at 0 and a vertical line at the
# first frame tagged with the "tackle" event.
fig, ax = plt.subplots()

# Group once instead of re-filtering the frame twice per player.
# observed=True keeps only names that actually occur if the column is categorical.
for player, player_rows in defense.groupby('displayName_y', observed=True):
    # Label each line at plot time so the legend cannot get out of step with
    # the order in which the lines were drawn.
    ax.plot(player_rows['frameId'], player_rows['distanceFromBall'], label=player)

ax.axhline(0)

# Not every play necessarily has a "tackle" event; skip the marker instead of
# raising IndexError on an empty selection.
tackle_frames = defense.loc[defense['event'] == "tackle", 'frameId']
if not tackle_frames.empty:
    ax.axvline(tackle_frames.iloc[0])

ax.set_xlabel('frameId')
ax.set_ylabel('Distance from ball')
ax.set_title('Defender distance from the ball')
ax.legend()
plt.show()

Plot the speed of each defender at all times during the play

In [None]:
# Plot each defender's speed (`s_y`) against frameId, one line per defender,
# with a vertical line at the first frame tagged with the "tackle" event.
fig, ax = plt.subplots()

# Group once instead of re-filtering the frame twice per player.
# observed=True keeps only names that actually occur if the column is categorical.
for player, player_rows in defense.groupby('displayName_y', observed=True):
    # Label each line at plot time so the legend cannot get out of step with
    # the order in which the lines were drawn.
    ax.plot(player_rows['frameId'], player_rows['s_y'], label=player)

# Not every play necessarily has a "tackle" event; skip the marker instead of
# raising IndexError on an empty selection.
tackle_frames = defense.loc[defense['event'] == "tackle", 'frameId']
if not tackle_frames.empty:
    ax.axvline(tackle_frames.iloc[0])

ax.set_xlabel('frameId')
ax.set_ylabel('Speed (s_y)')
ax.set_title('Defender speed')
ax.legend()
plt.show()

Plot the acceleration of each defender at all times during the play

In [None]:
# Plot each defender's acceleration (`a_y`) against frameId, one line per
# defender, with a vertical line at the first frame tagged with the "tackle"
# event.
fig, ax = plt.subplots()

# Group once instead of re-filtering the frame twice per player.
# observed=True keeps only names that actually occur if the column is categorical.
for player, player_rows in defense.groupby('displayName_y', observed=True):
    # Label each line at plot time so the legend cannot get out of step with
    # the order in which the lines were drawn.
    ax.plot(player_rows['frameId'], player_rows['a_y'], label=player)

# Not every play necessarily has a "tackle" event; skip the marker instead of
# raising IndexError on an empty selection.
tackle_frames = defense.loc[defense['event'] == "tackle", 'frameId']
if not tackle_frames.empty:
    ax.axvline(tackle_frames.iloc[0])

ax.set_xlabel('frameId')
ax.set_ylabel('Acceleration (a_y)')
ax.set_title('Defender acceleration')
ax.legend()
plt.show()

# Classify each tackle as open-field, sideline, in the hole, solo, or gang.

Criteria for each tackle:
- open field
    - The defender is 