In [4]:
import os
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
import scipy
from random import choice
from scipy.spatial.distance import euclidean
from scipy.special import expit
from IPython.display import HTML
from matplotlib import animation
from tqdm import tqdm
import glob

In [5]:
# Player metadata; joined on nflId further down to look up each player's position.
players_df = pd.read_csv('../data/players.csv')

# Read every tracking CSV (in sorted filename order) and stack them into one frame.
# The per-file row labels are kept as-is, so labels repeat across files.
play_files = sorted(glob.glob('../data/tracking*.csv'))
train_df = pd.concat([pd.read_csv(path, low_memory=False) for path in play_files])

In [6]:
# Number of non-null frameId values for every (gameId, playId) pair, i.e. how many
# tracking rows each play contributes (one row per tracked entity per frame).
group_val = train_df.groupby(['gameId','playId'])['frameId'].count()

In [8]:
# Alias, not a copy: the column added below is also visible through `train_df`.
tracking2024 = train_df

# Look up each row's position by nflId. Copying the column out of a merged frame
# would align on row labels instead of on the player, so rows end up with another
# player's position as soon as the merge drops rows or the labels repeat.
position_by_player = (
    players_df.dropna(subset=['nflId'])
    .drop_duplicates('nflId')
    .set_index('nflId')['position']
)
# Rows whose nflId is missing or has no entry in players_df get NaN.
# `.to_numpy()` makes the assignment purely positional (no label alignment).
tracking2024['position'] = tracking2024['nflId'].map(position_by_player).to_numpy()
tracking2024.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event,position
0,2022090800,56,35472.0,Rodger Saffold,1,2022-09-08 20:24:05.200000,76.0,BUF,left,88.37,27.27,1.62,1.15,0.16,231.74,147.9,,G
1,2022090800,56,35472.0,Rodger Saffold,2,2022-09-08 20:24:05.299999,76.0,BUF,left,88.47,27.13,1.67,0.61,0.17,230.98,148.53,pass_arrived,G
2,2022090800,56,35472.0,Rodger Saffold,3,2022-09-08 20:24:05.400000,76.0,BUF,left,88.56,27.01,1.57,0.49,0.15,230.98,147.05,,G
3,2022090800,56,35472.0,Rodger Saffold,4,2022-09-08 20:24:05.500000,76.0,BUF,left,88.64,26.9,1.44,0.89,0.14,232.38,145.42,,G
4,2022090800,56,35472.0,Rodger Saffold,5,2022-09-08 20:24:05.599999,76.0,BUF,left,88.72,26.8,1.29,1.24,0.13,233.36,141.95,,G


In [9]:
# Map every gameId to the sorted list of playIds recorded for that game.
# Grouping by the bare column name (rather than a one-element list) keeps the keys
# as plain game ids: newer pandas versions yield 1-tuples when iterating over a
# groupby whose `by` is a list of length one.
games_tracking2024 = tracking2024.groupby("gameId")
# Built in a comprehension so no loop variable leaks into the notebook namespace.
games_ids = {
    game: sorted(play_ids.unique().tolist())
    for game, play_ids in games_tracking2024["playId"]
}

In [19]:
def extract_one_game(game_id, play_id, df):
    """
    Collect the per-frame (x, y) tracks of one play.

    Two column layouts are supported:
      * a `team` column holding "home" / "away" (players are split on it), and/or
      * a `club` column holding a club code per player and "football" for the ball.
    When only `club` is present there is no home/away information in the frame, so
    the alphabetically first club is returned as `home` and the second as `away`.
    That labelling is arbitrary; it only keeps the two sides apart.

    inputs:
      game_id: value matched against df.gameId
      play_id: value matched against df.playId
      df: tracking dataframe with gameId, playId, nflId, frameId, jerseyNumber,
        x, y and at least one of `team` / `club`

    output:
      (home, away, balls) where home and away map jersey number (int) to a list
      of (x, y) tuples ordered by frameId, and balls is the same kind of list for
      the rows marked "football".
    """
    play = df[(df.gameId == game_id) & (df.playId == play_id)]
    play = play.sort_values(['frameId'], ascending=True)

    # The ball is identified through `club` when available (falling back to
    # `team`); players are split through `team` when available (falling back to
    # `club`).
    ball_col = 'club' if 'club' in play.columns else 'team'
    side_col = 'team' if 'team' in play.columns else 'club'

    is_ball = play[ball_col] == "football"
    ball_rows = play[is_ball]
    balls = list(zip(ball_rows.x.tolist(), ball_rows.y.tolist()))

    player_rows = play[~is_ball]
    if side_col == 'team':
        side_of = {"home": "home", "away": "away"}
    else:
        clubs = sorted(player_rows['club'].dropna().unique().tolist())
        # zip stops after two clubs; rows of any further club are skipped below.
        side_of = dict(zip(clubs, ("home", "away")))

    tracks = {"home": {}, "away": {}}
    # groupby drops rows with a missing nflId and keeps the frame order per player.
    for _, player in player_rows.groupby('nflId'):
        side = side_of.get(player[side_col].iloc[0])
        if side is None:
            continue
        jersey = int(player.jerseyNumber.iloc[0])
        tracks[side][jersey] = list(zip(player.x.tolist(), player.y.tolist()))

    return tracks["home"], tracks["away"], balls

def extract_one_game_tensor(game_id, play_id, data_loader):
    """
    Scan `data_loader` for the entry belonging to one game/play and slice it.

    inputs:
      game_id, play_id: compared against entry[0][0] and entry[0][1] respectively
      data_loader: iterable of 4-d indexable tensors

    output:
      (home_positions, away_positions, ball_position) sliced from the first
      matching entry, or (None, None, None) when nothing matches.

    NOTE(review): assumes each entry is laid out as
    (batch, frames, players_and_ball, features) with x, y as the first two
    features -- TODO confirm against the data module.
    NOTE(review): entry[0][0] / entry[0][1] are sub-tensors, not scalar ids, unless
    the entry is shaped accordingly -- verify how the ids are stored.
    """
    for entry in data_loader:
        entry_game = entry[0][0]
        entry_play = entry[0][1]
        if not (entry_game == game_id and entry_play == play_id):
            continue

        # Slices along the third axis; the first two features are kept.
        # NOTE(review): `:23` / `23:` look like they were meant to split two teams,
        # and `23:` also contains the last index used for the ball -- confirm.
        home_positions = entry[:, :, :23, :2]
        away_positions = entry[:, :, 23:, :2]
        ball_position = entry[:, :, -1, :2]
        return home_positions, away_positions, ball_position

    return None, None, None


In [20]:
from matplotlib import animation
from IPython.display import HTML
def animate_one_play(game_id, play_id, df):
    """
    Render one play as an HTML5 video: one marker set per side plus the ball.

    inputs:
      game_id, play_id: identify the play inside `df`
      df: tracking dataframe, passed straight to extract_one_game()

    output:
      IPython HTML object wrapping the rendered video.

    Relies on drawPitch(), which is not defined in this part of the notebook and
    is expected to return a (fig, ax) pair.
    """
    fig, ax = drawPitch(100, 53.3)

    home, away, balls = extract_one_game(game_id, play_id, df)

    team_left, = ax.plot([], [], '>', markersize=15, markerfacecolor="r",
                         markeredgewidth=2, markeredgecolor="white", zorder=7)
    team_right, = ax.plot([], [], '<', markersize=15, markerfacecolor="b",
                          markeredgewidth=2, markeredgecolor="white", zorder=7)
    ball, = ax.plot([], [], 'o', markersize=20, markerfacecolor="black",
                    markeredgewidth=2, markeredgecolor="white", zorder=7)
    drawings = [team_left, team_right, ball]

    def init():
        # Blank artists for the first blitted frame.
        for artist in drawings:
            artist.set_data([], [])
        return drawings

    def place_team(artist, tracks, i):
        # Move one side's markers to every player's position at frame index i.
        points = [track[i] for track in tracks.values()]
        artist.set_data([x for x, _ in points], [y for _, y in points])

    def animate(i):
        place_team(team_left, home, i)
        place_team(team_right, away, i)

        x, y = balls[i]
        # set_data takes separate x and y sequences; a single [x, y] argument is
        # unpacked into two scalars, which recent matplotlib versions reject.
        ball.set_data([x], [y])
        return drawings

    # !May take a while!
    anim = animation.FuncAnimation(fig, animate, init_func=init,
                                   frames=len(balls), interval=100, blit=True)

    video = anim.to_html5_video()
    # The video is already rendered; closing the figure keeps the notebook from
    # also showing a static copy and frees it when many plays are animated.
    plt.close(fig)
    return HTML(video)

def animate_one_play_tensor(game_id, play_id, data_loader):
    """
    Look up one play in a tensor data loader as the first step of animating it.

    Raises ValueError when extract_one_game_tensor() finds no matching entry.

    NOTE(review): nothing is drawn or returned after the lookup yet; the
    animation part of this function is still missing.
    """
    extracted = extract_one_game_tensor(game_id, play_id, data_loader)
    home_positions, away_positions, ball_position = extracted

    if any(part is None for part in (home_positions, away_positions, ball_position)):
        raise ValueError("Data for the specified game and play was not found in the data loader.")


In [13]:
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 

# matplotlib animation support (rebinds `animation` to the same module imported above)
import matplotlib.animation as animation
from matplotlib import rc
# Display Animation objects as HTML5 video when they are shown in the notebook.
rc('animation', html='html5')

In [21]:
def get_play_by_frame(fid, ax, los, one_play):
  """
  Draw a single frame of a play as a scatter plot on `ax`.

  inputs:
    fid: frame ID, matched against one_play['frameId']
    ax: matplotlib axis to draw on (wiped first)
    los: x position of the dotted line-of-scrimmage marker
    one_play: pandas dataframe holding the rows of one play

  output:
    none; `ax` is redrawn in place
  """
  # Start from an empty axis, otherwise earlier frames stay visible as a trail.
  ax.cla()

  # Identify the play for the title.
  gid = one_play['gameId'].unique()[0]
  pid = one_play['playId'].unique()[0]

  # Only the rows of the requested frame are plotted, coloured by club,
  # with a fixed dot size of 100.
  frame_rows = one_play[one_play['frameId'] == fid]
  panel = sns.scatterplot(data=frame_rows, x='x', y='y',
                          hue='club', s=100, ax=ax)

  # Vertical guides: dotted line of scrimmage, then solid lines at x=0 and x=100.
  for x_pos, line_style in ((los, ':'), (0, '-'), (100, '-')):
    panel.axvline(x_pos, c='k', ls=line_style)

  panel.set_title(f"game {gid} play {pid}")

  # Replace the hue legend with an empty one and hide it.
  panel.legend([]).set_visible(False)

  # Drop the top, right and left spines.
  sns.despine(left=True)

  # The y axis carries neither a label nor tick marks.
  panel.set_ylabel('')
  panel.set_yticks([])

  # Fixed view window so every frame shares the same scale.
  panel.set_xlim(-10, 110)
  panel.set_ylim(0, 54)

def get_play_by_frame_tensor(frame_index, ax, los, tensor_data):
    """
    Draw one frame of a tensor-encoded play on `ax`.

    inputs:
      frame_index: index along the second axis of tensor_data
      ax: matplotlib axis to draw on; its existing patches, lines and texts are
        removed first
      los: accepted for signature parity with get_play_by_frame(); not used here
      tensor_data: 4-d indexable tensor

    NOTE(review): assumes tensor_data is laid out as
    (batch, frames, players_and_ball, features) with the ball last -- TODO confirm.
    With that layout the loop below walks the batch axis rather than the players,
    and the 'home' comparison is against a tensor value; both need checking
    against the data module.
    """
    # Remove what the previous frame drew. The artist containers are read-only on
    # current matplotlib versions, so each artist is detached individually
    # instead of assigning empty lists.
    for stale in [*ax.patches, *ax.lines, *ax.texts]:
        stale.remove()

    player_positions = tensor_data[:, frame_index, :-1, :]  # every entry but the last
    ball_position = tensor_data[:, frame_index, -1, :2]  # last entry, first two features

    for player_position in player_positions:
        ax.plot(player_position[0], player_position[1], 'o', color='red' if player_position[-1] == 'home' else 'blue')

    # Draw the ball
    ax.plot(ball_position[0], ball_position[1], 'o', color='brown')

def animate_play(one_play):
  """
  Animate one play and save it as a GIF.

  inputs:
    one_play: dataframe holding the rows of a single play (filter the full
      dataset on gameId and playId first). Needs the columns gameId, playId,
      frameId, club, x, y and a 'football' row at frameId 1.

  output:
    the matplotlib FuncAnimation; the GIF is also written to
    '<gameId>_<playId>.gif' in the current working directory.
  """
  # game and play IDs, used for the file name
  gid = one_play['gameId'].unique()[0]
  pid = one_play['playId'].unique()[0]

  # line of scrimmage: x location of the football in the first frame
  los = one_play.loc[(one_play['frameId']==1) & (one_play['club']=='football'), 'x'].values[0]

  # figure size is hard coded
  fig = plt.figure(figsize=(14.4, 6.4))
  ax = fig.gca()

  # Pass the actual frame ids: get_play_by_frame() filters on frameId, so an
  # integer frame count (0..N-1) would start on an empty frame and never reach
  # the last one.
  frame_ids = sorted(one_play['frameId'].unique().tolist())

  # `interval` is the delay between frames in milliseconds (100 ms = 10 fps);
  # repeat=True loops the animation when it is displayed.
  ani = animation.FuncAnimation(fig, get_play_by_frame,
                                frames=frame_ids,
                                interval=100, repeat=True,
                                fargs=(ax, los, one_play,))

  # Close this figure (and only this one) so batch runs do not pile up open
  # figures; the animation can still be rendered afterwards.
  plt.close(fig)

  # save the animation as a gif; the 'imagemagick' writer needs ImageMagick
  # to be installed
  ani.save(f'{gid}_{pid}.gif', writer='imagemagick', fps=10)
  return ani

def animate_play_tensor(tensor_data, los=None, filename='play_animation.gif'):
    """
    Animate a tensor-encoded play and save it as a GIF.

    inputs:
      tensor_data: tensor exposing .size(dim); dimension 1 is taken as the
        number of frames
      los: forwarded to get_play_by_frame_tensor(); optional because that
        function does not draw it
      filename: output path of the GIF (every call with the default overwrites
        the same file)

    output:
      the matplotlib FuncAnimation
    """
    fig, ax = plt.subplots(figsize=(14.4, 6.4))
    num_frames = tensor_data.size(1)

    ani = animation.FuncAnimation(fig, get_play_by_frame_tensor, frames=num_frames,
                                  fargs=(ax, los, tensor_data), interval=100, repeat=True)

    # Same convention as animate_play(): close the figure so repeated calls do
    # not accumulate open figures.
    plt.close(fig)

    # the 'imagemagick' writer needs ImageMagick to be installed
    ani.save(filename, writer='imagemagick', fps=10)

    return ani


In [15]:
# `data` is just another name for the tracking frame (no copy is made);
# the cells below select plays from it.
data = tracking2024
data.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event,position
0,2022090800,56,35472.0,Rodger Saffold,1,2022-09-08 20:24:05.200000,76.0,BUF,left,88.37,27.27,1.62,1.15,0.16,231.74,147.9,,G
1,2022090800,56,35472.0,Rodger Saffold,2,2022-09-08 20:24:05.299999,76.0,BUF,left,88.47,27.13,1.67,0.61,0.17,230.98,148.53,pass_arrived,G
2,2022090800,56,35472.0,Rodger Saffold,3,2022-09-08 20:24:05.400000,76.0,BUF,left,88.56,27.01,1.57,0.49,0.15,230.98,147.05,,G
3,2022090800,56,35472.0,Rodger Saffold,4,2022-09-08 20:24:05.500000,76.0,BUF,left,88.64,26.9,1.44,0.89,0.14,232.38,145.42,,G
4,2022090800,56,35472.0,Rodger Saffold,5,2022-09-08 20:24:05.599999,76.0,BUF,left,88.72,26.8,1.29,1.24,0.13,233.36,141.95,,G


In [1]:
def animate_plot(gameId, playId,data):
    """
    Select one play from a tracking dataframe and animate it.

    inputs:
      gameId, playId: identify the play
      data: tracking dataframe with gameId and playId columns

    output:
      the animation returned by animate_play()

    raises:
      ValueError when no row matches the requested game and play
    """
    play = data.loc[(data['gameId']==gameId) & (data['playId']==playId)]
    if play.empty:
        raise ValueError(f"No tracking rows for game {gameId} play {playId}.")
    # `play` is a dataframe, so it goes to the dataframe animator; the tensor
    # variant calls .size(1), which a dataframe does not support.
    return animate_play(play)

In [None]:
#gameId = 2022101300
#playId = 826
#play = data.loc[(data['gameId']==gameId) & (data['playId']==playId)]
#play.shape
#import time
#start = time.perf_counter()
#animate_play(play)
#end = time.perf_counter()
#total = end-start
#print("Time per gif:", total)

In [2]:
import time

# Render GIFs for the first four plays (in gameId/playId order) and time each one.
i = 0
for i, ((game_id, play_id), group_data) in enumerate(data.groupby(['gameId', 'playId']), start=1):
    start = time.perf_counter()
    animate_play(group_data)
    end = time.perf_counter()
    total = end - start
    print("Time per gif:", total)
    if i >= 4:
        break

NameError: name 'data' is not defined

In [3]:
from dataset import NFLDataModule
import pathlib

# Build the project's data module on the same ../data directory used above and
# take its validation data loader.
data_dir = pathlib.Path("../data")
dmod = NFLDataModule(data_dir)
# NOTE(review): "val" is whatever stage name NFLDataModule.setup() expects --
# confirm against dataset.py.
dmod.setup("val")
dataloader = dmod.val_dataloader()



: 

: 

In [None]:
import time  # imported here so the cell does not depend on an earlier cell having run

# Animate the first two batches and report how long each one takes.
# The loop variable is named `batch` so it does not overwrite the `data`
# dataframe alias defined earlier in the notebook.
i = 1
for batch in dataloader:
    i += 1
    start = time.perf_counter()
    animate_play_tensor(batch)
    # f-string with an actual perf_counter() call; without both, the template
    # text itself is printed.
    print(f"Time per gif: {time.perf_counter() - start:.2f}")
    if i > 2:
        break