In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm
import plotly.express as px
from scipy.spatial import ConvexHull
from scipy.spatial.distance import cdist
from pathlib import Path
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from plotly_gif import GIF, capture
from collections import Counter
import plotly.subplots as sp

In [2]:
# Recorder object handed to the @capture(gif) decorator on figure_play_by_frame and
# later asked to write the animation via gif.create_gif(...).
gif = GIF(verbose=False)

In [3]:
def get_convex_hull(df, football_location, n_neighbors=3):
    """Convex hull of the players closest to the football.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per player, with numeric ``x`` and ``y`` columns.
    football_location : pandas.DataFrame
        Row(s) with the ``x`` / ``y`` position of the football.
    n_neighbors : int, default 3
        How many of the nearest players (per football row) form the hull.

    Returns
    -------
    tuple
        ``(x_hull, y_hull, hull_area)`` where ``x_hull`` / ``y_hull`` are the hull
        vertices as a closed ring (first vertex repeated at the end, ready to be
        drawn as a polygon) and ``hull_area`` is the enclosed area.

    Raises
    ------
    scipy.spatial.QhullError
        If the selected points cannot form a 2-D hull (fewer than three points,
        or all points collinear).
    """
    # Euclidean distance from every football row to every player row.
    distances = cdist(football_location[['x', 'y']], df[['x', 'y']], metric='euclidean')

    # Positional indices of the n_neighbors closest players for each football row.
    nearest_idx = distances.argsort(axis=1)[:, :n_neighbors]
    nearest_points = df.iloc[nearest_idx.flatten()]

    hull = ConvexHull(nearest_points[['x', 'y']].to_numpy())

    # For 2-D input scipy reports the enclosed area as ``volume``; ``area`` is the
    # perimeter there, which is what this function used to return by mistake.
    hull_area = hull.volume

    # Repeat the first vertex so the outline is closed when plotted.
    ring = list(hull.vertices) + [hull.vertices[0]]
    x_hull = nearest_points['x'].iloc[ring].tolist()
    y_hull = nearest_points['y'].iloc[ring].tolist()
    return x_hull, y_hull, hull_area

In [4]:
def hex_to_rgb_array(hex_color):
    """Convert a hex colour string such as ``'#97233F'`` into a 3-element RGB numpy array."""
    digits = hex_color.lstrip('#')
    # Two hex digits per channel: red, green, blue.
    channels = [int(digits[start:start + 2], 16) for start in (0, 2, 4)]
    return np.array(channels)

def ColorDistance(hex1,hex2):
    """Weighted distance between two hex colours; 0 when both strings are equal.

    The squared per-channel differences are weighted by ``(2 + rm, 4, 3 - rm)``,
    where ``rm`` is the mean of the two red channels, summed, and the square
    root of the absolute value of that sum is returned.

    NOTE(review): the channels are used on the 0-255 scale, so ``3 - rm`` is
    usually negative (hence the ``abs``). The similarity threshold in
    ColorPairs is tuned to these values, so confirm before rescaling.
    """
    if hex1 == hex2:
        return 0
    first = hex_to_rgb_array(hex1)
    second = hex_to_rgb_array(hex2)
    red_mean = 0.5*(first[0]+second[0])
    squared_diff = (first-second)**2
    weighted = (2+red_mean,4,3-red_mean)*squared_diff
    return abs(sum(weighted))**0.5

def ColorPairs(team1,team2):
    """Pick [primary, secondary] colours for two teams so their primaries do not clash.

    team1 always keeps its first two palette colours in order. team2's two
    colours are swapped when ColorDistance between both primaries is below 500.
    The returned dict is keyed by both team names plus ``'football'``.
    """
    palette_1 = colors[team1]
    palette_2 = colors[team2]
    # Primaries too similar -> use team2's secondary colour as its primary.
    too_similar = ColorDistance(palette_1[0],palette_2[0])<500
    team2_order = (1, 0) if too_similar else (0, 1)
    return {
        team1: [palette_1[0], palette_1[1]],
        team2: [palette_2[position] for position in team2_order],
        'football': colors['football'],
    }
# Hex colour palettes keyed by club abbreviation, plus a 'football' entry.
# ColorPairs reads positions 0 and 1 of each list (and returns the 'football'
# list as-is); a third entry, where present, is not read by the code in this notebook.
colors = {
    'ARI':["#97233F","#000000","#FFB612"],
    'ATL':["#A71930","#000000","#A5ACAF"],
    'BAL':["#241773","#000000"],
    'BUF':["#00338D","#C60C30"],
    'CAR':["#0085CA","#101820","#BFC0BF"],
    'CHI':["#0B162A","#C83803"],
    'CIN':["#FB4F14","#000000"],
    'CLE':["#311D00","#FF3C00"],
    'DAL':["#003594","#041E42","#869397"],
    'DEN':["#FB4F14","#002244"],
    'DET':["#0076B6","#B0B7BC","#000000"],
    'GB' :["#203731","#FFB612"],
    'HOU':["#03202F","#A71930"],
    'IND':["#002C5F","#A2AAAD"],
    'JAX':["#101820","#D7A22A","#9F792C"],
    'KC' :["#E31837","#FFB81C"],
    'LA' :["#003594","#FFA300","#FF8200"],
    'LAC':["#0080C6","#FFC20E","#FFFFFF"],
    'LV' :["#000000","#A5ACAF"],
    'MIA':["#008E97","#FC4C02","#005778"],
    'MIN':["#4F2683","#FFC62F"],
    'NE' :["#002244","#C60C30","#B0B7BC"],
    'NO' :["#101820","#D3BC8D"],
    'NYG':["#0B2265","#A71930","#A5ACAF"],
    'NYJ':["#125740","#000000","#FFFFFF"],
    'PHI':["#004C54","#A5ACAF","#ACC0C6"],
    'PIT':["#FFB612","#101820"],
    'SEA':["#002244","#69BE28","#A5ACAF"],
    'SF' :["#AA0000","#B3995D"],
    'TB' :["#D50A0A","#FF7900","#0A0A08"],
    'TEN':["#0C2340","#4B92DB","#C8102E"],
    'WAS':["#5A1414","#FFB612"],
    'football':["#CBB67C","#663831"]
}

In [5]:
def get_3_lowest_tto(group):
    """Return the ``nflId_defender`` values of the (up to) three rows with the smallest ``eTT``."""
    ranked = group.sort_values(by='eTT')
    return ranked['nflId_defender'].head(3).tolist()

In [6]:
# Folder that holds the CSV inputs read in the next cell. Set the BDB_DATA_DIR
# environment variable to run the notebook on another machine; the original
# location is kept as the default so existing setups behave as before.
data_dir = Path(os.environ.get("BDB_DATA_DIR", r'C:\Users\isaac\Desktop\Proyectos\BDB_24'))
assert data_dir.exists(), f"data directory not found: {data_dir}"

In [7]:
# Weekly tracking files are only listed here (sorted by path); they are read in a later cell.
tracking_dfs = sorted(data_dir.glob("tracking_week_*.csv"))
assert len(tracking_dfs), "no tracking dfs"

# Small lookup tables: plays, tackles, players and games.
play_df = pd.read_csv(data_dir / "plays.csv")
tackle_df = pd.read_csv(data_dir / "tackles.csv")
players_df = pd.read_csv(data_dir / "players.csv")
games_df = pd.read_csv(data_dir / "games.csv")

In [8]:
# Pre-computed defender/carrier table. The path is derived from data_dir instead of
# repeating the absolute folder, so there is a single place to change the location.
# Only load pickles you produced yourself: unpickling can execute arbitrary code.
ttt_df = pd.read_pickle(data_dir / 'tracking_data_clean.pkl')

In [9]:
# Keep only rows whose receiptEvent is 'pass_outcome_caught'. The explicit .copy()
# makes the result an independent frame, so the column assignments in later cells
# (o_rel, eTT) do not write into a slice and trigger SettingWithCopyWarning.
ttt_df = ttt_df.loc[ttt_df['receiptEvent']=='pass_outcome_caught'].copy()

In [10]:
# Columns fed to the regression model, in the order they are passed to predict().
ttt_features = [
    'hull_area_defense',
    'hull_area_offense',
    'dist_to_carrier',
    's_defender',
    's_carrier',
    'o_rel',
]

In [11]:
def relative_angle(theta_carrier, theta_defender):
    """Absolute angular difference between defender and carrier, scaled by 1/180.

    Both arguments are pandas Series of angles in degrees. Differences outside
    [-180, 180] are wrapped with ``360 - |diff|`` before dividing by 180.
    Returns a numpy array.
    """
    delta = theta_defender - theta_carrier
    magnitude = np.abs(delta)
    # True where the raw difference goes the "long way round" and must be wrapped.
    needs_wrap = ~delta.between(-180, 180)
    return np.where(needs_wrap, 360 - magnitude, magnitude) / 180


# Relative orientation feature used by the model (listed in ttt_features).
ttt_df["o_rel"] = relative_angle(
    theta_carrier=ttt_df["o_carrier"],
    theta_defender=ttt_df["o_defender"],
)

In [12]:
# Load the trained regressor from the shared data folder rather than a second
# hard-coded absolute path, so data_dir is the single place to relocate the project.
regression_model = xgb.XGBRegressor()
regression_model.load_model(str(data_dir / "eTtTO_model.json"))

In [13]:
# Predict eTT for every row from the standardised feature columns.
# NOTE(review): the StandardScaler is fitted on this (already filtered) table at
# prediction time; confirm this matches the scaling the model was trained with.
ttt_df['eTT'] = regression_model.predict(
    StandardScaler().fit_transform(ttt_df[ttt_features])
)


In [14]:
# Negative predictions are replaced by 0; all other values are left untouched.
ttt_df['eTT'] = ttt_df['eTT'].mask(ttt_df['eTT'] < 0, 0)

In [15]:
# Read every weekly tracking file (entries that are already DataFrames are passed
# through unchanged) and stack them into one frame with a fresh 0..n-1 index.
# tracking_df is now only ever a DataFrame, and ignore_index avoids the extra
# copy that a separate reset_index() made.
tracking_frames = [
    pd.read_csv(item) if isinstance(item, Path) else item
    for item in tqdm(tracking_dfs)
]
tracking_df = pd.concat(tracking_frames, ignore_index=True)

  0%|          | 0/9 [00:00<?, ?it/s]

In [16]:
# Colours cycled through for the per-tackler eTtTO lines in the lower panel.
_TACKLER_LINE_COLORS = ('red', 'blue', 'yellow', 'green', 'purple', 'orange',
                        'cyan', 'magenta', 'lime', 'pink', 'brown')
# Factor applied to the tracking column "s" before it is labelled "MPH" in the hover text.
# NOTE(review): 2.23693629205 is the metres/second -> miles/hour factor; confirm the unit of "s".
_SPEED_TO_MPH = 2.23693629205


def _yard_number_trace(y):
    """Text trace printing the yard numbers 10..50..10 across the field at height ``y``."""
    xs = np.arange(20, 110, 10)
    labels = list(map(str, list(np.arange(20, 61, 10) - 10) + list(np.arange(40, 9, -10))))
    return go.Scatter(
        x=xs,
        y=[y] * len(xs),
        mode='text',
        text=labels,
        textfont_size=30,
        textfont_family="Courier New, monospace",
        textfont_color="#ffffff",
        showlegend=False,
        hoverinfo='none',
    )


def _vertical_line_trace(x, color):
    """Dashed full-width vertical line at field coordinate ``x`` (scrimmage / first-down line)."""
    return go.Scatter(
        x=[x, x],
        y=[0, 53.5],
        line_dash='dash',
        line_color=color,
        showlegend=False,
        hoverinfo='none',
    )


def _endzone_trace(x_min, fill_color):
    """Filled 10-unit-wide rectangle starting at ``x_min``, outlined in white."""
    return go.Scatter(
        x=[x_min, x_min, x_min + 10, x_min + 10, x_min],
        y=[0, 53.5, 53.5, 0, 0],
        fill="toself",
        fillcolor=fill_color,
        mode="lines",
        line=dict(color="white", width=3),
        opacity=1,
        showlegend=False,
        hoverinfo="skip",
    )


def _player_trace(plot_df, team, fill, outline, jersey_numbers, hover_texts):
    """Marker+jersey-number trace for one team; ``fill``/``outline`` may be scalars or per-player lists."""
    n_players = len(plot_df)
    return go.Scatter(
        x=plot_df["x"],
        y=plot_df["y"],
        mode="markers+text",
        marker=go.scatter.Marker(
            color=fill,
            line=go.scatter.marker.Line(width=2, color=outline),
            size=12,
        ),
        name=team,
        hovertext=hover_texts,
        hoverinfo="text",
        text=jersey_numbers,
        textfont={
            "color": ['White'] * n_players,
            "family": ["Arial"] * n_players,
            "size": [10] * n_players,
        },
        legendgroup='1',
    )


def _hull_trace(plot_df, football_location, team, team_colors):
    """Filled outline of the convex hull returned by get_convex_hull for one team."""
    x_hull, y_hull, _ = get_convex_hull(plot_df, football_location)
    return go.Scatter(
        x=x_hull,
        y=y_hull,
        mode='lines',
        fill='toself',
        marker=go.scatter.Marker(
            color=team_colors[0],
            line=go.scatter.marker.Line(width=2, color=team_colors[1]),
            size=10,
        ),
        name=f'{team} hull',
        hoverinfo='text',
    )


@capture(gif)
def figure_play_by_frame(games,tracking_df,play_df,players,gameId,playId,ttt_df,frameId,hull=False):
    """Build the two-panel figure for one frame of one play.

    The upper panel draws the field (yard numbers, line of scrimmage, first-down
    line, end zones) and every player at ``frameId``; defenders whose id is among
    the three lowest ``eTT`` values of the frame are drawn in red. The lower panel
    plots ``eTT`` against ``frameId`` (up to and including ``frameId``) for every
    player listed for this play in the module-level ``tackle_df``.

    Parameters
    ----------
    games : DataFrame with ``gameId``, ``homeTeamAbbr`` and ``visitorTeamAbbr``.
    tracking_df : DataFrame of tracking rows (``gameId``, ``playId``, ``frameId``,
        ``club``, ``x``, ``y``, ``s``, ``nflId``, ``jerseyNumber``, ``displayName``,
        ``playDirection``).
    play_df : DataFrame of plays (``absoluteYardlineNumber``, ``yardsToGo``, ``down``,
        ``quarter``, ``gameClock``, ``playDescription``, ``possessionTeam``).
    players : accepted for interface compatibility; not read by this function.
    gameId, playId, frameId : identifiers selecting the frame to draw.
    ttt_df : DataFrame with ``eTT`` per ``nflId_defender`` and ``frameId``.
    hull : bool, default False. When true, also draw each team's convex hull.

    Returns
    -------
    The plotly figure created by ``make_subplots``.

    Changes compared with the previous implementation:
    * the tracking table is filtered without first copying it in full;
    * each tackler line gets its own palette colour (the counter used to be reset
      on every iteration and the colour was set on ``marker.line``, which a
      ``mode='lines'`` trace does not draw);
    * a tackler / defender without an ``eTT`` row for the frame no longer raises
      ``IndexError`` (the line is skipped, the hover text shows ``N/A``);
    * team order is sorted, so the colour assignment does not depend on set order.
    """
    specs = [
        [{"rowspan": 3, "colspan": 2}, None],
        [{}, {}],
        [{}, {}],
        [{"rowspan": 2, "colspan": 2}, None],
        [{}, {}]
    ]
    megafig = sp.make_subplots(rows=5, cols=2,
                               specs=specs,
                               subplot_titles=("",'','',"",'','Expected time to tackle opportunity'))

    # --- Select the rows for this game / play / frame (read-only, so no copies). ---
    selected_game_df = games.loc[games['gameId']==gameId]
    selected_play_df = play_df.loc[(play_df['playId']==playId)&(play_df['gameId']==gameId)]
    selected_ttt_df = ttt_df[(ttt_df['playId']==playId)&(ttt_df['gameId']==gameId)]
    frame_ttt_df = selected_ttt_df[selected_ttt_df['frameId']==frameId]
    # tackle_df is read from module scope (it is not a parameter of this function).
    selected_tackle_ids = tackle_df.loc[(tackle_df['playId']==playId)&(tackle_df['gameId']==gameId)]['nflId'].unique()
    selected_tracking_df = tracking_df.loc[(tracking_df['playId']==playId)&
                                           (tracking_df['gameId']==gameId)&
                                           (tracking_df['frameId']==frameId)]

    # get good color combos (sorted so the result is the same on every run)
    team_combos = sorted(set(selected_tracking_df.club.unique())-set(["football"]))
    color_orders = ColorPairs(team_combos[0],team_combos[1])

    # get play General information
    line_of_scrimmage = selected_play_df['absoluteYardlineNumber'].values[0]
    yards_to_go = selected_play_df['yardsToGo'].values[0]
    if selected_tracking_df['playDirection'].values[0] == "right":
        first_down_marker = line_of_scrimmage + yards_to_go
    else:
        first_down_marker = line_of_scrimmage - yards_to_go
    down = selected_play_df['down'].values[0]
    quarter = selected_play_df['quarter'].values[0]
    gameClock = selected_play_df['gameClock'].values[0]
    playDescription = selected_play_df['playDescription'].values[0]

    # Handle case where we have a really long Play Description and want to split it into two lines
    description_words = playDescription.split(" ")
    if len(description_words)>15 and len(playDescription)>115:
        playDescription = " ".join(description_words[0:16]) + "<br>" + " ".join(description_words[16:])

    # Defenders with the three lowest eTT values at this frame (empty when no eTT rows exist).
    lowest_3_tto_ids = get_3_lowest_tto(frame_ttt_df) if len(frame_ttt_df) else []

    home_team = selected_game_df['homeTeamAbbr'].values[0]
    visitor_team = selected_game_df['visitorTeamAbbr'].values[0]

    # --- Static field decoration. ---
    data = [
        _yard_number_trace(5),
        _yard_number_trace(53.5-5),
        _vertical_line_trace(line_of_scrimmage, 'blue'),      # line of scrimmage
        _vertical_line_trace(first_down_marker, 'yellow'),    # first down line
        _endzone_trace(0, color_orders[home_team][0]),
        _endzone_trace(110, color_orders[visitor_team][0]),
    ]

    # --- Players and football. ---
    football_location = selected_tracking_df.loc[selected_tracking_df['club']=='football']
    poss_team = selected_play_df['possessionTeam'].iloc[0]
    for team in selected_tracking_df['club'].unique():
        plot_df = selected_tracking_df.loc[selected_tracking_df['club']==team]
        team_colors = color_orders[team]

        if team == "football":
            data.append(go.Scatter(x=plot_df["x"], y=plot_df["y"],
                                   mode='markers',
                                   marker=go.scatter.Marker(
                                       color=team_colors[0],
                                       line=go.scatter.marker.Line(width=2, color=team_colors[1]),
                                       size=10),
                                   name=team, hoverinfo='none',
                                   legendgroup='1'))
            continue

        is_offense = team == poss_team
        hover_text_array = []
        jerseyNumbers = []
        color_points = []
        color_lines = []
        for _, player in plot_df.iterrows():
            nfl_id = int(player["nflId"])
            jerseyNumbers.append(str(int(player['jerseyNumber'])))
            speed = round(player["s"] * _SPEED_TO_MPH, 3)
            hover = f"nflId:{nfl_id}<br>displayName:{player['displayName']}<br>Player Speed:{speed} MPH"

            if not is_offense:
                # Defenders additionally show their eTT and may be highlighted.
                ett_rows = frame_ttt_df.loc[frame_ttt_df['nflId_defender']==nfl_id, 'eTT']
                ett_label = format(ett_rows.iloc[0], '.2f') if len(ett_rows) else 'N/A'
                hover += f"<br>Expected time to tackle:{ett_label}"
                highlighted = nfl_id in lowest_3_tto_ids
                color_points.append('#F43636' if highlighted else team_colors[0])
                color_lines.append('#F43636' if highlighted else team_colors[1])

            hover_text_array.append(hover)

        if is_offense:
            fill, outline = team_colors[0], team_colors[1]
        else:
            fill, outline = color_points, color_lines
        data.append(_player_trace(plot_df, team, fill, outline, jerseyNumbers, hover_text_array))

        if hull:
            data.append(_hull_trace(plot_df, football_location, team, team_colors))

    scale=10
    title=f"GameId: {gameId}, PlayId: {playId}<br>{gameClock} {quarter}Q"+"<br>"*27+f"{playDescription}"

    # Create First Down Markers
    for y_val in [0,53]:
        megafig.add_annotation(
            x=first_down_marker,
            y=y_val,
            text=str(down),
            showarrow=False,
            font=dict(
                family="Courier New, monospace",
                size=16,
                color="black"
            ),
            align="center",
            bordercolor="black",
            borderwidth=2,
            borderpad=4,
            bgcolor="#ff7f0e",
            opacity=1
        )

    # Add Team Abbreviations in EndZone's
    for x_min, angle, teamName in [(0, 270, home_team), (110, 90, visitor_team)]:
        megafig.add_annotation(
            x=x_min+5,
            y=53.5/2,
            text=teamName,
            showarrow=False,
            font=dict(
                family="Courier New, monospace",
                size=32,
                color="White"
            ),
            textangle=angle
        )

    # --- eTT history of every tackler on this play, up to the current frame. ---
    scatter_times_data = []
    for position, id_ in enumerate(selected_tackle_ids):
        player_ttt_df = selected_ttt_df[(selected_ttt_df['frameId']<=frameId)&
                                        (selected_ttt_df['nflId_defender']==id_)]
        if player_ttt_df.empty:
            # No eTT rows yet for this tackler: nothing to draw and no name to read.
            continue
        # Colour is tied to the tackler's position in the list so it stays stable across frames.
        line_color = _TACKLER_LINE_COLORS[position % len(_TACKLER_LINE_COLORS)]
        scatter_times_data.append(
            go.Scatter(
                x=player_ttt_df["frameId"],
                y=player_ttt_df["eTT"],
                mode='lines',
                line=dict(width=2, color=line_color),
                name=f"{player_ttt_df['displayName_defender'].values[0]} ({player_ttt_df['jerseyNumber_defender'].values[0]})",
                legendgroup='2'
            )
        )

    for trace in data:
        megafig.add_trace(trace,row=1,col=1)
    for trace in scatter_times_data:
        megafig.add_trace(trace,row=4,col=1)

    megafig.update_layout(title_text=title,
                          xaxis=dict(range=[0, 120], autorange=False, tickmode='array',tickvals=np.arange(10, 111, 5).tolist(),showticklabels=False),
                          yaxis=dict(range=[0, 53.3], autorange=False,showgrid=False,showticklabels=False),
                          xaxis5 = dict(showgrid=False,title='Frame'),
                          yaxis5 = dict(showgrid=False,title='eTtTO'),
                          legend_tracegroupgap=280,
                          width=120*scale,
                          height=70*scale)
    megafig.update_layout(
        shapes=[
            # Change field color
            dict(type="rect",
                 xref="x",
                 yref="paper",
                 x0=0,
                 y0=1,
                 x1=120,
                 y1=0.445,
                 fillcolor='#00B140',
                 opacity=0.5,
                 layer="below",
                 line_width=0,
                 ),
        ]
    )

    return megafig

In [17]:
# Game / play to animate, named once so the frame lookup and the render call cannot drift apart.
GAME_ID = 2022091100
PLAY_ID = 458

# Rows of this play only. figure_play_by_frame filters by game/play itself, so passing
# these pre-filtered frames gives the same figures while avoiding a scan (and, in the
# function's current form, a copy) of the full tables on every frame.
df_frames = ttt_df.loc[(ttt_df['playId']==PLAY_ID)&(ttt_df['gameId']==GAME_ID)]
assert not df_frames.empty, f"no eTT rows for game {GAME_ID}, play {PLAY_ID}"
play_tracking_df = tracking_df.loc[(tracking_df['playId']==PLAY_ID)&(tracking_df['gameId']==GAME_ID)]

# int() because range() rejects numpy / float frame ids.
first_frame = int(df_frames['frameId'].min())
last_frame = int(df_frames['frameId'].max())
for frame in range(first_frame, last_frame+1):
    figure_play_by_frame(games_df,play_tracking_df,play_df,players_df,GAME_ID,PLAY_ID,df_frames,frame)

In [18]:
# Write the captured frames to <data_dir>/Vis/ instead of a second hard-coded absolute path.
gif.create_gif(gif_path=str(data_dir / 'Vis' / 'fig_most_difficult_tackle.gif'),length=15000)

In [19]:
# Exploratory check of the defender jersey-number column.
ttt_df.loc[:, 'jerseyNumber_defender']

0          45
1          99
2           2
3           5
4          54
           ..
3681266     6
3681267    26
3681268    99
3681269    21
3681270    98
Name: jerseyNumber_defender, Length: 1222771, dtype: int32

In [20]:
# Exploratory lookup of a single player by display name.
players_df.loc[players_df['displayName'].eq('Marcus Maye')]

Unnamed: 0,nflId,height,weight,birthDate,collegeName,position,displayName
435,44851,6-0,207,1993-03-09,Florida,FS,Marcus Maye
