In [1]:
import re
import math
import pandas as pd
import numpy as np
import matplotlib.animation as animation
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from tqdm import tqdm

pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_colwidth', 1000)
pd.set_option('display.max_rows', 1000)
plt.rcParams["animation.html"] = "jshtml"
plt.rcParams['figure.dpi'] = 150  
plt.ioff()

In [2]:
games = pd.read_csv("./data/games.csv")
players = pd.read_csv("./data/players.csv")
plays = pd.read_csv("./data/plays_with_target.csv")
weeks = [] 
for i in range(1,18):
    weeks.append(pd.read_csv("./data/week{}.csv".format(i)))

In [3]:
def posession(plays_row):
    if games.query("gameId=={}".format(plays_row.gameId)).visitorTeamAbbr.iloc[0] == plays_row.possessionTeam:
        return 'away'
    else:
        return 'home'
    
def get_week(gameID, playID):
    return games.query("gameId=={}".format(gameID)).iloc[0].week - 1
    
def get_week_df(gameID, playID):
    wk = get_week(gameID, playID)
    week_df = weeks[wk]
    information = week_df[(week_df.playId==playID)&(week_df.gameId==gameID)]
    return information, wk

# this function requires 'plays' dataframe
def animate_play(gameID, playID):
    plt.close('all')
    fig, ax = plt.subplots()
    fig.set_figheight(7)
    fig.set_figwidth(14)
    
    information, wk = get_week_df(gameID, playID)
    max_frame = information.frameId.max()
    print("GAME: {}\tPLAY: {}\tWEEK: {}\tFRAME: {}".format(gameID, playID, wk, max_frame))
    
    home, = ax.plot([],[], linestyle='None', marker='o', markersize=12, c='C0')
    away, = ax.plot([],[], linestyle='None', marker='o', markersize=12, c='C1')
    foot, = ax.plot([],[], linestyle='None', marker='o', markersize=12, c='C2')
    lines = [home, away, foot]
    
    def draw_field():
        ax.set_xlim(0, 120)
        ax.set_ylim(-5, 58.3)
        # Endzones
        ax.add_patch(Rectangle((  0, 0), width=10, height=53.3, alpha=0.5, color='grey'))
        ax.add_patch(Rectangle((110, 0), width=10, height=53.3, alpha=0.5, color='grey'))
        # Sidelines
        ax.plot([0,120],[0,0], c='grey', linewidth=2)
        ax.plot([0,120],[53.3,53.3], c='grey', linewidth=2)
        for i in range(10, 120, 10):
            # 10 Yard lines
            ax.plot([i,i],[0,53.3], c='grey', linewidth=2)
            if abs(60-i) <=40:
                # Numbers
                ax.text(i-2.5, 12, str(50-abs(60-i)), color="grey", fontsize=24)
                ax.text(i-2.5, 41.3, str(50-abs(60-i)), color="grey", fontsize=24, rotation=180)
        # Hash marks
        for i in range(10, 110):
            ax.plot([i,i], [0, 1], c='grey')
            ax.plot([i,i], [23, 24], c='grey')
            ax.plot([i,i], [29.3, 30.3], c='grey')
            ax.plot([i,i], [52.3, 53.3], c='grey')
        
        # Drawing the line of scrimmage
        scrimmage = information.query("frameId==1 and displayName=='Football'").iloc[0].x
        ax.plot([scrimmage,scrimmage],[0, 53.3], c='darkblue', linewidth=2)
        
        # Drawing the First Down Line
        play_information = plays.query("playId=={} and gameId=={}".format(playID, gameID)).iloc[0]
        ax.set_title(play_information.playDescription)
        home_team = games[games.gameId==gameID].iloc[0].homeTeamAbbr
        poss_team = play_information.possessionTeam
        team_query_term = "home"
        if home_team != poss_team:
            team_query_term = "away"
        shit = information.query("frameId==1 and team=='{}'".format(team_query_term)).iloc[0].x
        first_down_line = scrimmage - play_information.yardsToGo
        if shit < scrimmage:
            first_down_line = scrimmage + play_information.yardsToGo
        ax.plot([first_down_line,first_down_line],[0, 53.3], c='gold', linewidth=2)
        return lines

    def plot_players(t):
        home_filtered = information[(information.frameId==t)&(information.team=="home")]
        away_filtered = information[(information.frameId==t)&(information.team=="away")]
        foot_filtered = information[(information.frameId==t)&(information.team=="football")]
        home.set_data(home_filtered.x.array, home_filtered.y.array)
        away.set_data(away_filtered.x.array, away_filtered.y.array)
        foot.set_data(foot_filtered.x.array, foot_filtered.y.array)
        lines = [home, away, foot]
        
        if t%10 == 0:
            print(t,end = ',')
        return lines
    
    return animation.FuncAnimation(fig, plot_players, frames=range(1, max_frame+1), init_func=draw_field, blit=True)
    #return animation.FuncAnimation(fig, plot_players, frames=range(20, 21), init_func=draw_field, blit=True)

# For every play, who is covering the target?
One way you could figure this out is by the proximity to the target. I've already abandonded this method before, but I have a feeling that this might work pretty well for figuring out the cover since we already know the target. Essentially, just figuring out which defenders are near.

For every row in all week_\s.csv dfs, I want to know the distance from the football. However, for some plays, the location of the football is missing. Here are the plays that football location is missing:

In [40]:
for i in range(len(weeks)):
    w = weeks[i]
    f = w.query("displayName=='Football'")
    shit = w[["gameId", "playId", "frameId"]].groupby(["gameId", "playId"], as_index=False).nunique().frameId
    shat = f[["gameId", "playId", "frameId"]].groupby(["gameId", "playId"], as_index=False).nunique().frameId
    if (shit-shat).sum() > 0:
        fuck = w[["gameId", "playId", "frameId"]].groupby(["gameId", "playId"], as_index=False).nunique()
        print(fuck[shit!=shat])
    print(i+1, "=============")

         gameId  playId  frameId
423  2018092304    1687       61
         gameId  playId  frameId
445  2018101404    2003       53
869  2018101410     431       67
         gameId  playId  frameId
185  2018102101    3078       47
         gameId  playId  frameId
690  2018111108    2581       77
          gameId  playId  frameId
1007  2018112510    3572       57
         gameId  playId  frameId
585  2018120206    3991       72
          gameId  playId  frameId
1184  2018122400    2493       63
         gameId  playId  frameId
941  2018123013    2436       84


There aren't a lot of them, but we need to know that there are error cases. If you animate some of these plays, you'll see that the ball simply goes missing:

In [42]:
animate_play(2018102101, 3078)

GAME: 2018102101	PLAY: 3078	WEEK: 6	FRAME: 47
10,20,30,40,

So when we reach these cases, we just assign np.Inf. Now we calculate the distance to the ball for every row in weeks_c\d.csv

In [44]:
# find fx and fy for every row
for j in range(2, len(weeks)):
    print(j, " started.")
    shit = weeks[j]
    dists = []
    football = {}
    # record all (gameId, playId, frameId) = (x,y)
    shat = shit.query("displayName=='Football'")
    for i in range(len(shat)):
        fuck = shat.iloc[i]
        football[(fuck.gameId, fuck.playId, fuck.frameId)] = (fuck.x, fuck.y)
    
    for i in tqdm(range(len(shit))):
        fuck = shit.iloc[i]
        k = (fuck.gameId, fuck.playId, fuck.frameId)
        x,y = np.Inf, np.Inf
        if k in football:
            x, y = football[k]            
        dist = math.sqrt((x - fuck.x)**2 + (y-fuck.y)**2)
        dists.append(dist)
    weeks[j] = shit.assign(distFootball=dists)
    weeks[j].to_csv("./data/week{}_with_dist.csv".format(j+1))

2  started.


100%|██████████████████████████████████████████████████████████████████████| 1168345/1168345 [02:57<00:00, 6584.86it/s]


3  started.


100%|██████████████████████████████████████████████████████████████████████| 1205527/1205527 [03:01<00:00, 6659.92it/s]


4  started.


100%|██████████████████████████████████████████████████████████████████████| 1171908/1171908 [02:54<00:00, 6703.24it/s]


5  started.


100%|██████████████████████████████████████████████████████████████████████| 1072563/1072563 [02:43<00:00, 6568.57it/s]


6  started.


100%|████████████████████████████████████████████████████████████████████████| 982583/982583 [02:27<00:00, 6662.48it/s]


7  started.


100%|██████████████████████████████████████████████████████████████████████| 1001501/1001501 [02:30<00:00, 6640.52it/s]


8  started.


100%|████████████████████████████████████████████████████████████████████████| 958464/958464 [02:30<00:00, 6371.60it/s]


9  started.


100%|████████████████████████████████████████████████████████████████████████| 964889/964889 [02:27<00:00, 6524.60it/s]


10  started.


100%|████████████████████████████████████████████████████████████████████████| 932240/932240 [02:18<00:00, 6717.43it/s]


11  started.


100%|██████████████████████████████████████████████████████████████████████| 1024868/1024868 [02:32<00:00, 6739.97it/s]


12  started.


100%|██████████████████████████████████████████████████████████████████████| 1172517/1172517 [02:54<00:00, 6723.68it/s]


13  started.


100%|██████████████████████████████████████████████████████████████████████| 1161644/1161644 [02:51<00:00, 6772.59it/s]


14  started.


100%|██████████████████████████████████████████████████████████████████████| 1081222/1081222 [02:40<00:00, 6722.46it/s]


15  started.


100%|██████████████████████████████████████████████████████████████████████| 1144037/1144037 [02:49<00:00, 6743.37it/s]


16  started.


100%|██████████████████████████████████████████████████████████████████████| 1049265/1049265 [02:32<00:00, 6896.78it/s]
