In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import pylab as pl
from matplotlib import collections as mc
from PIL import ImageColor

# Notebook-wide pandas settings: silence the chained-assignment warning and
# widen the console display so wide frames print without truncation.
pd.set_option('mode.chained_assignment', None)
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500
pd.options.display.width = 1000
pd.set_option('display.max_colwidth', 400)

#plt.rcParams["font.family"] = "serif"

# Team abbreviation -> hex colour used as the bubble fill colour in
# plot_single (the 'teamcolor' column).
team_colors = {"TB": "#D50A0A", "DET": "#0076b6", "TEN": "#0C2340", "BAL": "#241773",
               "GB": "#FFB612", "MIA": "#FC4C02", "DAL": "#041E42", "BUF": "#00338D",
               "CLE": "#311D00", "HOU": "#03202f", "WAS": "#FFB612", "LAC": "#0080C6",
               "CHI": "#c83803", "KC": "#E31837", "NYJ": "#125740", "PHI": "#004C54",
               "NYG": "#0B2265", "IND": "#002C5F", "ATL": "#000000", "MIN": "#4F2683",
               "CAR": "#0085CA", "CIN": "#fb4f14", "LA": "#866D4B", "ARI": "#000000",
               "NE": "#B0B7BC", "SF": "#AA0000", "OAK": "#000000", "JAX": "#006778",
               "PIT": "#FFB612", "NO": "#D3BC8D", "DEN": "#FB4F14", "SEA": "#69BE28",
               }

# Team abbreviation -> hex colour used as the bubble edge colour in
# plot_single (the 'teamcolor2' column).
team_colors2 = {"TB": "#FF7900", "DET": "#B0B7BC", "TEN": "#4B92DB", "BAL": "#000000",
                "GB": "#203731", "MIA": "#008E97", "DAL": "#869397", "BUF": "#C60C30",
                "CLE": "#ff3c00", "HOU": "#A71930", "WAS": "#773141", "LAC": "#FFC20E",
                "CHI": "#0B162A", "KC": "#FFB81C", "NYJ": "#000000", "PHI": "#000000",
                "NYG": "#a71930", "IND": "#A2AAAD", "ATL": "#a71930", "MIN": "#FFC62F",
                "CAR": "#BFC0BF", "CIN": "#000000", "LA": "#002244", "ARI": "#97233F",
                "NE": "#002244", "SF": "#B3995D", "OAK": "#A5ACAF", "JAX": "#9F792C",
                "PIT": "#101820", "NO": "#101820", "DEN": "#002244", "SEA": "#002244",
                }


def fix_positions(test_df):
    """Relabel a fixed set of players as cornerbacks ("CB").

    The 'position' value is overwritten for every row whose 'name' matches
    one of the listed players. The frame is modified in place and the same
    object is returned.
    """
    relabel_as_cb = ("J.C. Jackson", "Jimmie Ward", "Isaac Yiadom")
    for player_name in relabel_as_cb:
        test_df.loc[test_df.name == player_name, 'position'] = "CB"
    return test_df


def rgb2hex(r, g, b):
    """Format three integer colour channels as a lowercase '#rrggbb' string."""
    return f'#{r:02x}{g:02x}{b:02x}'


def _signed_zscore(values, sign=-1):
    """Return ``sign`` times the z-score of a Series (pandas mean / sample std)."""
    return sign * ((values - values.mean()) / values.std())


def season_grades(df):
    """Aggregate per-play coverage rows into per-player season grades.

    ``df`` is modified in place: the per-row columns 'cpoa', 'cpoa_targ',
    'cpoa_nontarg', 'aycpoe', 'lockdown' and 'blown' are added, and 'epa' is
    set to NaN on sack rows that are not flagged as lockdown coverage.

    Args:
        df: DataFrame with the columns 'id', 'name', 'position',
            'incoverage', 'targeted', 'sack', 'cp', 'aycp', 'cpoe',
            'outcome' and 'epa'.

    Returns:
        A tuple ``(Players, average_grades, average_grades_dict)``:
        ``average_grades`` is a DataFrame indexed by (id, name), restricted to
        players whose summed 'incoverage' is at least 100 and sorted by
        'overall_grade' descending; ``Players`` is its index as an array;
        ``average_grades_dict`` is ``average_grades.to_dict('dict')``.
    """
    in_coverage = df.incoverage == 1
    cp_minus_aycp = df['cp'] - df['aycp']

    # Per-row metrics, only filled in where the player was in coverage.
    df.loc[in_coverage, 'cpoa'] = cp_minus_aycp
    df.loc[in_coverage & (df.targeted == 1), 'cpoa_targ'] = cp_minus_aycp
    df.loc[in_coverage & (df.targeted != 1), 'cpoa_nontarg'] = cp_minus_aycp
    df.loc[in_coverage, 'aycpoe'] = df['outcome'] - df['aycp']
    df.loc[in_coverage & (df['cpoa'] <= -0.15), 'lockdown'] = 1
    df.loc[in_coverage & (df['cpoa'] >= 0.15), 'blown'] = 1

    # On sacks, EPA is only credited to defenders flagged as lockdown; it is
    # blanked for everyone else so it drops out of the per-player mean.
    # NOTE(review): the previous code also re-assigned 'epa' to itself for
    # lockdown sack rows (a no-op, removed). The write-up says that EPA is
    # clipped at -2.0, but no clipping is applied here - confirm intent.
    df.loc[(df.sack == 1) & (df['lockdown'] != 1), 'epa'] = np.nan

    # Group once and reuse for the sums, the position lookup and the EPA mean.
    by_player = df.groupby(['id', 'name'])
    summed_columns = ['aycp', 'cp', 'cpoa', 'cpoa_targ', 'cpoa_nontarg', 'cpoe',
                      'aycpoe', 'epa', 'lockdown', 'blown', 'incoverage',
                      'targeted', 'outcome']
    average_grades = by_player[summed_columns].sum()
    # Explicit copy: the columns added below must not write into a slice.
    average_grades = average_grades.loc[average_grades.incoverage >= 100].copy()

    n_coverage = average_grades['incoverage']
    n_targeted = average_grades['targeted']

    average_grades['position'] = by_player['position'].last()
    average_grades['compPercentage'] = average_grades['outcome'] / n_targeted
    average_grades['targtedPercentage'] = n_targeted / n_coverage

    # Turn the season sums into per-snap / per-target averages.
    average_grades['cpoa'] = average_grades['cpoa'] / n_coverage
    average_grades['cpoa_targ'] = average_grades['cpoa_targ'] / n_targeted
    average_grades['cpoa_nontarg'] = average_grades['cpoa_nontarg'] / (n_coverage - n_targeted)
    average_grades['cpoa_targdiff'] = average_grades['cpoa_targ'] - average_grades['cpoa_nontarg']
    average_grades['cp'] = average_grades['cp'] / n_coverage
    average_grades['aycp'] = average_grades['aycp'] / n_coverage
    average_grades['cpoe'] = average_grades['cpoe'] / n_targeted
    average_grades['lockdown'] = average_grades['lockdown'] / n_coverage
    average_grades['blown'] = average_grades['blown'] / n_coverage
    average_grades['aycpoe'] = average_grades['aycpoe'] / n_targeted

    # EPA is a mean over the rows where it is present, not sum / count.
    average_grades['epa'] = by_player['epa'].mean()

    # Z-scores across the retained players. The sign is flipped for every
    # metric except 'lockdown', so that a higher *_norm is always better.
    for column, sign in (('cpoa', -1), ('cpoa_targ', -1), ('cpoa_nontarg', -1),
                         ('cpoe', -1), ('epa', -1), ('lockdown', 1), ('blown', -1)):
        average_grades[column + '_norm'] = _signed_zscore(average_grades[column], sign)

    average_grades['overall_grade'] = 0.1 * ((0.33 * average_grades['cpoa_targ_norm'] +
                                       0.33 * average_grades['cpoe_norm'] +
                                       0.33 * average_grades['epa_norm']) + 5.0) + 0.25
    average_grades = average_grades.sort_values('overall_grade', ascending=False)

    Players = average_grades.index.values
    average_grades_dict = average_grades.to_dict('dict')

    return Players, average_grades, average_grades_dict


def coverage_sacks(df):
    """Count, per defensive team, the sack plays that qualify as coverage sacks.

    Only rows with sack == 1 are considered. A play qualifies when the highest
    'cp' among its rows is below 0.4 or the mean 'cp' is below 0.3. The result
    is a one-column ('sack') frame indexed by 'defTeam', sorted by count in
    descending order.
    """
    sack_plays = df.loc[df.sack == 1].groupby(['playid'])

    per_play = sack_plays[['cp', 'sack']].max()
    per_play['defTeam'] = sack_plays[['defTeam']].last()['defTeam']
    per_play['meancp'] = sack_plays[['cp', 'sack']].mean()['cp']

    well_covered = (per_play.cp < 0.4) | (per_play.meancp < 0.3)
    per_team = per_play.loc[well_covered].groupby(['defTeam'])[['sack']].count()
    return per_team.sort_values('sack', ascending=False)


def create_field(minY, maxY, ax, LOS, YardsToGo):
    """Paint a football-field background onto ``ax``.

    Everything is drawn on the axes that is passed in (previously the
    ``plt.*`` calls targeted whatever axes happened to be current). The
    vertical axis is the long axis of the field: lines at 0/10 and 110/120
    bound the two "END ZONE" labels, and the yard-number labels count up to
    50 at y = 60. Items strictly between ``minY`` and ``maxY`` are drawn; the
    end-zone lines and labels are drawn when the window reaches below 10 or
    above 110.

    Args:
        minY: lower edge of the visible window, in field coordinates.
        maxY: upper edge of the visible window, in field coordinates.
        ax: matplotlib Axes to draw on.
        LOS: y position of the line of scrimmage (blue line).
        YardsToGo: offset from ``LOS`` to the first-down marker (yellow line).
    """
    def in_view(y):
        return minY < y < maxY

    def paint_line(y, width, color='white'):
        ax.axhline(y, color=color, linewidth=width, alpha=0.4, zorder=0)

    def paint_label(x, y, label, rotation):
        ax.text(x, y, label, color='white', fontsize=40, verticalalignment='center',
                alpha=0.6, horizontalalignment='center', weight='bold',
                rotation=rotation, zorder=0)

    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    ax.set_facecolor('#ADDDA7')
    ax.set_alpha(0.5)

    # Yard lines every 5 yards, with heavier goal, end and midfield lines.
    for yard in range(10, 110, 5):
        if in_view(yard):
            paint_line(yard, 3)
    if minY < 10:
        paint_line(0, 5)
        paint_line(10, 5)
    if maxY > 110:
        paint_line(110, 5)
        paint_line(120, 5)
    if in_view(60):
        paint_line(60, 5)

    # Yard numbers on both sides of the field: the label "i" sits at
    # y = i + 10 and, mirrored, at y = 110 - i; "50" sits at y = 60.
    for x, rotation in ((53.3 - 12, 90), (12, 270)):
        for i in range(10, 50, 10):
            for y in (i + 10, 110 - i):
                if in_view(y):
                    paint_label(x, y, str(i), rotation)
        if in_view(60):
            paint_label(x, 60, str(50), rotation)

    if minY < 10:
        paint_label(26.5, 5, "END ZONE", 0)
    if maxY > 110:
        paint_label(26.5, 115, "END ZONE", 0)

    # Hash marks on every yard that is not a multiple of five.
    lines = []
    for yard in range(10, 110):
        if yard % 5 != 0 and in_view(yard):
            lines.append([(22.6, yard), (23.6, yard)])
            lines.append([(53.3 - 22.6, yard), (53.3 - 23.6, yard)])
    ax.add_collection(mc.LineCollection(lines, colors='white', linewidths=2,
                                        alpha=0.6, zorder=0))

    # Line of scrimmage and first-down marker.
    paint_line(LOS, 3, color='blue')
    paint_line(LOS + YardsToGo, 3, color='yellow')



def plot_single(GAMES, TRACKING, PLAYERS, PLAYS, test_df, playID, Coverage_sacks):
    """Draw one play as a top-down field diagram with per-player coverage labels.

    Each tracking row of the play becomes a bubble (sized from the player's
    weight, coloured by team, red when ``targetedReceiver == 1``) with a
    speed/direction arrow. Rows that carry a ``defenderId`` get a thin red
    arrow to that defender. Rows with a ``cp`` value get "EPA"/"CPOE" and
    "CPOA" text labels; the CPOA label is coloured between blue and red and
    gets a star when CPOA <= -0.2. The figure is shown with ``plt.show()``.

    Args:
        GAMES: frame with 'gameId', 'visitorTeamAbbr', 'homeTeamAbbr'.
        TRACKING: frame with 'uniquePlayId', 'gameId', 'team', 'nflId',
            'defenderId', 'displayName', 'position', 'jerseyNumber',
            'targetedReceiver', 'cp', 'cpoe', 's', 'x', 'y', 'o', 'dir'.
        PLAYERS: frame with 'nflId' and 'weight'.
        PLAYS: frame with 'uniquePlayId', 'absoluteYardlineNumber',
            'yardsToGo' and 'playDescription'.
        test_df: frame with 'playid', 'name', 'epa' and 'aycp'.
        playID: play identifier; converted with ``int()`` for all lookups.
        Coverage_sacks: unused; kept so existing calls keep working.

    Raises:
        IndexError: if the play has no rows in TRACKING or PLAYS, or its
            game is missing from GAMES.
    """
    play_key = int(playID)
    fig, ax = pl.subplots(figsize=(14, 25))

    # Explicit copy: colour columns are added below and must not touch TRACKING.
    frame = TRACKING.loc[TRACKING.uniquePlayId == play_key].copy()

    # Determine teams and colors
    gameId = frame['gameId'].to_numpy()[0]
    matchup = GAMES.loc[GAMES.gameId == gameId, ['visitorTeamAbbr', 'homeTeamAbbr']].to_numpy()
    awayteam, hometeam = matchup[0, 0], matchup[0, 1]

    play_rows = PLAYS.loc[PLAYS.uniquePlayId == play_key]
    play_info = play_rows[['absoluteYardlineNumber', 'yardsToGo']].to_numpy()
    LOS = play_info[0, 0]
    YardsToGo = play_info[0, 1]

    for side, abbr in (("home", hometeam), ("away", awayteam)):
        on_side = frame.team == side
        frame.loc[on_side, 'teamcolor'] = team_colors[abbr]
        frame.loc[on_side, 'teamcolor2'] = team_colors2[abbr]
    is_ball = frame.team == "football"
    frame.loc[is_ball, 'teamcolor'] = 'brown'
    frame.loc[is_ball, 'teamcolor2'] = 'white'

    minY = 120
    for row in frame.to_dict('records'):
        DisplayName = row['displayName']
        is_football = DisplayName == "Football"
        CP = row['cp']
        CPOE = row['cpoe']

        # Reset for every player. Previously CPOA was only assigned when a
        # test_df row existed, so a player without one silently reused the
        # previous player's value (or hit a NameError on the first row).
        EPA = np.nan
        CPOA = np.nan
        graded = test_df.loc[(test_df.playid == play_key) & (test_df['name'] == DisplayName),
                             ['epa', 'aycp']].to_numpy()
        if graded.shape[0] > 0:
            EPA = graded[0, 0]
            CPOA = CP - graded[0, 1]

        # Bubble size comes from the listed weight; rows with no PLAYERS
        # entry fall back to a fixed size.
        weight = PLAYERS.loc[PLAYERS.nflId == row['nflId'], 'weight'].to_numpy()
        PlayerWeight = float(weight[0]) * 1.6 if weight.shape[0] > 0 else 40

        S = row['s']
        # Tracking x runs along the field and is plotted vertically;
        # tracking y is mirrored across the 160/3-wide field.
        Y = row['x']
        X = (-1 * row['y']) + (160 / 3.0)

        # If the QB is ahead of the stored line of scrimmage or more than 10
        # behind it, mirror the line (presumably the play runs in the other
        # direction - TODO confirm).
        if row['position'] == "QB" and (Y - LOS > 0 or Y - LOS < -10):
            LOS = 120 - LOS

        # Text colour picker: black text on light bubbles, white on dark.
        teamcolor = row['teamcolor']
        RGB = ImageColor.getrgb(teamcolor)
        luminance = RGB[0] * 0.299 + RGB[1] * 0.587 + RGB[2] * 0.114
        textColor = 'black' if luminance > 150 else 'white'

        if row['targetedReceiver'] == 1:
            teamcolor = 'red'
            textColor = 'yellow'

        # Plot player (the football is drawn above the players)
        ax.scatter(X, Y, s=(PlayerWeight * 1.5), alpha=0.9, color=teamcolor,
                   edgecolors=row['teamcolor2'], linewidths=1.8,
                   zorder=14 if is_football else 12)
        if not is_football:
            ax.annotate(str(int(row['jerseyNumber'])),
                        (X, Y),
                        textcoords="offset points",
                        xytext=(0, 0),
                        ha='center',
                        va='center',
                        fontsize=12,
                        color=textColor,
                        alpha=0.99,
                        weight='bold',
                        rotation=90 - row['o'],
                        zorder=13)

        if pd.notna(CP):
            if pd.notna(EPA):
                # A CPOE of -1 is treated as "no CPOE" and only EPA is shown.
                label = "EPA: " + str(np.round(EPA, 2))
                if CPOE != -1:
                    label += "\nCPOE: " + str(np.round(CPOE, 2))
                ax.annotate(label,
                            (X, Y),
                            textcoords="offset points",
                            xytext=(0, 32),
                            ha='center',
                            va='bottom',
                            fontsize=14,
                            color='black',
                            zorder=13)
            if pd.notna(CPOA):
                # Map CPOA -0.25 .. +0.25 onto blue .. red.
                A = np.clip(CPOA * 2 + 0.5, 0, 1)
                grade_color = rgb2hex(int(255 * A), 0, int(255 * (1.0 - A)))
                cpoa_label = "CPOA: " + str(np.round(CPOA, 2))
                if CPOA <= -0.2:
                    cpoa_label += ' ' + u'★'
                ax.annotate(cpoa_label,
                            (X, Y),
                            textcoords="offset points",
                            xytext=(0, 22),
                            ha='center',
                            va='center',
                            fontsize=14,
                            weight="bold",
                            color=grade_color,
                            zorder=13)

        # Thin red arrow from this player to the assigned defender.
        defenderId = row['defenderId']
        if pd.notna(defenderId):
            defender = frame.loc[frame.nflId == defenderId, ['x', 'y']].to_numpy()
            # Skip the link when the defender has no row in this frame.
            if defender.shape[0] > 0:
                defender_Y = float(defender[0, 0])
                defender_X = (-1 * float(defender[0, 1])) + (160 / 3.0)
                ax.arrow(X, Y, defender_X - X, defender_Y - Y,
                         head_width=0.2, width=0.02, alpha=0.5, color='red')

        if Y < minY:
            minY = Y

        # Plot speed and direction arrow
        heading = (row['dir'] - 90) * np.pi / 180
        ax.arrow(X, Y, np.sin(heading) * S, np.cos(heading) * S,
                 head_width=0.6, width=0.25, alpha=0.25, color=team_colors['NYG'])

    # Show a 40-unit window starting 5 below the lowest plotted object.
    minY = minY - 5
    maxY = minY + 40

    create_field(minY, maxY, ax, LOS, YardsToGo)

    ax.set_aspect(1)
    ax.set_xlim(0, 160 / 3.0)
    ax.set_ylim(minY, maxY)
    # str() of a one-element array, so the description is rendered with the
    # array's surrounding brackets and quotes.
    description = str(play_rows[['playDescription']].to_numpy()[0])
    ax.text(26, minY + 1, description, size=12, ha='center')
    ax.text(1, maxY - 2, str(playID), color='white', size=12)

    plt.show()
    


# Kaggle input locations.
PATH_PLAYERS = '/kaggle/input/nfl-big-data-bowl-2021/players.csv'
PATH_GAMES = '/kaggle/input/nfl-big-data-bowl-2021/games.csv'
PATH_PLAYS = '/kaggle/input/extractfeatures/PLAYS-targetData.csv'
PATH_TRACKING = '/kaggle/input/testmodel/TRACKING-tminus5-targetData-with-CP-CPOE.csv'
PATH_TESTDF = '/kaggle/input/testmodel/test_df.csv'

# Load every input table, in the same order as before.
PLAYERSpd = pd.read_csv(PATH_PLAYERS)
GAMES = pd.read_csv(PATH_GAMES)
PLAYS = pd.read_csv(PATH_PLAYS)
TRACKING = pd.read_csv(PATH_TRACKING)
test_df = fix_positions(pd.read_csv(PATH_TESTDF))

# Season-level aggregates shared by the cells below.
Players, average_grades, average_grades_dict = season_grades(test_df)
Coverage_sacks = coverage_sacks(test_df)


# 🏈 Comprehensive Pass Coverage Grading 🏈

In [None]:
# Example play drawn at the top of the write-up.
playID = '20181125023507'
plot_single(GAMES=GAMES, TRACKING=TRACKING, PLAYERS=PLAYERSpd, PLAYS=PLAYS,
            test_df=test_df, playID=playID, Coverage_sacks=Coverage_sacks)

## Introduction
My goal was to create an automatic and comprehensive method to grade pass coverage for individual pass defenders.  A comprehensive grade should encompass most, if not all, aspects of a pass defender's job.  This includes how well the defender covers before the pass is thrown (coverage skills), how well the defender plays when the ball is in the air (ball skills), forcing turnovers, and limiting yards after the catch.  A method which can objectively, automatically, and comprehensively grade pass defenders in this way would be very useful in evaluating the overall effectiveness of pass defenders.  

## Approach
I chose to approach this problem by modeling completion probability (CP).  I strategically chose to look at only one moment during the play - half of a second before the QB releases the pass, or is sacked.  I chose this particular moment because the QB still has the ball, and the pass defenders are still covering their assignment, as opposed to moving towards the intended receiver.  Therefore, we can compute a completion probability for all defenders in primary coverage at this moment during the play. 

**Coverage Skills -> Completion Probability Over Average (CPOA)**

One minor issue with using CP as a proxy for coverage is that the CP models are highly dependent on air yards.  We know that the further the distance the ball travels down field, generally the lower the completion probability.  Therefore, defenders that are defending deeper down the field will automatically have lower CP.  In order to account for this, I trained a second CP model using only the X and Y distance between the passer and receiver as input features.  I call this model air yard completion probability (AYCP).  The difference between CP and AYCP produces a measure that accounts for the depth of the receiver.  I call this completion ***probability*** over average (CPOA).  CPOA in effect measures how well a defender is covering his assignment relative to other defenders at a similar depth. The values typically range from -0.3 to +0.3, where lower is better.

**Ball Skills -> Completion Percentage Over Expectation (CPOE)**

The same CP model can also be utilized to measure "ball skills".  I use this term to refer to what the defender does between the time the ball is thrown, and the arrival of the pass to the receiver.  If a defender is targeted, how quickly does he close in on the intended receiver to break up the pass and prevent the completion?  We can create a proxy for this as well, by using the aforementioned completion percentage over expectation (CPOE).  This is simply the difference between the binary outcome of the play, and the completion probability.

**Context -> Expected Points Added (EPA)**

And finally, I incorporate expected points added (EPA).  EPA accounts for situational importance, penalties, forcing turnovers, and tackling after the catch.  It is very useful because it gives context to a play, because we know that not all plays are created equal (6 yards on 3rd and 15 is not the same as 6 yards on 3rd and 2). EPA is independent of the CP model and provided in the BDB data. 

**Final Grade -> Fusion of CPOA, CPOE, EPA**

To compute one final comprehensive grade, CPOA, CPOE, and EPA are normalized, averaged, and scaled to fit a 0 - 1 grading system.  

## Data Processing
1. Determine the intended receiver.  For most plays this information was extracted from the play description.  On plays resulting in a sack, and in rare instances of incompletions, the description does not have an intended receiver.  For these plays CP can still be computed, but CPOE cannot.
2. Assign a generic route for receivers on sack plays.  The data does not provide route information on plays that ended in a sack.  Offensive skill players that were a minimum distance downfield, or moving at a minimum speed, were assigned a generic route.  This allowed me to incorporate plays resulting in a sack into the grading. Intentional grounding was also treated as a sack.
3. Assign a primary defender to each player running a route.  In order to train a CP model that measures how well a coverage defender is performing, we must first assign coverage responsibility.  The defender assignment was done using an average of the current distance between receiver and defender (tail of the arrows in figures), and the projected distance (head of the arrows in the figures). 
4. Account for penalties.  A defender can play good coverage, force an incompletion, and the play can still result in a positive EPA if there is a roughing the passer penalty.  In such situations, I do not count the EPA against the pass defender.  I took such penalty scenarios into account by parsing the play descriptions.

## Feature Extraction
The features used in this work were inspired by the winning 2020 BDB solution.  For each receiver running a route, I compute the following features:
* X position, Y position, X and Y speed components of the route runner
* X position, Y position, X and Y speed components of the defender
* Relative X and Y between route runner and defender
* Relative X and Y speed components between route runner and defender
* Relative X and Y between route runner and quarterback
* Distance between defender and sideline
* One hot features indicating route runner's position (WR, TE, RB, FB)

The features were normalized to be on a similar scale.  For each route runner, I compute 18 features for the 3 nearest defenders to each potential receiver, creating a 3 x 18 feature matrix. 

Why three defenders? Not all defenders on the field will influence the CP of a given route runner. Some defenders are covering a different receiver on the opposite side of the field, or even rushing the passer.  However, there are many instances where receivers are double covered, or the cornerback has safety help over the top.  In these scenarios, the secondary and possibly third defender can make an impact on the CP.  This is the reason for modeling the 3 nearest defenders, but not including all 11 defenders on the field.

## Completion Probability Model
The core of this work revolves around the completion probability model, since it's used to compute both CPOA and CPOE. The model I designed was a neural network with time distributed dense layers. Although I use *time* distributed layers, there actually is no time component. The fully connected dense layers are distributed across the three nearest defenders. This in effect mimics one dimensional convolutional layers, which were also used by the winner of BDB 2020.  Below are more details about the training of the model.

* 10-fold cross validation for training models (So CPs are averaged across 10 models)
* Dropout layers to prevent over-fitting and carefully monitored the loss
* Batch normalization after each hidden layer
* Sigmoid activations
* Binary cross entropy loss
* Adam Optimizer
* Trained each fold for 50 epochs
* Learning rate 0.01

![Architecture](https://i.imgur.com/94x6Fk9.png)

**Figure 2** : Basic model architecture.  Excluding batch normalization, dropout and activation layers for sake of simplicity

## Analysis
In order to better visualize the method I created plots.  Although I can only show a few examples, my code will automatically generate these figures for any play in this dataset.  
* The thin red lines between bubbles link the primary defender to receiver
* The arrows show the magnitude and direction of player speed
* The size of the bubble is proportional to player weight, so TEs and LBs will generally appear as bigger circles
* The number inside each bubble corresponds to the player's jersey number.  Rotation corresponds to the orientation
* The bubbles are color coded by team. But, the intended receiver is always red
* The Line of scrimmage is blue and first down line is yellow
* The numbers above the defenders are what each defender is credited on a given play in terms of CPOA, CPOE and EPA
* CPOA is color coded. Red -> bad coverage.  Blue -> good coverage.  I plot a "★" when a defender has a CPOA of -0.2 or lower.

 

In [None]:
# Jameis to Evans
playID = '20181223064098'
plot_single(GAMES=GAMES, TRACKING=TRACKING, PLAYERS=PLAYERSpd, PLAYS=PLAYS,
            test_df=test_df, playID=playID, Coverage_sacks=Coverage_sacks)

**Figure 3** : Example of a blown coverage.  #24 of the Cowboys lets #13 of the Bucs get open in the back of the endzone resulting in an easy touchdown.  The rest of the defenders in coverage did a good job, and it is reflected in their CPOA grade on this play.  #24 is also credited for the EPA since his assignment was targeted.

In [None]:
# Cam is sacked due to good coverage
playID = '20181104022215'
plot_single(GAMES=GAMES, TRACKING=TRACKING, PLAYERS=PLAYERSpd, PLAYS=PLAYS,
            test_df=test_df, playID=playID, Coverage_sacks=Coverage_sacks)

**Figure 4** : Example of a coverage sack.  All of the receivers on the field are tightly covered.  The quarterback has nowhere to throw the ball, and gets sacked for a loss of 9 yards. Defenders with exceptional coverage on a play resulting in a sack are given credit for EPA, but it is clipped at -2.0

In [None]:
import plotly.graph_objects as go
from plotly.colors import n_colors

def _color_bins(values, n_bins=20):
    """Min-max scale ``values`` onto integer colour indices 0 .. n_bins - 1."""
    low = np.min(values)
    span = np.max(values) - low
    if span == 0:
        # All values equal: avoid 0/0 and give every row the first colour.
        return np.zeros(len(values), dtype='int')
    return np.array((values - low) / span * (n_bins - 1), dtype='int')


# Cornerbacks with at least 300 in-coverage rows and an average AYCP below 0.59.
CB = average_grades.loc[(average_grades.incoverage >= 300) & (average_grades.aycp < 0.59)]
criteria = (CB.position == "CB")
CB = CB.loc[criteria, ['cpoa', 'cpoa_targ', 'cpoe', 'epa', 'lockdown', 'blown', 'overall_grade']]

# Players given a highlight colour in `colors2`.
# NOTE(review): what yellow vs. lime stands for is not visible in this part
# of the notebook - presumably two tiers of award selections; confirm.
yellow_names = {"Stephon Gilmore", "Xavien Howard", "Byron Jones", "Kyle Fuller", "Desmond King"}
lime_names = {"Patrick Peterson", "Trae Waynes", "James Bradberry", "Jalen Ramsey",
              "Darius Slay", "Tre'Davious White", "Marlon Humphrey"}

playernames = CB.index.values
names = []
colors2 = []
# The index entries are (id, name) pairs; only the name is displayed. The id
# is unpacked into a throwaway so the builtin `id` is no longer shadowed.
for _player_id, name in playernames:
    name = str(name)
    # Shorten two long surnames for display.
    name = name.replace("Robey-Coleman", "Robey-Cole")
    name = name.replace("Boddy-Calhoun", "Boddy-Calh")
    names.append(name)
    if name in yellow_names:
        colors2.append('yellow')
    elif name in lime_names:
        colors2.append('lime')
    else:
        colors2.append('black')


# 20-step colour ramp used to shade the table cells.
colors = n_colors('rgb(108, 165, 212)', 'rgb(182, 96, 96)', 20, colortype='rgb')
a = CB[['cpoa_targ']].to_numpy().flatten()
b = CB[['cpoe']].to_numpy().flatten()
c = CB[['epa']].to_numpy().flatten()
d = CB[['overall_grade']].to_numpy().flatten()
rank = np.arange(1, len(a) + 1, 1)
anorm = _color_bins(a)
bnorm = _color_bins(b)
cnorm = _color_bins(c)
# The grade is the only column where higher is better, so its ramp is reversed.
dnorm = 19 - _color_bins(d)


fig = go.Figure(data=[go.Table(
  columnwidth = [5,20],
  header=dict(
    values=['<b>Rank</b>', '<b>Player</b>', '<b>Average CPOA</b>', '<b>Average CPOE</b>', '<b>Average EPA</b>', '<b>Grade</b>'],
    line_color='LightSteelBlue', fill_color='LightSteelBlue',
    align='center',font=dict(color='black', size=12)
  ),
  cells=dict(
    values=[rank, names, np.round(a,3), np.round(b,3), np.round(c,3), np.round(d,3)],
    fill_color=[np.array(colors)[dnorm], np.array(colors)[dnorm], np.array(colors)[anorm], np.array(colors)[bnorm], np.array(colors)[cnorm], np.array(colors)[dnorm]],
    line_color=[np.array(colors)[dnorm], np.array(colors)[dnorm], np.array(colors)[anorm], np.array(colors)[bnorm], np.array(colors)[cnorm], np.array(colors)[dnorm]],
    align='center', font=dict(color=['black', colors2, 'black', 'black', 'black', 'black'], size=12)
    ))
])

fig.update_layout(
    autosize=False,
    width=800,
    height=1200,
    margin=dict(
        l=20,
        r=20,
        b=20,
        t=50,
        pad=20
    ),
    paper_bgcolor="LightSteelBlue",
)

fig.update_layout(
    title={
        'text': "Top Cornerbacks of 2018",
        'yanchor': 'top',
        'x' : 0.5,
        'y' : 0.99},
    titlefont={
        'size': 32,
        'color' : 'black'
    })

fig.show()

**Figure 5** : Comprehensive grades for cornerbacks in 2018.  Minimum 300 snaps in coverage as primary defender.

[AP All Pro selections](https://apnews.com/article/2820450002) are highlighted in yellow.  2 of the top 4 CBs using this grading method were All Pro players (Gilmore, Howard).  I consider this evidence that this method is a good and comprehensive approach to grading cornerbacks.

[Highest paid cornerbacks](https://overthecap.com/position/cornerback/) are highlighted in green. 4 of the top 6 CBs using this grading method are 4 of the top 10 highest paid CBs (Humphrey, Howard, Slay, White).

How about the other guys in the top 10?  Well, it is possible that they just had an isolated good year.  I did some research and found evidence that both [5. Steven Nelson](https://twitter.com/PFF_Chiefs/status/1105613988967170048/photo/1) and [7. Coty Sensabaugh](https://twitter.com/PFF/status/1112388084522459136) were good cornerbacks in 2018 by some  measure.

In [None]:
# Build the "Top Nickelbacks of 2018" table (Figure 6).
#
# Nickelbacks are players listed as CB with at least 200 coverage snaps whose
# average AYCP is at or above the 0.59 cut-off that this notebook uses to
# separate them from outside corners.

# Players whose names are highlighted in the Player column.
ALL_PRO_CBS = {"Stephon Gilmore", "Xavien Howard", "Byron Jones", "Kyle Fuller", "Desmond King"}
TOP_PAID_CBS = {"Patrick Peterson", "Trae Waynes", "James Bradberry", "Jalen Ramsey",
                "Darius Slay", "Tre'Davious White", "Marlon Humphrey"}


def _shade_index(values, n_shades=20):
    """Bin ``values`` linearly into integer shade indices ``0 .. n_shades - 1``.

    The smallest value maps to 0 and the largest to ``n_shades - 1``.  An
    empty input returns an empty array, and a constant (or non-finite) range
    returns all zeros instead of dividing by zero and producing invalid
    palette indices.  Kept inside this cell so the cell runs on its own.
    """
    values = np.asarray(values, dtype='float')
    if values.size == 0:
        return np.zeros(0, dtype='int')
    low = np.min(values)
    span = np.max(values) - low
    if not span > 0:
        return np.zeros(values.size, dtype='int')
    # Casting to int truncates, which equals floor here since values are >= 0.
    return np.array((values - low) / span * (n_shades - 1), dtype='int')


CB = average_grades.loc[(average_grades.incoverage >= 200) & (average_grades.aycp >= 0.59)]
criteria = CB.position == "CB"
CB = CB.loc[criteria, ['cpoa', 'cpoa_targ', 'cpoe', 'epa', 'lockdown', 'blown', 'overall_grade']]

# The index entries unpack as (player id, player name) pairs.
playernames = CB.index.values
names = []
colors2 = []
for _player_id, name in playernames:  # avoid rebinding the builtin `id`
    # Shorten two long hyphenated surnames (presumably so they fit the column).
    name = str(name).replace("Robey-Coleman", "Robey-Cole").replace("Boddy-Calhoun", "Boddy-Calh")
    names.append(name)
    if name in ALL_PRO_CBS:
        colors2.append('yellow')   # AP All Pro selection
    elif name in TOP_PAID_CBS:
        colors2.append('lime')     # among the highest paid cornerbacks
    else:
        colors2.append('black')

# 20-step palette running from blue (index 0) to red (index 19).
colors = n_colors('rgb(108, 165, 212)', 'rgb(182, 96, 96)', 20, colortype='rgb')
palette = np.array(colors)

a = CB['cpoa_targ'].to_numpy()
b = CB['cpoe'].to_numpy()
c = CB['epa'].to_numpy()
d = CB['overall_grade'].to_numpy()

anorm = _shade_index(a)
bnorm = _shade_index(b)
cnorm = _shade_index(c)
# Inverted for the grade so that the highest grade gets the blue end.
dnorm = 19 - _shade_index(d)

# NOTE(review): rank is simply the row order, so this assumes average_grades
# is already sorted best-first -- confirm where average_grades is built.
rank = np.arange(1, len(CB) + 1, 1)

# Rank and Player cells share the Grade shading; each metric has its own.
cell_colors = [palette[dnorm], palette[dnorm], palette[anorm],
               palette[bnorm], palette[cnorm], palette[dnorm]]

fig = go.Figure(data=[go.Table(
    columnwidth=[5, 20],
    header=dict(
        values=['<b>Rank</b>', '<b>Player</b>', '<b>Average CPOA</b>', '<b>Average CPOE</b>', '<b>Average EPA</b>', '<b>Grade</b>'],
        line_color='LightSteelBlue', fill_color='LightSteelBlue',
        align='center', font=dict(color='black', size=12)
    ),
    cells=dict(
        values=[rank, names, np.round(a, 3), np.round(b, 3), np.round(c, 3), np.round(d, 3)],
        fill_color=cell_colors,
        line_color=cell_colors,
        align='center', font=dict(color=['black', colors2, 'black', 'black', 'black', 'black'], size=12)
    ))
])

fig.update_layout(
    autosize=False,
    width=800,
    height=640,
    margin=dict(l=20, r=20, b=20, t=50, pad=20),
    paper_bgcolor="LightSteelBlue",
    # The title font lives under title.font; the top-level `titlefont`
    # attribute is deprecated and rejected by newer Plotly releases.
    title={
        'text': "Top Nickelbacks of 2018",
        'yanchor': 'top',
        'x': 0.5,
        'y': 0.99,
        'font': {'size': 32, 'color': 'black'},
    },
)

fig.show()

**Figure 6** : Comprehensive grades for nickelbacks in 2018.  Minimum 200 snaps in coverage as primary defender.  

To separate nickelbacks from cornerbacks I simply thresholded the average AYCP at 0.59 (determined empirically).  This method works because nickelbacks tend to play closer to the quarterback and line of scrimmage, therefore their AYCP is higher compared to outside corners.  

Desmond King, who was third among nickelbacks in this grading system, was given AP All Pro honors for his great season for the Chargers in 2018.

In [None]:
# Build the "Top Safeties of 2018" table (Figure 7).
#
# Safeties are players listed as S, SS, FS or DB with at least 100 coverage
# snaps.

# Players whose names are highlighted in yellow in the Player column.
ALL_PRO_SAFETIES = {"Jamal Adams", "Eddie Jackson", "Derwin James", "Harrison Smith"}


def _shade_index(values, n_shades=20):
    """Bin ``values`` linearly into integer shade indices ``0 .. n_shades - 1``.

    The smallest value maps to 0 and the largest to ``n_shades - 1``.  An
    empty input returns an empty array, and a constant (or non-finite) range
    returns all zeros instead of dividing by zero and producing invalid
    palette indices.  Kept inside this cell so the cell runs on its own.
    """
    values = np.asarray(values, dtype='float')
    if values.size == 0:
        return np.zeros(0, dtype='int')
    low = np.min(values)
    span = np.max(values) - low
    if not span > 0:
        return np.zeros(values.size, dtype='int')
    # Casting to int truncates, which equals floor here since values are >= 0.
    return np.array((values - low) / span * (n_shades - 1), dtype='int')


S = average_grades.loc[(average_grades.incoverage >= 100)]
criteria = S.position.isin(["S", "SS", "FS", "DB"])
S = S.loc[criteria, ['cpoa', 'cpoa_targ', 'cpoe', 'epa', 'lockdown', 'blown', 'overall_grade']]

# The index entries unpack as (player id, player name) pairs.
playernames = S.index.values
names = []
colors2 = []
for _player_id, name in playernames:  # avoid rebinding the builtin `id`
    names.append(name)
    colors2.append('yellow' if name in ALL_PRO_SAFETIES else 'black')

# 20-step palette running from blue (index 0) to red (index 19).
colors = n_colors('rgb(108, 165, 212)', 'rgb(182, 96, 96)', 20, colortype='rgb')
palette = np.array(colors)

a = S['cpoa_targ'].to_numpy()
b = S['cpoe'].to_numpy()
c = S['epa'].to_numpy()
d = S['overall_grade'].to_numpy()

anorm = _shade_index(a)
bnorm = _shade_index(b)
cnorm = _shade_index(c)
# Inverted for the grade so that the highest grade gets the blue end.
dnorm = 19 - _shade_index(d)

# NOTE(review): rank is simply the row order, so this assumes average_grades
# is already sorted best-first -- confirm where average_grades is built.
rank = np.arange(1, len(S) + 1, 1)

# Rank and Player cells share the Grade shading; each metric has its own.
cell_colors = [palette[dnorm], palette[dnorm], palette[anorm],
               palette[bnorm], palette[cnorm], palette[dnorm]]

fig = go.Figure(data=[go.Table(
    columnwidth=[5, 20],
    header=dict(
        values=['<b>Rank</b>', '<b>Player</b>', '<b>Average CPOA</b>', '<b>Average CPOE</b>', '<b>Average EPA</b>', '<b>Grade</b>'],
        line_color="LightSteelBlue", fill_color="LightSteelBlue",
        align='center', font=dict(color='black', size=12)
    ),
    cells=dict(
        values=[rank, names, np.round(a, 3), np.round(b, 3), np.round(c, 3), np.round(d, 3)],
        fill_color=cell_colors,
        line_color=cell_colors,
        align='center', font=dict(color=['black', colors2, 'black', 'black', 'black', 'black'], size=12)
    ))
])

fig.update_layout(
    autosize=False,
    width=800,
    height=1340,
    margin=dict(l=20, r=20, b=20, t=50, pad=20),
    paper_bgcolor="LightSteelBlue",
    # The title font lives under title.font; the top-level `titlefont`
    # attribute is deprecated and rejected by newer Plotly releases.
    title={
        'text': "Top Safeties of 2018",
        'yanchor': 'top',
        'x': 0.5,
        'y': 0.99,
        'font': {'size': 32, 'color': 'black'},
    },
)

fig.show()

**Figure 7** : Comprehensive grades for safeties in 2018.  Minimum 100 snaps in coverage as primary defender.  

Once again, 3 of the top 8 safeties using this grading method were AP All Pro selections.  Safeties are primary defenders in coverage far less often compared to cornerbacks, so I had to lower the minimum snap threshold to 100.  This lower sample size makes this method a little less effective for grading safeties compared to grading cornerbacks.

## Final thoughts
As with any model, there is going to be some noise and variance.  However, any model that we use to measure player performance should largely confirm our priors.  Using this grading method, most of the top graded defenders have a good reputation and are paid accordingly, and most of the All Pro players have high grades. This gives me confidence that this approach is viable for grading pass coverage.

Although the purpose of this project was to grade defensive coverage, this same model can be applied to gain other interesting insight.  For example, it can be used to determine whether the QB is making the correct read, or how much risk he is willing to take.  We can also gain insight about how open receivers are getting in their routes.  However, these questions were beyond the scope of this project. Maybe this can be explored in future work.


## Tools:
Python | Pandas | Keras | Tensorflow | Matplotlib | Plotly

