In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Raise pandas' display limits so wide/long frames are shown instead of truncated.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 400

In [2]:
# Team abbreviation -> hex colour string, one entry per team code.
# The keys are written in alphabetical order by abbreviation.
# NOTE(review): 'CIN' and 'CLE' share '#FB4F14', and 'HOU' and 'SF' share
# '#C9243F' -- confirm these duplicates are intended.
COLORS = {
    'ARI':'#97233F','ATL':'#A71930','BAL':'#241773','BUF':'#00338D','CAR':'#0085CA','CHI':'#00143F',
    'CIN':'#FB4F14','CLE':'#FB4F14','DAL':'#B0B7BC','DEN':'#002244','DET':'#046EB4','GB':'#24423C',
    'HOU':'#C9243F','IND':'#003D79','JAX':'#136677','KC':'#CA2430','LA':'#002147','LAC':'#2072BA',
    'LV':'#C4C9CC','MIA':'#0091A0','MIN':'#4F2E84','NE':'#0A2342','NO':'#A08A58','NYG':'#192E6C',
    'NYJ':'#203731','PHI':'#014A53','PIT':'#FFC20E','SEA':'#7AC142','SF':'#C9243F','TB':'#D40909',
    'TEN':'#4095D1','WAS':'#FFC20F'}

# Second hex colour per team, keyed by the same abbreviations as COLORS.
# NOTE(review): 'BUF', 'CHI', 'DEN' and 'MIA' have the same value here as in
# COLORS -- confirm whether a distinct secondary colour was intended.
SECONDARY_COLORS = {
    'ARI':'#000000','ATL':'#000000','BAL':'#000000','BUF':'#00338D','CAR':'#101820',
    'CHI':'#00143F','CIN':'#000000','CLE':'#FF3C00','DAL':'#869397','DEN':'#002244',
    'DET':'#B0B7BC','GB':'#FFB612','HOU':'#A71930','IND':'#A2AAAD','JAX':'#006778',
    'KC':'#FFB81C','LA':'#FFA300','LAC':'#FFC20E','LV':'#A5ACAF','MIA':'#0091A0',
    'MIN':'#FFC62F','NE':'#C60C30','NO':'#101820','NYG':'#A71930','NYJ':'#000000',
    'PHI':'#A5ACAF','PIT':'#101820','SEA':'#69BE28','SF':'#B3995D','TB':'#FF7900',
    'TEN':'#C8102E','WAS':'#FFB612'}

In [8]:
# Scratch check: range() excludes its stop value, so the last element is 2009.
range(2000, 2010)[-1]

2009

In [None]:
# 'https://github.com/nflverse/nflverse-data/releases/download/pbp/play_by_play_{year}.csv.gz'.

def get_pbp_data(seasons):
    """Download nflverse play-by-play data and combine it into one DataFrame.

    Parameters
    ----------
    seasons : int or iterable of int
        A single season year (e.g. ``2020``) or an iterable of season years
        (e.g. ``range(2018, 2021)``). One file is downloaded per year.

    Returns
    -------
    pandas.DataFrame
        All requested seasons stacked row-wise with columns sorted by name
        and a fresh 0..n-1 index. An empty DataFrame is returned when
        ``seasons`` is an empty iterable.

    Raises
    ------
    Whatever ``pandas.read_csv`` raises when a season file cannot be
    fetched or parsed (e.g. ``urllib.error.HTTPError`` for an unknown year).
    """
    # Accept a bare year as well as any iterable of years.
    years = [seasons] if np.isscalar(seasons) else list(seasons)

    # Read each season into its own frame; the URL is built from the loop
    # variable so every iteration fetches a different file.
    frames = [
        pd.read_csv(
            'https://github.com/nflverse/nflverse-data/releases/download/pbp/play_by_play_' + str(year) + '.csv.gz',
            compression='gzip', low_memory=False)
        for year in years
    ]

    # pd.concat raises on an empty list, so handle "no seasons" explicitly.
    if not frames:
        return pd.DataFrame()

    # Concatenate once (DataFrame.append was removed in pandas 2.0 and is
    # quadratic when called in a loop); sort=True keeps columns aligned when
    # seasons differ in their column sets.
    data = pd.concat(frames, sort=True)
    data.reset_index(drop=True, inplace=True)
    return data

# Inline (non-function) version of the download loop, kept for reference.
#YEARS = range(2018, 2021)
#data = pd.DataFrame()

#for year in YEARS:
#    i_data = pd.read_csv('https://github.com/nflverse/nflverse-data/releases/download/pbp/play_by_play_' + str(year) + '.csv.gz', compression= 'gzip', low_memory= False)
#    data = data.append(i_data, sort=True)
#data.reset_index(drop=True, inplace=True)

In [None]:
# Regular-season plays only
data = data.loc[lambda plays: plays.season_type == 'REG']

# Keep pass / run / no_play rows that carry an EPA value
data = data.loc[lambda plays: plays.play_type.isin(['no_play', 'pass', 'run']) & plays.epa.notna()]

# Overwrite play_type from the 'pass' and 'rush' indicator columns, so rows
# flagged as a pass or rush are labelled that way even if play_type says 'no_play'
data.loc[data['pass'] == 1, 'play_type'] = 'pass'
data.loc[data.rush == 1, 'play_type'] = 'run'

# Renumber rows 0..n-1 after filtering
data = data.reset_index(drop=True)

In [None]:
# Exploratory aggregations. Only the final expression of a notebook cell is
# rendered, so the earlier bare expressions are computed and then discarded.

# Mean EPA per play for each offense: unsorted, then best to worst
data.groupby('posteam').agg({'epa': 'mean'})
data.groupby('posteam').agg({'epa': 'mean'}).sort_values('epa', ascending=False)

# Keep the per-team table so individual teams can be looked up
team_epa = data.groupby('posteam').agg({'epa': 'mean'})
# Row for PHI only
team_epa.loc[team_epa.index == 'PHI']

# Play counts per receiver and team: group keys as the index, then as columns
data.groupby(['receiver', 'posteam']).agg({'play_id': 'count'})
data.groupby(['receiver', 'posteam'], as_index=False).agg({'play_id': 'count'})

# Per rusher and team: mean rushing yards, number of plays and total EPA
data.groupby(['rusher', 'posteam'], as_index=False).agg(
    rushing_yards=('rushing_yards', 'mean'),
    play_id=('play_id', 'count'),
    epa=('epa', 'sum'))

In [None]:
# Build the quarterback table in one chain:
#   1. average qb_epa and cpoe, and count plays, per passer/team
#   2. keep passers with 200 or more counted plays
#   3. order from highest to lowest EPA
#   4. round to two decimal places
qbs = (
    data.groupby(['passer', 'posteam'], as_index=False)
    .agg({'qb_epa': 'mean', 'cpoe': 'mean', 'play_id': 'count'})
    .loc[lambda table: table.play_id > 199]
    .sort_values('qb_epa', ascending=False)
    .round(2)
)

# Presentation-friendly column names (same order as the columns above)
qbs.columns = ['Player','Team','EPA per Dropback','CPOE','Dropbacks']

qbs

In [None]:
# EPA values split by play type
rush_epa = data.loc[data.play_type=='run', 'epa']
pass_epa = data.loc[data.play_type=='pass', 'epa']

fig, ax = plt.subplots(figsize=(12, 8))

# Pass plays first; bins is the number of buckets the values are grouped into,
# label feeds the legend
ax.hist(pass_epa, bins=25, label='Pass', color='slategrey')

# Run plays drawn on top; alpha < 1 keeps the pass histogram visible underneath
ax.hist(rush_epa, bins=25, label='Run', alpha=.7, color='lime')

# Axis labels and title
ax.set_xlabel('Expected Points Added', fontsize=12)
ax.set_ylabel('Number of Plays', fontsize=12)
ax.set_title('EPA Distribution Based on Play Type', fontsize=14)

# Source credit; the two numbers are x/y positions as fractions of the figure
fig.text(.8, .04, 'Data: nflfastR', fontsize=10)

# Legend built from the label= arguments above
ax.legend()

In [None]:
# Neutral situations: 1st/2nd down, more than 120 seconds left in the half,
# and win probability between 20% and 80% (both ends inclusive)
neutral_situation = data.loc[
    (data.down < 3)
    & (data.half_seconds_remaining > 120)
    & data.wp.between(.2, .8)]

# Pass rate per team-game: 'pass' is 1 when the play call is a pass and 0 when
# it is a run, so its mean is the share of pass calls
pass_rates = neutral_situation.groupby(['game_id','posteam']).agg({'pass': 'mean'})

# EPA per dropback for the same team-games, computed on pass plays only;
# the assignment aligns on the shared (game_id, posteam) index
dropbacks = neutral_situation.loc[neutral_situation['pass'] == 1]
pass_rates['epa'] = dropbacks.groupby(['game_id','posteam'])['epa'].mean()

# Turn game_id / posteam back into ordinary columns
pass_rates = pass_rates.reset_index()

In [None]:
#Create figure and enter in a figsize
plt.figure(figsize=(10,10))

#Team-games with no neutral-situation pass play have a missing EPA value;
#np.polyfit cannot fit through NaN, so drop those rows before plotting/fitting
fit_points = pass_rates[['pass', 'epa']].dropna()
x = fit_points['pass']
y = fit_points['epa']

#Make a scatter plot with neutral situation pass rate on the x-axis, EPA per dropback on the y
plt.scatter(x, y, alpha=.7, color='turquoise')

#Create line of best fit (degree-1 polynomial) evaluated at each distinct pass rate
#Linestyle gives a dashed line
x_unique = np.unique(x)
best_fit = np.poly1d(np.polyfit(x, y, 1))
plt.plot(x_unique, best_fit(x_unique), color='grey', linestyle='--')

#Add grid lines
plt.grid(zorder=0, alpha=.4)

#Add labels and title
plt.xlabel('Neutral Situation Pass Rate', fontsize=12)
plt.ylabel('EPA per Dropback', fontsize=12)
plt.title('Neutral Situation Pass Rate and EPA per Dropback \n' \
          '1st & 2nd Down, WP between 20-80%, Outside of Two Minute Warnings',fontsize=14)

#Add source, the first two numbers are x and y 
#coordinates as a decimal of the whole image
plt.figtext(.79, .05, 'Data: nflfastR', fontsize=10)

In [None]:
# Getting Team Logos

import os
import urllib.request
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

# Table of logo URLs; the 'url' and 'team_code' columns are used below
urls = pd.read_csv('https://raw.githubusercontent.com/statsbylopez/BlogPosts/master/nfl_teamlogos.csv')

# urlretrieve does not create missing directories, so make sure the target
# folder exists before downloading (no-op when it is already there)
logo_dir = os.getcwd() + '/FOLDER/'
os.makedirs(logo_dir, exist_ok=True)

# Save each logo as <team_code>.png inside the folder
for team_code, url in zip(urls['team_code'], urls['url']):
    urllib.request.urlretrieve(url, logo_dir + team_code + '.png')
    
def getImage(path, zoom=.5):
    """Load an image file and wrap it so it can be placed on a plot.

    Parameters
    ----------
    path : str
        Location of the image file, passed to ``plt.imread``.
    zoom : float, optional
        Scaling factor handed to ``OffsetImage``. Defaults to ``.5``, the
        value that was previously hard-coded.

    Returns
    -------
    matplotlib.offsetbox.OffsetImage
        The image, ready to be passed to ``AnnotationBbox``.
    """
    return OffsetImage(plt.imread(path), zoom=zoom)

# os.listdir returns names in arbitrary order and includes every file in the
# folder. Later cells pair logo_paths positionally with teams sorted by
# abbreviation, so keep only the .png files and sort them by file name.
logos = sorted(name for name in os.listdir(os.getcwd() + '/FOLDER') if name.endswith('.png'))

# Full path for each logo file, in the same sorted order
logo_paths = [os.getcwd() + '/FOLDER/' + str(name) for name in logos]

In [None]:
#Filter to pass plays and groupby offensive team
team_epa = data.loc[data['pass']==1].groupby('posteam')[['epa']].mean()
#Do the same but for rushing plays
team_epa['rush_epa'] = data.loc[data.rush==1].groupby('posteam')[['epa']].mean()

#Create a figure with size 15x15
fig, ax = plt.subplots(figsize=(15,15))

#Artists added with add_artist are ignored by autoscaling, so the axes would
#stay at the default 0-1 range and most logos would fall outside the view.
#Plot the same points as near-invisible markers first to set the axis limits.
ax.scatter(team_epa.rush_epa, team_epa.epa, s=.001)

#Adding logos to the chart
#NOTE(review): logos are matched to teams by position, so logo_paths must be
#in the same (alphabetical by abbreviation) order as team_epa's index
for x0, y0, path in zip(team_epa.rush_epa, team_epa.epa, logo_paths):
    ab = AnnotationBbox(getImage(path), (x0, y0), frameon=False, fontsize=4)
    ax.add_artist(ab)
    
#Add a grid
ax.grid(zorder=0,alpha=.4)
ax.set_axisbelow(True)
    
#Adding labels and text
ax.set_xlabel('EPA per Rush', fontsize=16)
ax.set_ylabel('EPA per Dropback', fontsize=16)
ax.set_title('Avg. EPA by Team & Play Type - 2020', fontsize=20)
plt.figtext(.81, .07, 'Data: nflfastR', fontsize=12)
plt.show()

In [None]:
#Neutral situations: neither side above 80% win probability, more than
#120 seconds left in the half, and 1st or 2nd down
neutral_plays = data.loc[(data.home_wp<=.8) & 
                         (data.away_wp<=.8) & 
                         (data.half_seconds_remaining>120) & 
                         (data.down<3)]


#Pass rate per offense ('pass' is 1 for a pass call, 0 for a run call)
neutral_teams = neutral_plays.groupby('posteam')[['pass']].mean()
#Look each team's color up by its abbreviation rather than relying on the
#dictionary's insertion order lining up with the groupby index
neutral_teams['color'] = neutral_teams.index.map(COLORS)
#NOTE(review): logo paths are still attached by position, so logo_paths must
#have one entry per team in the same alphabetical order as the index
neutral_teams['path'] = logo_paths
#Sort highest to lowest so bar chart is left to right - high to low
neutral_teams = neutral_teams.sort_values('pass', ascending=False)

In [None]:
fig, ax = plt.subplots(figsize=(30,10))

#One bar per row of neutral_teams; derive the positions from the table
#instead of assuming exactly 32 teams
n_teams = len(neutral_teams)
bar_positions = np.arange(n_teams)
league_average = neutral_plays['pass'].mean()

#Create league average line
ax.axhline(y=league_average, linestyle='--', color='black')

#Add team logos just above the top of each bar
for x0, y0, path in zip(bar_positions, neutral_teams['pass']+.005, neutral_teams['path']):
    ab = AnnotationBbox(getImage(path), (x0, y0), frameon=False, fontsize=4)
    ax.add_artist(ab)

#Add bar chart, one bar per team in the (already sorted) table order
ax.bar(bar_positions, neutral_teams['pass'], color=neutral_teams.color, width=.5)
    
#Add a grid across the y-axis
ax.grid(zorder=0,alpha=.6,axis='y')
ax.set_axisbelow(True)
ax.set_xticks(bar_positions)
#Add team abbreviations as x tick labels
ax.set_xticklabels(neutral_teams.index,fontsize=16)

#Start y-axis at .3 (30%) to eliminate wasted space
ax.set_ylim(.3,.7)
ax.set_yticks([.3,.4,.5,.6,.7])
ax.set_yticklabels([0.3,0.4,0.5,0.6,0.7],fontsize=16)

ax.set_ylabel('Pass Rate', fontsize=20, labelpad=20)
ax.set_title('Neutral Situation Pass Rates - 2020', fontsize=26, pad=20)
plt.figtext(.845, .04, 'Data: nflfastR', fontsize=14)
#Label the average line next to the right-most bar
ax.text(n_teams - 1, league_average+.005, 'NFL Average', fontsize=12)

In [None]:
from adjustText import adjust_text

#Create QBs dataframe with avg epa, avg cpoe, and number of plays
qbs = data.groupby(['passer','posteam'], as_index=False).agg({'qb_epa':'mean',
                                                              'cpoe':'mean',
                                                              'play_id':'count'})

#Set minimum limit of 200 dropbacks (a passer with exactly 200 is included);
#.copy() gives an independent frame so adding the color column below is safe
qbs = qbs.loc[qbs.play_id>=200].copy()

#Team color for each passer, looked up by team abbreviation
qbs['color'] = qbs.posteam.map(COLORS)

fig, ax = plt.subplots(figsize=(15,15))

#Create vertical and horizontal lines for averages of each metric
ax.axvline(x=qbs.cpoe.mean(), linestyle='--', alpha=.5, color='black')
ax.axhline(y=qbs.qb_epa.mean(), linestyle='--', alpha=.5, color='black')

#Create scatter plot
#s stands for size, the dot size is proportional to the QBs number of plays
ax.scatter(qbs.cpoe, qbs.qb_epa, 
           s=qbs.play_id, 
           alpha=.7, 
           color=qbs.color)

#Add text to each dot
texts = [ax.text(x0,y0,name,ha='right',va='bottom') for x0,y0,name in zip(
    qbs.cpoe, qbs.qb_epa, qbs.passer)]

#Nudge the labels apart so they do not overlap
adjust_text(texts)

#Add grid
ax.grid(zorder=0,alpha=.4)
ax.set_axisbelow(True)

#Remove top and right boundary lines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

#Add title, labels, and source
#NOTE(review): the y label says "per Attempt" while the QB table earlier in
#the notebook calls the same qb_epa average "EPA per Dropback" -- confirm wording
ax.set_title('CPOE & EPA - 2020',fontsize=20,pad=15)
ax.set_xlabel('Completion % Over Expected (CPOE)',fontsize=16,labelpad=15)
ax.set_ylabel('EPA per Attempt',fontsize=16,labelpad=15)
plt.figtext(.8,.06,'Data: nflfastR',fontsize=12)