In [1]:
from collections import defaultdict

import numpy as np
import pandas as pd

from extract_drives import team_drives_pipeline

In [34]:
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Team codes tried for every season. Some pairs look like alternative codes
# for one franchise ('JAX'/'JAC', 'LAC'/'SD', 'LA'/'STL' -- TODO confirm), so a
# given code is not expected to have data in every season.
teams = [
    'PHI', 'ATL', 'BUF', 'BAL', 'CLE', 'PIT', 'IND',
    'CIN', 'MIA', 'TEN', 'SF', 'MIN', 'HOU', 'NE',
    'TB', 'NO', 'NYG', 'JAX', 'KC', 'LAC', 'ARI',
    'WAS', 'CAR', 'DAL', 'SEA', 'DEN', 'CHI', 'GB',
    'DET', 'NYJ', 'LA', 'OAK', 'JAC', 'SD', 'STL',
]

In [3]:
def score_team(team, game_file, bin_width=25):
    """Summarise one team's drives by starting yard line.

    Drives are fetched with ``team_drives_pipeline(..., side='o')``
    (presumably the team's offensive drives), scored 7 for a 'Touchdown'
    result, 3 for a 'Field Goal' and 0 otherwise, then grouped into
    right-closed ``start_yard_line`` bins that are ``bin_width`` yards wide,
    start at 0 and reach at least 100.

    Parameters
    ----------
    team : str
        Team code passed through to ``team_drives_pipeline``.
    game_file : str
        Path of the games file, passed through unchanged.
    bin_width : int, optional
        Positive width of each yard-line bin.  The default of 25 yields the
        labels '0-25', '25-50', '50-75' and '75-100'.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``yard_line_bin`` with one row per bin (empty bins are
        kept) and the columns ``mean_points`` (mean points per drive),
        ``drive_count`` (number of drives) and ``avg_yards``.  Despite its
        name, ``avg_yards`` is the *median* of
        ``penalty_yards + yards_gained``.

    Notes
    -----
    A drive whose ``start_yard_line`` is missing or falls outside the bins
    (including exactly 0) belongs to no bin and is left out of the summary.
    """
    team_drives = team_drives_pipeline(
        game_file=game_file, team=team, side='o',
        url='http://www.nfl.com/liveupdate/game-center/'
    )
    df = pd.DataFrame(team_drives)
    df['drive_id'] = df.index
    # Boolean masks multiply to 0/1, so every other result scores 0 points.
    df['points'] = (
        7 * (df['result'] == 'Touchdown') + 3 * (df['result'] == 'Field Goal')
    )
    df['total_yards'] = df['penalty_yards'] + df['yards_gained']

    # Let pd.cut attach the text labels itself.  Mapping a function over the
    # interval categories afterwards breaks on recent pandas as soon as one
    # drive has a missing start_yard_line, because the mapper is then also
    # called on NaN.
    edges = list(range(0, 100 + bin_width, bin_width))
    labels = ['%s-%s' % pair for pair in zip(edges[:-1], edges[1:])]
    df['yard_line_bin'] = pd.cut(df['start_yard_line'], bins=edges, labels=labels)

    # observed=False keeps a row for bins with no drives.
    agg = df.groupby('yard_line_bin', observed=False).agg(
        {'points': 'mean', 'drive_id': 'count', 'total_yards': 'median'}
    )
    return agg.rename(
        columns={
            'points': 'mean_points',
            'drive_id': 'drive_count',
            'total_yards': 'avg_yards',
        }
    )

In [4]:
# Build {season: {team: per-bin drive summary}} for the 2009-2018 seasons.
# NOTE: this is a defaultdict, so merely reading a missing top-level key later
# (e.g. team_summaries['LA']) inserts an empty dict under that key.
team_summaries = defaultdict(dict)

for season in range(2009, 2019):
    for team in teams:
        try:
            team_summaries[season][team] = score_team(
                team, game_file='./data/%i_games_dict.json' % season
            )
        # Every code in `teams` is tried for every season; pairs for which
        # score_team fails are reported and skipped.  Catch Exception rather
        # than everything so Ctrl-C / kernel interrupts still stop the loop.
        except Exception:
            print(season, team)

2009 JAX
2009 LAC
2009 LA
2010 JAX
2010 LAC
2010 LA
2011 JAX
2011 LAC
2011 LA
2012 JAX
2012 LAC
2012 LA
2013 JAX
2013 LAC
2013 LA
2014 JAX
2014 LAC
2014 LA
2015 JAX
2015 LAC
2015 LA
2016 LAC
2016 STL
2017 JAC
2017 SD
2017 STL
2018 JAC
2018 SD
2018 STL


In [26]:
# Inspect one stored summary: season 2011, team 'GB'.
team_summaries[2011]['GB']

Unnamed: 0_level_0,mean_points,drive_count,avg_yards
yard_line_bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0-25,2.5,100,36.0
25-50,2.983607,61,49.0
50-75,3.5,26,26.5
75-100,2.5,4,1.5


In [25]:
# Inspect one stored summary: season 2016, team 'ATL'.
team_summaries[2016]['ATL']

Unnamed: 0_level_0,mean_points,drive_count,avg_yards
yard_line_bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0-25,2.870968,124,46.5
25-50,3.604651,43,49.0
50-75,3.0,17,9.0
75-100,3.75,8,8.0


In [6]:
# Inspect one stored summary: season 2018, team 'NO'.
team_summaries[2018]['NO']

Unnamed: 0_level_0,mean_points,drive_count,avg_yards
yard_line_bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0-25,2.49505,101,33.0
25-50,3.145455,55,36.0
50-75,2.4,15,21.0
75-100,6.0,8,16.5


In [54]:
# Inspect one stored summary: season 2018, team 'KC'.
team_summaries[2018]['KC']

Unnamed: 0_level_0,mean_points,drive_count,avg_yards
yard_line_bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0-25,2.772727,110,47.5
25-50,3.632653,49,51.0
50-75,2.928571,14,9.0
75-100,5.25,8,6.5


In [55]:
# Inspect one stored summary: season 2013, team 'DEN'.
team_summaries[2013]['DEN']

Unnamed: 0_level_0,mean_points,drive_count,avg_yards
yard_line_bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0-25,2.25,140,35.5
25-50,3.015873,63,38.0
50-75,4.611111,18,30.0
75-100,5.125,8,17.5


In [15]:
# List the top-level keys. The population loop only ever writes season numbers
# here; because team_summaries is a defaultdict, indexing it with any other
# missing key (such as a team code) inserts that key with an empty dict.
team_summaries.keys()

dict_keys([2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 'LA'])

In [22]:
max_points = 0

avg_team_score = []
avg_team_yds = []

# For every stored (season, team) summary, pull the two figures of the '0-25'
# bin and record them as (season, team, value) tuples.
for season, season_dict in team_summaries.items():
    for team, summary in season_dict.items():
        avg_points = summary['mean_points'][summary.index == '0-25'].values[0]
        avg_yds = summary['avg_yards'][summary.index == '0-25'].values[0]
        avg_team_score.append((season, team, avg_points))
        avg_team_yds.append((season, team, avg_yds))

In [23]:
# Rank the (season, team, value) tuples by value, highest first. The ascending
# sort is reversed afterwards (instead of sorting descending) so that tied
# entries come out in the same order as before.
list(reversed(sorted(avg_team_score, key=lambda entry: entry[2])))

[(2016, 'ATL', 2.870967741935484),
 (2018, 'KC', 2.772727272727273),
 (2011, 'NO', 2.633587786259542),
 (2011, 'GB', 2.5),
 (2018, 'NO', 2.495049504950495),
 (2010, 'NE', 2.4831460674157304),
 (2011, 'NE', 2.479338842975207),
 (2016, 'NO', 2.40625),
 (2017, 'NE', 2.4044117647058822),
 (2009, 'IND', 2.319672131147541),
 (2016, 'GB', 2.3089430894308944),
 (2016, 'NE', 2.282051282051282),
 (2016, 'DAL', 2.2549019607843137),
 (2013, 'DEN', 2.25),
 (2012, 'NE', 2.2338709677419355),
 (2009, 'MIN', 2.2298850574712645),
 (2009, 'NO', 2.202020202020202),
 (2018, 'NE', 2.168),
 (2018, 'PIT', 2.1551724137931036),
 (2012, 'NO', 2.1484375),
 (2014, 'GB', 2.1454545454545455),
 (2014, 'NE', 2.1363636363636362),
 (2018, 'ATL', 2.1315789473684212),
 (2016, 'IND', 2.125),
 (2012, 'GB', 2.1192660550458715),
 (2017, 'NO', 2.1083333333333334),
 (2017, 'PHI', 2.0977443609022557),
 (2018, 'LAC', 2.09375),
 (2011, 'SD', 2.0792079207920793),
 (2009, 'SD', 2.0625),
 (2012, 'SF', 2.0350877192982457),
 (2016, 'SD

In [24]:
# Rank the (season, team, value) tuples by value, highest first. The ascending
# sort is reversed afterwards (instead of sorting descending) so that tied
# entries come out in the same order as before.
list(reversed(sorted(avg_team_yds, key=lambda entry: entry[2])))

[(2011, 'NO', 52),
 (2011, 'NE', 48),
 (2018, 'KC', 47.5),
 (2016, 'ATL', 46.5),
 (2011, 'SD', 45.0),
 (2013, 'SD', 43.0),
 (2017, 'PIT', 42),
 (2018, 'NE', 41),
 (2016, 'NO', 40.5),
 (2012, 'NE', 40.5),
 (2016, 'NE', 40.0),
 (2013, 'GB', 38),
 (2009, 'NO', 38.0),
 (2017, 'NE', 37.5),
 (2018, 'LA', 37.0),
 (2016, 'GB', 37.0),
 (2016, 'WAS', 37.0),
 (2012, 'SF', 37),
 (2018, 'ATL', 36.5),
 (2011, 'GB', 36.0),
 (2009, 'NE', 36.0),
 (2009, 'MIN', 36),
 (2013, 'DEN', 35.5),
 (2017, 'CAR', 35.0),
 (2017, 'KC', 35.0),
 (2017, 'PHI', 35),
 (2014, 'PIT', 35),
 (2009, 'SD', 35.0),
 (2017, 'ATL', 34.5),
 (2017, 'MIN', 34),
 (2016, 'DET', 34),
 (2016, 'DAL', 34),
 (2012, 'NYG', 34.0),
 (2017, 'NO', 33.5),
 (2018, 'NO', 33.0),
 (2018, 'BAL', 33.0),
 (2017, 'TB', 33),
 (2015, 'PIT', 33.0),
 (2015, 'ATL', 33.0),
 (2012, 'ATL', 33),
 (2011, 'CAR', 33),
 (2014, 'DET', 32.5),
 (2018, 'DAL', 32.0),
 (2017, 'SF', 32.0),
 (2016, 'IND', 32.0),
 (2014, 'GB', 32.0),
 (2012, 'IND', 32),
 (2015, 'CHI', 31.5),


In [27]:
# Fetch the raw drive records for team 'KC' from the 2018 games file, using the
# same pipeline arguments that score_team passes.
kc_drives = team_drives_pipeline(
    game_file='./data/%i_games_dict.json' % 2018, team='KC', side='o',
    url='http://www.nfl.com/liveupdate/game-center/'
)

In [78]:
def format_team_drives(team, game_file, bin_width=10):
    """Return one team's drives as a DataFrame with derived per-drive columns.

    Drives are fetched with ``team_drives_pipeline(..., side='o')``
    (presumably the team's offensive drives) and kept one row per drive; no
    aggregation is performed.

    Parameters
    ----------
    team : str
        Team code passed through to ``team_drives_pipeline``.
    game_file : str
        Path of the games file, passed through unchanged.
    bin_width : int, optional
        Positive width of each yard-line bin.  The default of 10 yields the
        labels '0-10', '10-20', ..., '90-100'.

    Returns
    -------
    pandas.DataFrame
        The pipeline's columns plus, in this order:

        * ``drive_id`` -- the row index of the drive;
        * ``points`` -- 7 for a 'Touchdown' result, 3 for a 'Field Goal',
          otherwise 0;
        * ``total_yards`` -- ``penalty_yards + yards_gained``;
        * ``yard_line_bin`` -- ordered categorical label of the right-closed
          ``start_yard_line`` bin; missing when ``start_yard_line`` is missing
          or falls outside the bins (including exactly 0);
        * ``is_touchdown`` -- 1 when the result is 'Touchdown', otherwise 0.
    """
    team_drives = team_drives_pipeline(
        game_file=game_file, team=team, side='o',
        url='http://www.nfl.com/liveupdate/game-center/'
    )
    df = pd.DataFrame(team_drives)
    df['drive_id'] = df.index

    scored_touchdown = df['result'] == 'Touchdown'
    # Boolean masks multiply to 0/1, so every other result scores 0 points.
    df['points'] = 7 * scored_touchdown + 3 * (df['result'] == 'Field Goal')
    df['total_yards'] = df['penalty_yards'] + df['yards_gained']

    # Let pd.cut attach the text labels itself.  Mapping a function over the
    # interval categories afterwards breaks on recent pandas as soon as one
    # drive has a missing start_yard_line, because the mapper is then also
    # called on NaN.
    edges = list(range(0, 100 + bin_width, bin_width))
    labels = ['%s-%s' % pair for pair in zip(edges[:-1], edges[1:])]
    df['yard_line_bin'] = pd.cut(df['start_yard_line'], bins=edges, labels=labels)

    df['is_touchdown'] = scored_touchdown.astype('int64')
    return df

In [79]:
# Per-drive frame for team 'KC' from the 2018 games file, with the derived
# columns added by format_team_drives; preview the first rows.
kc = format_team_drives('KC', game_file='./data/%i_games_dict.json' % 2018)

kc.head()

Unnamed: 0,drive_time,game_id,n_plays,offensive_team,opponent,penalty_yards,result,start_quarter,start_time,start_yard_line,team,yards_gained,drive_id,points,total_yards,yard_line_bin,is_touchdown
0,2:41,2018090908,7,KC,LAC,0,Touchdown,1,09:20,25.0,KC,75,0,7,75,20-30,1
1,1:01,2018090908,4,KC,LAC,0,Punt,1,05:03,12.0,KC,4,1,0,4,10-20,0
2,1:23,2018090908,7,KC,LAC,0,Punt,1,00:21,25.0,KC,30,2,0,30,20-30,0
3,5:47,2018090908,13,KC,LAC,0,Field Goal,2,07:47,25.0,KC,47,3,3,47,20-30,0
4,0:51,2018090908,8,KC,LAC,-5,End of Half,2,00:51,10.0,KC,47,4,0,42,0-10,0


In [93]:
# Per-drive frame for team 'DEN' from the 2013 games file, with the derived
# columns added by format_team_drives; preview the first rows.
den = format_team_drives('DEN', game_file='./data/%i_games_dict.json' % 2013)

den.head()

Unnamed: 0,drive_time,game_id,n_plays,offensive_team,opponent,penalty_yards,result,start_quarter,start_time,start_yard_line,team,yards_gained,drive_id,points,total_yards,yard_line_bin,is_touchdown
0,2:01,2013090500,7,DEN,BAL,0,Punt,1,13:10,23.0,DEN,18,0,0,18,20-30,0
1,3:32,2013090500,9,DEN,BAL,0,Punt,1,07:11,20.0,DEN,41,1,0,41,10-20,0
2,1:48,2013090500,7,DEN,BAL,5,Punt,2,14:45,29.0,DEN,8,2,0,13,20-30,0
3,0:05,2013090500,2,DEN,BAL,0,Touchdown,2,11:40,76.0,DEN,24,3,7,24,70-80,1
4,2:29,2013090500,9,DEN,BAL,-10,Touchdown,2,08:03,20.0,DEN,90,4,7,80,10-20,1


In [80]:
# Per-drive frame for team 'ATL' from the 2016 games file, with the derived
# columns added by format_team_drives; preview the first rows.
atl = format_team_drives('ATL', game_file='./data/%i_games_dict.json' % 2016)

atl.head()

Unnamed: 0,drive_time,game_id,n_plays,offensive_team,opponent,penalty_yards,result,start_quarter,start_time,start_yard_line,team,yards_gained,drive_id,points,total_yards,yard_line_bin,is_touchdown
0,2:06,2016091100,5,ATL,TB,0,Punt,1,15:00,19.0,ATL,9,0,0,9,10-20,0
1,2:44,2016091100,6,ATL,TB,0,Field Goal,1,09:52,25.0,ATL,59,1,3,59,20-30,0
2,1:16,2016091100,4,ATL,TB,0,Punt,1,05:34,36.0,ATL,6,2,0,6,30-40,0
3,2:20,2016091100,7,ATL,TB,-9,Touchdown,1,02:31,87.0,ATL,22,3,7,13,80-90,1
4,3:31,2016091100,8,ATL,TB,0,Punt,2,10:25,25.0,ATL,31,4,0,31,20-30,0


In [81]:
# Distribution of total_yards (penalty_yards + yards_gained) over the KC drives.
kc['total_yards'].describe()

count    183.000000
mean      38.928962
std       32.341590
min      -15.000000
25%        5.000000
50%       35.000000
75%       73.000000
max       99.000000
Name: total_yards, dtype: float64

In [82]:
# Distribution of total_yards (penalty_yards + yards_gained) over the ATL drives.
atl['total_yards'].describe()

count    193.000000
mean      39.834197
std       32.368057
min      -21.000000
25%        9.000000
50%       39.000000
75%       74.000000
max       99.000000
Name: total_yards, dtype: float64

In [83]:
# Lower percentiles (10th-40th, plus the median describe always adds) of where
# the KC drives start. describe() ignores missing start_yard_line values.
kc['start_yard_line'].describe([.1, .2, .3, .4])

count    181.000000
mean      30.088398
std       18.604746
min        1.000000
10%       11.000000
20%       20.000000
30%       23.000000
40%       25.000000
50%       25.000000
max       98.000000
Name: start_yard_line, dtype: float64

In [84]:
# Lower percentiles (10th-40th, plus the median describe always adds) of where
# the ATL drives start. describe() ignores missing start_yard_line values.
atl['start_yard_line'].describe([.1, .2, .3, .4])

count    192.000000
mean      29.348958
std       18.717746
min        1.000000
10%        9.100000
20%       19.000000
30%       21.000000
40%       25.000000
50%       25.000000
max       97.000000
Name: start_yard_line, dtype: float64

In [85]:
# Scratch check: natural log of 1.1.
# NOTE(review): presumably probing a log-based weighting -- confirm and label.
np.log(1.1)

0.09531017980432493

In [86]:
# Scratch check: natural log of 1.9.
# NOTE(review): presumably probing a log-based weighting -- confirm and label.
np.log(1.9)

0.6418538861723947

In [87]:
# Scratch check: natural log of 1.5.
# NOTE(review): presumably probing a log-based weighting -- confirm and label.
np.log(1.5)

0.4054651081081644

### Combine all drives, weight by np.log(100 - start_yard_line)

Score all defenses this way as well, to get each offense's average opponent strength.

In [88]:
# Share of the KC drives whose result was 'Touchdown' (is_touchdown is 0/1).
kc['is_touchdown'].mean()

0.3879781420765027

In [91]:
# Share of the ATL drives whose result was 'Touchdown' (is_touchdown is 0/1).
atl['is_touchdown'].mean()

0.3626943005181347

In [89]:
# Per starting-yard-line bin for KC: number of drives and touchdown rate.
kc.groupby('yard_line_bin')['is_touchdown'].agg(['count', 'mean'])

Unnamed: 0_level_0,count,mean
yard_line_bin,Unnamed: 1_level_1,Unnamed: 2_level_1
0-10,17,0.411765
10-20,31,0.258065
20-30,77,0.402597
30-40,22,0.5
40-50,12,0.25
50-60,6,0.166667
60-70,7,0.428571
70-80,5,0.8
80-90,1,1.0
90-100,3,0.666667


In [90]:
# Per starting-yard-line bin for ATL: number of drives and touchdown rate.
atl.groupby('yard_line_bin')['is_touchdown'].agg(['count', 'mean'])

Unnamed: 0_level_0,count,mean
yard_line_bin,Unnamed: 1_level_1,Unnamed: 2_level_1
0-10,23,0.304348
10-20,31,0.354839
20-30,83,0.39759
30-40,17,0.235294
40-50,13,0.461538
50-60,12,0.416667
60-70,5,0.2
70-80,1,0.0
80-90,5,0.4
90-100,2,0.5


In [95]:
# Per starting-yard-line bin for DEN: number of drives and touchdown rate.
den.groupby('yard_line_bin')['is_touchdown'].agg(['count', 'mean'])

Unnamed: 0_level_0,count,mean
yard_line_bin,Unnamed: 1_level_1,Unnamed: 2_level_1
0-10,21,0.190476
10-20,101,0.29703
20-30,33,0.363636
30-40,28,0.25
40-50,20,0.4
50-60,10,0.5
60-70,5,0.6
70-80,9,0.666667
80-90,2,1.0
90-100,0,


In [97]:
# Scratch check: natural log of 1.8.
# NOTE(review): presumably probing a log-based weighting -- confirm and label.
np.log(1.8)

0.5877866649021191

In [99]:
# Scratch ratio of two hand-typed values; their origin is not recorded here.
# TODO(review): name the inputs or compute them from the frames above.
.58 / .40

1.4499999999999997

In [100]:
# Scratch ratio of two hand-typed values; their origin is not recorded here.
# TODO(review): name the inputs or compute them from the frames above.
.5 / .3

1.6666666666666667