In [1]:
import numpy as np
import pandas as pd

In [2]:
# Widen DataFrame rendering: show up to 100 columns and up to 100 characters per cell.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_colwidth', 100)

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases removed the old names; prefer the new name, fall back on older installs.
plt.style.use(
    'seaborn-v0_8-muted' if 'seaborn-v0_8-muted' in plt.style.available
    else 'seaborn-muted'
)

In [4]:
# Load the 2018 play-by-play file and order it by date and game, then by quarter
# (ascending) and game clock (Minute, Second descending).
df = (
    pd.read_csv('./data/pbp-2018.csv')
    .sort_values(
        by=['GameDate', 'GameId', 'Quarter', 'Minute', 'Second'],
        ascending=[True, True, True, False, False],
    )
)

df.head()

Unnamed: 0,GameId,GameDate,Quarter,Minute,Second,OffenseTeam,DefenseTeam,Down,ToGo,YardLine,Unnamed: 10,SeriesFirstDown,Unnamed: 12,NextScore,Description,TeamWin,Unnamed: 16,Unnamed: 17,SeasonYear,Yards,Formation,PlayType,IsRush,IsPass,IsIncomplete,IsTouchdown,PassType,IsSack,IsChallenge,IsChallengeReversed,Challenger,IsMeasurement,IsInterception,IsFumble,IsPenalty,IsTwoPointConversion,IsTwoPointConversionSuccessful,RushDirection,YardLineFixed,YardLineDirection,IsPenaltyAccepted,PenaltyTeam,IsNoPlay,PenaltyType,PenaltyYards
981,2018090600,2018-09-06,1,15,0,PHI,ATL,0,0,35,,1,,0,"4-J.ELLIOTT KICKS 65 YARDS FROM PHI 35 TO END ZONE, TOUCHBACK.",0,,,2018,0,UNDER CENTER,KICK OFF,0,0,0,0,,0,0,0,,0,0,0,0,0,0,,35,OWN,0,,0,,0
982,2018090600,2018-09-06,1,15,0,ATL,PHI,1,10,25,,0,,0,"(15:00) PENALTY ON ATL-82-L.PAULSEN, FALSE START, 5 YARDS, ENFORCED AT ATL 25 - NO PLAY.",0,,,2018,0,UNDER CENTER,NO PLAY,0,0,0,0,,0,0,0,,0,0,0,1,0,0,,25,OWN,1,ATL,1,FALSE START,5
983,2018090600,2018-09-06,1,15,0,ATL,PHI,1,15,20,,0,,0,(15:00) 2-M.RYAN PASS SHORT RIGHT TO 11-J.JONES PUSHED OB AT ATL 30 FOR 10 YARDS (27-M.JENKINS).,0,,,2018,10,UNDER CENTER,PASS,0,1,0,0,SHORT RIGHT,0,0,0,,0,0,0,0,0,0,,20,OWN,0,,0,,0
984,2018090600,2018-09-06,1,14,22,ATL,PHI,2,5,30,,1,,0,(14:22) 11-J.JONES LEFT END PUSHED OB AT ATL 41 FOR 11 YARDS (96-D.BARNETT).,0,,,2018,11,UNDER CENTER,RUSH,1,0,0,0,,0,0,0,,0,0,0,0,0,0,LEFT END,30,OWN,0,,0,,0
985,2018090600,2018-09-06,1,13,46,ATL,PHI,1,10,41,,1,,0,(13:46) 24-D.FREEMAN RIGHT END TO PHI 39 FOR 20 YARDS (27-M.JENKINS).,0,,,2018,20,UNDER CENTER,RUSH,1,0,0,0,,0,0,0,,0,0,0,0,0,0,RIGHT END,41,OWN,0,,0,,0


In [5]:
# List the column labels of the loaded play-by-play frame.
df.columns

Index(['GameId', 'GameDate', 'Quarter', 'Minute', 'Second', 'OffenseTeam',
       'DefenseTeam', 'Down', 'ToGo', 'YardLine', 'Unnamed: 10',
       'SeriesFirstDown', 'Unnamed: 12', 'NextScore', 'Description', 'TeamWin',
       'Unnamed: 16', 'Unnamed: 17', 'SeasonYear', 'Yards', 'Formation',
       'PlayType', 'IsRush', 'IsPass', 'IsIncomplete', 'IsTouchdown',
       'PassType', 'IsSack', 'IsChallenge', 'IsChallengeReversed',
       'Challenger', 'IsMeasurement', 'IsInterception', 'IsFumble',
       'IsPenalty', 'IsTwoPointConversion', 'IsTwoPointConversionSuccessful',
       'RushDirection', 'YardLineFixed', 'YardLineDirection',
       'IsPenaltyAccepted', 'PenaltyTeam', 'IsNoPlay', 'PenaltyType',
       'PenaltyYards'],
      dtype='object')

In [6]:
# Distinct OffenseTeam codes; note that the result includes nan (rows with no offense team).
df['OffenseTeam'].unique()

array(['PHI', 'ATL', nan, 'BUF', 'BAL', 'CLE', 'PIT', 'IND', 'CIN', 'MIA',
       'TEN', 'SF', 'MIN', 'HOU', 'NE', 'TB', 'NO', 'NYG', 'JAX', 'KC',
       'LAC', 'ARI', 'WAS', 'CAR', 'DAL', 'SEA', 'DEN', 'CHI', 'GB',
       'DET', 'NYJ', 'LA', 'OAK'], dtype=object)

In [7]:
# Distinct PlayType labels; note that the result includes nan (rows with no play type).
df['PlayType'].unique()

array(['KICK OFF', 'NO PLAY', 'PASS', 'RUSH', 'PUNT', 'SCRAMBLE', 'SACK',
       'FIELD GOAL', nan, 'TIMEOUT', 'QB KNEEL', 'EXTRA POINT',
       'TWO-POINT CONVERSION', 'FUMBLES', 'EXCEPTION', 'CLOCK STOP',
       'PENALTY'], dtype=object)

**To do:**
* Script to score games (cols: selected_team, opponent)
* Script to assign drive ID
* Drive efficiency (average points based on field position, field position given on turnovers/punts)
    * AKA what offense is best in terms of drive success in both points scored and expected points given up
* Percentage of drives with big plays, percentage of drives with scores, percentage of scoring drives with big plays
    * Break out by big runs and big passes

In [8]:
import re
# Pattern for a player tag: digits, a hyphen, word characters, a dot, optional
# whitespace, then word characters (e.g. "27-K.HUNT").
# NOTE(review): not referenced by any cell visible here -- confirm it is still needed.
player_regex = r'\d+-\w+\.\s*\w+'

In [9]:
# Preview Description and Yards for the rows whose PlayType is 'PENALTY'.
df.loc[df['PlayType'] == 'PENALTY', ['Description', 'Yards']].head()

Unnamed: 0,Description,Yards
28726,"(1:08) (RUN FORMATION) PENALTY ON DEN, DELAY OF GAME, DECLINED.",0
39784,"(3:04) (SHOTGUN) PENALTY ON GB, DELAY OF GAME, DECLINED.",0


In [10]:
def clean_exceptions(df):
    """Reclassify plays recorded with ``PlayType == 'EXCEPTION'``.

    Each EXCEPTION row's ``Description`` is searched for a yardage phrase
    ("FOR <n> YARD...") or "FOR NO GAIN". Rows with either phrase are relabelled
    as ``'RUSH'`` with the parsed yardage (0 for no gain); all other rows stay
    ``'EXCEPTION'`` with ``Yards`` set to 0.

    Args:
        df: Play-by-play frame with ``PlayType``, ``Description`` and ``Yards``
            columns. It is modified in place.

    Returns:
        The same ``df`` object, with the EXCEPTION rows updated.
    """
    # Capture the number directly so parsing does not depend on the separator
    # being a literal space while the pattern accepts any whitespace.
    yardage_regex = re.compile(r'FOR\s(-?\d+)\sYARD')
    nogain_regex = re.compile(r'FOR\sNO\sGAIN')

    is_exception = df['PlayType'] == 'EXCEPTION'
    if not is_exception.any():
        # Nothing to correct; avoid assigning empty lists into the frame.
        return df

    # Corrected values, in the same order as the selected rows.
    yards = []
    plays = []
    for description in df.loc[is_exception, 'Description'].values:
        # A missing description (NaN/None) cannot be parsed; treat it as no match.
        text = description if isinstance(description, str) else ''
        gained = yardage_regex.search(text)
        if gained:
            play_yards, play_type = int(gained.group(1)), 'RUSH'
        elif nogain_regex.search(text):
            play_yards, play_type = 0, 'RUSH'
        else:
            play_yards, play_type = 0, 'EXCEPTION'
        yards.append(play_yards)
        plays.append(play_type)

    # Write Yards first; the mask was computed before PlayType is overwritten.
    df.loc[is_exception, 'Yards'] = yards
    df.loc[is_exception, 'PlayType'] = plays
    return df


def remove_irrelevant_rows(df, rows):
    """Drop rows that should not be analysed as plays.

    A row is kept only when ``IsNoPlay`` equals 0, ``PlayType`` is not null,
    and ``PlayType`` is not one of the labels listed in ``rows``.
    """
    counted_play = df['IsNoPlay'] == 0
    has_play_type = df['PlayType'].notnull()
    wanted_type = ~df['PlayType'].isin(rows)
    return df.loc[counted_play & has_play_type & wanted_type]


def get_team_data(df, team='KC', remove_penalties=False):
    """Build the cleaned, ordered play frame for a single team.

    Steps:
        * Reclassify EXCEPTION rows via ``clean_exceptions`` (which writes
          into ``df``).
        * Drop no-plays, rows without a PlayType, and the TIMEOUT, CLOCK STOP,
          NO PLAY, EXCEPTION and PENALTY play types.
        * Keep rows where ``team`` is the OffenseTeam or the DefenseTeam.
        * Sort via ``sort_team_data`` (adds ``ScoreOrder`` and ``PlayId``).
        * Add ``Team`` and ``Opponent`` columns.

    Args:
        df: Full play-by-play frame.
        team: Team code to select.
        remove_penalties: Currently unused; kept so existing calls still work.

    Returns:
        A new DataFrame holding only ``team``'s plays.
    """
    df = clean_exceptions(df)
    df = remove_irrelevant_rows(
        df, ['TIMEOUT', 'CLOCK STOP', 'NO PLAY', 'EXCEPTION', 'PENALTY']
    )
    team_df = df.loc[(df['OffenseTeam'] == team) | (df['DefenseTeam'] == team)].copy()
    team_df = sort_team_data(team_df)
    team_df['Team'] = team
    # Pick the opponent row by row: the defense when `team` has the ball,
    # otherwise the offense. This avoids relying on a flattened 2-D mask having
    # exactly one non-`team` entry per row.
    team_df['Opponent'] = team_df['DefenseTeam'].where(
        team_df['OffenseTeam'] == team, team_df['OffenseTeam']
    )
    return team_df
    

# Tie-break rank for plays that share the same date, quarter and game clock:
# lower values sort first. Play types not listed here get rank 0.
game_order_dict = {
    'KICK OFF': 2,
    'QB KNEEL': 3,
    'RUSH': 3,
    'PASS': 3,
    'SACK': 3,
    'PUNT': 3,
    'SCRAMBLE': 3,
    'FIELD GOAL': 1,
    'EXTRA POINT': 1,
    'TWO-POINT CONVERSION': 1,
    'FUMBLES': 3,
}


def sort_team_data(team_df):
    """Sort the data by play occurrence and number the plays.

    Adds a ``ScoreOrder`` column (rank looked up in the module-level
    ``game_order_dict``, 0 for unlisted play types), sorts by GameDate and
    Quarter ascending, Minute and Second descending, then ScoreOrder ascending,
    resets the index and stores it as ``PlayId``.

    Args:
        team_df: Play frame with GameDate, Quarter, Minute, Second and PlayType
            columns. It is modified in place.

    Returns:
        The same ``team_df`` object, sorted and with the new columns.
    """
    # Use the shared module-level table instead of a duplicated local copy.
    team_df['ScoreOrder'] = team_df['PlayType'].map(
        lambda play_type: game_order_dict.get(play_type, 0)
    )
    team_df.sort_values(
        ['GameDate', 'Quarter', 'Minute', 'Second', 'ScoreOrder'],
        ascending=[True, True, False, False, True], inplace=True
    )

    team_df.reset_index(inplace=True, drop=True)
    team_df['PlayId'] = team_df.index
    return team_df

In [11]:
# Build the play frame for team code 'KC' from the full play-by-play data.
kc = get_team_data(df, team='KC')

In [12]:
# Descriptions of every play in the full frame whose text mentions 'SAFETY'.
df.loc[(df['Description'].str.contains(r'SAFETY')), 'Description'].values

array(['(12:09) 27-K.HUNT UP THE MIDDLE TACKLED IN END ZONE FOR -1 YARDS, SAFETY (25-A.BURNS, 42-M.BURNETT).',
       '(12:27) 20-I.CROWELL LEFT END TACKLED IN END ZONE FOR -2 YARDS, SAFETY (99-M.DAREUS, 93-C.CAMPBELL).',
       '(:25) (SHOTGUN) 12-A.LUCK FUMBLES (ABORTED) AT IND 9, BALL OUT OF BOUNDS IN END ZONE, SAFETY.',
       '(9:22) 25-P.BARBER LEFT END TACKLED IN END ZONE FOR -1 YARDS, SAFETY (93-T.COLEY).',
       '(13:03) 5-B.PINION PUNT IS BLOCKED BY 58-C.LITTLETON, CENTER-86-K.NELSON, BALL OUT OF BOUNDS IN END ZONE, SAFETY.',
       '(8:13) 6-B.MAYFIELD PASS SHORT LEFT TO 24-N.CHUBB TO CLE 6 FOR 1 YARD (98-V.WILLIAMS). PENALTY ON CLE-69-D.HARRISON, OFFENSIVE HOLDING, 5 YARDS, ENFORCED IN END ZONE, SAFETY - NO PLAY.',
       '(4:17) (SHOTGUN) 3-J.ROSEN PASS INCOMPLETE SHORT MIDDLE [54-C.MARSH]. PENALTY ON ARI-3-J.ROSEN, INTENTIONAL GROUNDING, 8 YARDS, ENFORCED IN END ZONE, SAFETY.',
       '(2:54) 33-A.JONES UP THE MIDDLE TACKLED IN END ZONE FOR -1 YARDS, SAFETY (26-M.BARRON)

### Clean touchdowns
* Was the touchdown reversed? Look for occurrences of `TOUCHDOWN` before `REVERSED`, and for `TOUCHDOWN NULLIFIED`.

In [13]:
def clean_touchdowns(team_df):
    """Reset IsTouchdown to 0 on plays whose touchdown did not stand.

    A flagged touchdown is cleared when the Description has 'TOUCHDOWN'
    followed later by 'REVERSED', or contains 'TOUCHDOWN NULLIFIED'.
    ``team_df`` is updated in place and returned.
    """
    overturned_patterns = (r'TOUCHDOWN.*REVERSED', 'TOUCHDOWN NULLIFIED')
    for pattern in overturned_patterns:
        flagged = team_df['IsTouchdown'] == 1
        overturned = team_df['Description'].str.contains(pattern)
        team_df.loc[flagged & overturned, 'IsTouchdown'] = 0
    return team_df

In [14]:
def clean_turnovers(team_df):
    """Clear turnover flags on plays where the offense kept the ball.

    Adds ``OffenseTeamNextPlay`` (the OffenseTeam of the following play) and:
        * sets ``IsInterception`` to 0 when the Description has 'INTERCEPTED'
          followed by 'REVERSED' and the same team is on offense next play;
        * sets ``IsFumble`` to 0 when the same team is on offense next play.

    When a ``GameId`` column is present the "next play" is looked up within the
    same game, so the last play of one game is never compared with the first
    play of the next; its ``OffenseTeamNextPlay`` is NaN and its flags are left
    untouched. Without ``GameId`` the plain row-after lookup is used.

    Args:
        team_df: Ordered play frame with OffenseTeam, Description,
            IsInterception and IsFumble columns. It is modified in place.

    Returns:
        The same ``team_df`` object.
    """
    if 'GameId' in team_df.columns:
        next_offense = team_df.groupby('GameId')['OffenseTeam'].shift(-1)
    else:
        next_offense = team_df['OffenseTeam'].shift(-1)
    team_df['OffenseTeamNextPlay'] = next_offense

    # NaN on either side compares as False, so game-ending plays keep their flags.
    kept_possession = team_df['OffenseTeam'] == team_df['OffenseTeamNextPlay']
    team_df.loc[
        (team_df['IsInterception'] == 1) &
        (team_df['Description'].str.contains('INTERCEPTED.*REVERSED', na=False)) &
        kept_possession,
        'IsInterception'
    ] = 0
    team_df.loc[
        (team_df['IsFumble'] == 1) & kept_possession,
        'IsFumble'
    ] = 0
    return team_df

### Clean scoring
* Handle safeties -- ensure play wasn't nullified or reversed
* Was the field goal made?
* Was the extra point made?

In [15]:
def add_scoring_flags(team_df):
    """Add scoring flags for field goals, extra points, and safeties.

    New 0/1 columns written into ``team_df``:
        * ``IsFieldGoalSuccessful``: FIELD GOAL play whose Description
          contains 'IS GOOD'.
        * ``IsExtraPointSuccessful``: EXTRA POINT play whose Description
          contains 'IS GOOD'.
        * ``IsSafety``: Description mentions 'SAFETY', does not mention
          'REVERSED', and does not have 'SAFETY' followed by 'NULLIFIED'.

    Args:
        team_df: Play frame with PlayType and Description columns. It is
            modified in place.

    Returns:
        The same ``team_df`` object.
    """
    # na=False keeps every mask strictly boolean even if a Description is missing.
    description = team_df['Description']
    kick_is_good = description.str.contains('IS GOOD', na=False)

    team_df['IsFieldGoalSuccessful'] = 0
    team_df.loc[
        (team_df['PlayType'] == 'FIELD GOAL') & kick_is_good,
        'IsFieldGoalSuccessful'
    ] = 1
    team_df['IsExtraPointSuccessful'] = 0
    team_df.loc[
        (team_df['PlayType'] == 'EXTRA POINT') & kick_is_good,
        'IsExtraPointSuccessful'
    ] = 1
    team_df['IsSafety'] = 0
    # The original repeated the REVERSED test twice; the second check is the
    # nullified case, using the same pattern add_other_scores applies.
    team_df.loc[
        (description.str.contains('SAFETY', na=False)) &
        (~description.str.contains('REVERSED', na=False)) &
        (~description.str.contains(r'SAFETY.*NULLIFIED', na=False)),
        'IsSafety'
    ] = 1
    return team_df

### Add scores
* Which team scored? 
    * Account for interceptions.
    * Account for fumbles. Check `RECOVERED BY {team}`.
* Scores
    * TDs (O & D)
    * Field Goals
    * Extra Points
    * Two Point Conversions
    * Safeties

In [16]:
def add_touchdowns(team_df):
    """Create TeamScore/OpponentScore and credit 6 points for touchdowns.

    Both columns start at 0. A touchdown goes to the side on offense when the
    play has no interception, no fumble, and is neither a kickoff nor a punt.
    It goes to the side on defense when the play is an interception, a fumble,
    a punt or a kickoff. ``team_df`` is updated in place and returned.
    """
    team_df['TeamScore'] = 0
    team_df['OpponentScore'] = 0

    is_td = team_df['IsTouchdown'] == 1
    is_kick = team_df['PlayType'].isin(['KICK OFF', 'PUNT'])
    no_turnover = (team_df['IsInterception'] == 0) & (team_df['IsFumble'] == 0)
    changed_hands = (
        (team_df['IsInterception'] == 1) |
        (team_df['IsFumble'] == 1) |
        is_kick
    )
    # Touchdown kept by the offense vs. scored by the side listed as defense.
    offense_td = is_td & no_turnover & ~is_kick
    defense_td = is_td & changed_hands

    for side, score_column in (('Team', 'TeamScore'), ('Opponent', 'OpponentScore')):
        on_offense = team_df['OffenseTeam'] == team_df[side]
        on_defense = team_df['DefenseTeam'] == team_df[side]
        team_df.loc[offense_td & on_offense, score_column] = 6
        team_df.loc[defense_td & on_defense, score_column] = 6
    return team_df
    
    
    
def add_other_scores(team_df):
    """Credit non-touchdown points to TeamScore / OpponentScore.

    Points go to the side on offense for a successful extra point (1),
    two-point conversion (2) or field goal (3). A safety gives 2 points to the
    side on defense, unless the Description has 'SAFETY' followed by 'REVERSED'
    or 'NULLIFIED'. ``team_df`` is updated in place and returned.
    """
    sides = (('Team', 'TeamScore'), ('Opponent', 'OpponentScore'))
    # (flag column, points); processed in this order, later writes win.
    offense_scores = (
        ('IsExtraPointSuccessful', 1),
        ('IsTwoPointConversionSuccessful', 2),
        ('IsFieldGoalSuccessful', 3),
    )
    for flag, points in offense_scores:
        for side, score_column in sides:
            scored = (
                (team_df[flag] == 1) &
                (team_df['OffenseTeam'] == team_df[side])
            )
            team_df.loc[scored, score_column] = points

    for side, score_column in sides:
        safety_for_side = (
            (team_df['IsSafety'] == 1) &
            (~team_df['Description'].str.contains(r'SAFETY.*REVERSED')) &
            (~team_df['Description'].str.contains(r'SAFETY.*NULLIFIED')) &
            (team_df['DefenseTeam'] == team_df[side])
        )
        team_df.loc[safety_for_side, score_column] = 2
    return team_df

In [17]:
def scoring_pipeline(team_df):
    """Run the scoring steps over ``team_df`` in order and return the result.

    Order: clean_touchdowns, clean_turnovers, add_scoring_flags,
    add_touchdowns, add_other_scores. Each step receives the previous
    step's return value.
    """
    steps = (
        clean_touchdowns,
        clean_turnovers,
        add_scoring_flags,
        add_touchdowns,
        add_other_scores,
    )
    scored = team_df
    for step in steps:
        scored = step(scored)
    return scored

In [18]:
# Apply the scoring steps to the KC frame, then preview the first 20 plays.
kc = scoring_pipeline(kc)

kc.head(20)

Unnamed: 0,GameId,GameDate,Quarter,Minute,Second,OffenseTeam,DefenseTeam,Down,ToGo,YardLine,Unnamed: 10,SeriesFirstDown,Unnamed: 12,NextScore,Description,TeamWin,Unnamed: 16,Unnamed: 17,SeasonYear,Yards,Formation,PlayType,IsRush,IsPass,IsIncomplete,IsTouchdown,PassType,IsSack,IsChallenge,IsChallengeReversed,Challenger,IsMeasurement,IsInterception,IsFumble,IsPenalty,IsTwoPointConversion,IsTwoPointConversionSuccessful,RushDirection,YardLineFixed,YardLineDirection,IsPenaltyAccepted,PenaltyTeam,IsNoPlay,PenaltyType,PenaltyYards,ScoreOrder,PlayId,Team,Opponent,OffenseTeamNextPlay,IsFieldGoalSuccessful,IsExtraPointSuccessful,IsSafety,TeamScore,OpponentScore
0,2018090908,2018-09-09,1,15,0,KC,LAC,0,0,35,,1,,0,"7-H.BUTKER KICKS 65 YARDS FROM KC 35 TO END ZONE, TOUCHBACK.",0,,,2018,0,UNDER CENTER,KICK OFF,0,0,0,0,,0,0,0,,0,0,0,0,0,0,,35,OWN,0,,0,,0,2,0,KC,LAC,LAC,0,0,0,0,0
1,2018090908,2018-09-09,1,15,0,LAC,KC,1,10,25,,0,,0,(15:00) (SHOTGUN) 17-P.RIVERS PASS SHORT MIDDLE TO 13-K.ALLEN TO LAC 33 FOR 8 YARDS (21-E.MURRAY).,0,,,2018,8,SHOTGUN,PASS,0,1,0,0,SHORT MIDDLE,0,0,0,,0,0,0,0,0,0,,25,OWN,0,,0,,0,3,1,KC,LAC,LAC,0,0,0,0,0
2,2018090908,2018-09-09,1,14,25,LAC,KC,2,2,33,,0,,0,"(14:25) (SHOTGUN) 28-M.GORDON UP THE MIDDLE TO LAC 34 FOR 1 YARD (55-D.FORD, 23-K.FULLER).",0,,,2018,1,SHOTGUN,RUSH,1,0,0,0,,0,0,0,,0,0,0,0,0,0,CENTER,33,OWN,0,,0,,0,3,2,KC,LAC,LAC,0,0,0,0,0
3,2018090908,2018-09-09,1,14,3,LAC,KC,3,1,34,,0,,0,(14:03) (SHOTGUN) 28-M.GORDON UP THE MIDDLE TO LAC 34 FOR NO GAIN (59-R.RAGLAND; 38-R.PARKER).,0,,,2018,0,SHOTGUN,RUSH,1,0,0,0,,0,0,0,,0,0,0,0,0,0,CENTER,34,OWN,0,,0,,0,3,3,KC,LAC,LAC,0,0,0,0,0
4,2018090908,2018-09-09,1,13,24,LAC,KC,4,1,34,,0,,0,"(13:24) 8-D.KASER PUNTS 57 YARDS TO KC 9, CENTER-47-M.WINDT. 10-T.HILL FOR 91 YARDS, TOUCHDOWN.",0,,,2018,0,PUNT,PUNT,0,0,0,1,,0,0,0,,0,0,0,0,0,0,,34,OWN,0,,0,,0,3,4,KC,LAC,KC,0,0,0,6,0
5,2018090908,2018-09-09,1,13,3,KC,LAC,0,0,85,,1,,0,"7-H.BUTKER EXTRA POINT IS GOOD, CENTER-41-J.WINCHESTER, HOLDER-2-D.COLQUITT.",0,,,2018,0,UNDER CENTER,EXTRA POINT,0,0,0,0,,0,0,0,,0,0,0,0,0,0,,15,OPP,0,,0,,0,1,5,KC,LAC,KC,0,1,0,1,0
6,2018090908,2018-09-09,1,13,3,KC,LAC,0,0,35,,1,,0,"7-H.BUTKER KICKS 65 YARDS FROM KC 35 TO END ZONE, TOUCHBACK.",0,,,2018,0,UNDER CENTER,KICK OFF,0,0,0,0,,0,0,0,,0,0,0,0,0,0,,35,OWN,0,,0,,0,2,6,KC,LAC,LAC,0,0,0,0,0
7,2018090908,2018-09-09,1,13,3,LAC,KC,1,10,25,,0,,0,(13:03) 30-A.EKELER LEFT END TO LAC 32 FOR 7 YARDS (21-E.MURRAY; 59-R.RAGLAND).,0,,,2018,7,UNDER CENTER,RUSH,1,0,0,0,,0,0,0,,0,0,0,0,0,0,LEFT END,25,OWN,0,,0,,0,3,7,KC,LAC,LAC,0,0,0,0,0
8,2018090908,2018-09-09,1,12,21,LAC,KC,1,10,37,,0,,0,(12:21) 28-M.GORDON UP THE MIDDLE TO LAC 40 FOR 3 YARDS (98-X.WILLIAMS; 59-R.RAGLAND).,0,,,2018,3,UNDER CENTER,RUSH,1,0,0,0,,0,0,0,,0,0,0,0,0,0,CENTER,37,OWN,0,,0,,0,3,8,KC,LAC,LAC,0,0,0,0,0
9,2018090908,2018-09-09,1,11,44,LAC,KC,2,7,40,,1,,0,(11:44) (SHOTGUN) 17-P.RIVERS PASS SHORT RIGHT TO 13-K.ALLEN RAN OB AT LAC 48 FOR 8 YARDS.,0,,,2018,8,SHOTGUN,PASS,0,1,0,0,SHORT RIGHT,0,0,0,,0,0,0,0,0,0,,40,OWN,0,,0,,0,3,9,KC,LAC,LAC,0,0,0,0,0


In [19]:
# Sum TeamScore and OpponentScore per GameId to get each game's totals.
kc.groupby('GameId')[['TeamScore', 'OpponentScore']].sum()

Unnamed: 0_level_0,TeamScore,OpponentScore
GameId,Unnamed: 1_level_1,Unnamed: 2_level_1
2018090908,38,28
2018091605,42,37
2018092305,38,27
2018100100,27,23
2018100705,30,14
2018101412,40,43
2018102105,45,10
2018102805,30,23
2018110403,37,21
2018111105,26,14


In [20]:
# Show plays flagged as safeties in games against PIT, with the points credited to the opponent.
kc.loc[(kc['Opponent'] == 'PIT') & (kc['IsSafety'] > 0), ['Description', 'OpponentScore']].values

array([['(12:09) 27-K.HUNT UP THE MIDDLE TACKLED IN END ZONE FOR -1 YARDS, SAFETY (25-A.BURNS, 42-M.BURNETT).',
        2]], dtype=object)

### Get drives
* Exclude KICKOFFS and PUNTS from the plays that make up a drive, but still include them for scoring
* max(IsTouchdown) by driveId for td_drive

In [21]:
def add_drive_id(team_df):
    """Add DriveId for drive analysis."""
    for offense, defense, description, penalty, penalty_team

SyntaxError: invalid syntax (<ipython-input-21-22cdbd5031d5>, line 3)