# Day 21

Use NFL play-by-play data from the 1999 through 2022 seasons to create a dataset at the game-team level that shows each team's final score and the number of each type of scoring play that made up that score. Ultimately, I want to compare the percentage of points scored by the offense versus the defense to see how teams perform and whether there are any outliers at the weekly, team, or season level.

Current challenges:
- Safeties are hard to parse as there are different conditions that can lead to a safety.

Solutions:
- Get the score from another table, join on game_id and team, and just use the non-defense and special teams touchdowns
- Utilize other variables from the play-by-play data to help with data cleaning

In [1]:
import pandas as pd
import sqlite3
import nfl_data_py as nfl

# Connect to the SQLite database file used to store the play-by-play table
DB_PATH = '../../data/db/database.db'
conn = sqlite3.connect(DB_PATH)

## Scrape Play-by-Play Data

In [2]:
# Play-by-play fields requested from nfl_data_py.
# The order of this list is preserved because `pbp[cols]` in the next cell
# uses it to set the column order of the stored table.
cols = [
    # game / team identifiers
    'game_id', 'season', 'week', 'season_type',
    'home_team', 'away_team', 'posteam', 'defteam',
    # touchdown flags
    'touchdown', 'pass_touchdown', 'rush_touchdown', 'return_touchdown',
    # kicks and conversions
    'extra_point_attempt', 'extra_point_result', 'two_point_attempt',
    'field_goal_attempt', 'field_goal_result', 'two_point_conv_result',
    # other play outcomes
    'safety', 'success', 'td_team',
    # score columns
    'posteam_score', 'defteam_score',
    'posteam_score_post', 'defteam_score_post',
    # play description text
    'desc',
]

# Seasons 1999 through 2022 (the end of the range is exclusive).
pbp = nfl.import_pbp_data(
    years=range(1999, 2023),
    columns=cols,
    downcast=True,
    cache=False,
    alt_path=None,
)
# Store the frame as table `pbp`, replacing any existing table of that name.
pbp.to_sql(name='pbp', con=conn, if_exists='replace', index=False)

1999 done.
2000 done.
2001 done.
2002 done.
2003 done.
2004 done.
2005 done.
2006 done.
2007 done.
2008 done.
2009 done.
2010 done.
2011 done.
2012 done.
2013 done.
2014 done.
2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
Downcasting floats.


  plays.loc[:, cols] = plays.loc[:, cols].astype(numpy.float32)


1125100

In [3]:
# Restrict the frame to the requested columns (in `cols` order), then
# overwrite the `pbp` table with the trimmed frame.
pbp = pbp.loc[:, cols]
pbp.to_sql(name='pbp', con=conn, if_exists='replace', index=False)

1125100

In [4]:
# Safety plays on which the possessing team's score after the play differs
# from its score before the play.
is_safety = pbp['safety'].eq(1)
posteam_score_changed = pbp['posteam_score_post'].ne(pbp['posteam_score'])
condition = is_safety & posteam_score_changed

pbp.loc[condition]

Unnamed: 0,game_id,season,week,season_type,home_team,away_team,posteam,defteam,touchdown,pass_touchdown,...,field_goal_result,two_point_conv_result,safety,success,td_team,posteam_score,defteam_score,posteam_score_post,defteam_score_post,desc
9573,1999_04_PHI_NYG,1999,4,REG,NYG,PHI,PHI,NYG,0.0,0.0,...,,,1.0,0.0,,0.0,7.0,2.0,7.0,(6:36) D.Pederson pass intended for T.Small IN...
51571,2000_03_ATL_CAR,2000,3,REG,CAR,ATL,ATL,CAR,0.0,0.0,...,,,1.0,1.0,,13.0,10.0,15.0,10.0,(2:24) J.Anderson left end to CAR 16 for 42 ya...
86331,2000_16_OAK_SEA,2000,16,REG,SEA,LV,SEA,LV,0.0,0.0,...,,,1.0,1.0,,19.0,24.0,21.0,24.0,(2:40) R.Watters left guard to OAK 28 for 53 y...
125436,2001_14_DAL_SEA,2001,14,REG,SEA,DAL,SEA,DAL,0.0,0.0,...,,,1.0,1.0,,10.0,3.0,12.0,3.0,"(4:01) 10-J.Feagles punts 58 yards to DAL 8, C..."
192439,2003_03_NO_TEN,2003,3,REG,TEN,NO,NO,TEN,0.0,0.0,...,,,1.0,1.0,,0.0,3.0,2.0,3.0,(6:18) (Punt formation) 17-M.Berger punts 46 y...
489358,2009_08_STL_DET,2009,8,REG,DET,LA,DET,LA,0.0,0.0,...,,,1.0,0.0,,0.0,3.0,2.0,3.0,(12:17) (Shotgun) 9-M.Stafford pass short midd...
875877,2017_08_LAC_NE,2017,8,REG,NE,LAC,NE,LAC,0.0,0.0,...,,,1.0,1.0,,7.0,7.0,9.0,7.0,"(9:19) 6-R.Allen punts 54 yards to LAC 11, Cen..."
887661,2017_13_DEN_MIA,2017,13,REG,MIA,DEN,MIA,DEN,0.0,0.0,...,,,1.0,1.0,,33.0,9.0,35.0,9.0,(9:03) (Punt formation) 16-M.Haack punts 52 ya...
939302,2018_14_JAX_TEN,2018,14,REG,TEN,JAX,JAX,TEN,0.0,0.0,...,,,1.0,1.0,,0.0,7.0,2.0,7.0,(:59) (Punt formation) 9-L.Cooke punts 47 yard...
1038237,2020_15_KC_NO,2020,15,REG,NO,KC,NO,KC,0.0,0.0,...,,,1.0,1.0,,7.0,14.0,9.0,14.0,"(:20) 6-T.Morstead punts 51 yards to KC 13, Ce..."


## Get Score Summary Table

In [5]:
# Build one row per team per game holding the count of each scoring-play type
# and a score reconstructed from those counts.
#
# Notes on the query design:
# - The join is driven by `teams` (every home/away team of every game) rather
#   than by `offense`, because `offense` only contains teams that scored at
#   least one touchdown while in possession; driving from it silently dropped
#   every team-game without an offensive touchdown.
# - Safeties are credited to whichever team's score went up on the play. The
#   inspection cell above shows safety plays where the possessing team's score
#   increased, so always crediting `defteam` mis-assigns those two points.
# - String literals use single quotes; SQLite treats "" as an identifier and
#   only falls back to a string literal as a legacy quirk.
query = """
WITH teams AS (
    -- Every team that appears as the home or away side of a game
    SELECT game_id, season, week, home_team, away_team, home_team AS posteam
    FROM pbp
    WHERE home_team IS NOT NULL AND home_team <> ''
    UNION
    SELECT game_id, season, week, home_team, away_team, away_team AS posteam
    FROM pbp
    WHERE away_team IS NOT NULL AND away_team <> ''),
-- Touchdowns scored by the team in possession
offense AS (
    SELECT
        game_id,
        posteam,
        SUM(touchdown) AS tot_tds,
        SUM(pass_touchdown) AS tot_pass_tds,
        SUM(rush_touchdown) AS tot_rush_tds,
        SUM(return_touchdown) AS tot_ret_tds
    FROM pbp
    WHERE posteam IS NOT NULL
        AND posteam <> ''
        AND posteam = td_team
    GROUP BY game_id, posteam),
extra_pts AS (
    SELECT
        game_id,
        posteam,
        COUNT(*) AS tot_extra_pts
    FROM pbp
    WHERE extra_point_attempt = 1 AND extra_point_result = 'good'
    GROUP BY game_id, posteam),
field_goals AS (
    SELECT
        game_id,
        posteam,
        COUNT(*) AS tot_field_goals
    FROM pbp
    WHERE field_goal_attempt = 1 AND field_goal_result = 'made'
    GROUP BY game_id, posteam),
two_pt_convs AS (
    SELECT
        game_id,
        posteam,
        COUNT(*) AS tot_2pt_conv
    FROM pbp
    WHERE two_point_attempt = 1 AND two_point_conv_result = 'success'
    GROUP BY game_id, posteam),
-- Touchdowns scored by the team listed as defteam on the play
-- (original note: defensive TDs and punt/kickoff return TDs)
defense AS (
    SELECT
        game_id,
        td_team AS team,
        COUNT(*) AS tot_def_tds
    FROM pbp
    WHERE td_team = defteam
    GROUP BY game_id, td_team),
-- Credit each safety to the team whose score increased on the play;
-- fall back to defteam when the score columns do not show a posteam gain.
safeties AS (
    SELECT
        game_id,
        team,
        COUNT(*) AS tot_safeties
    FROM (
        SELECT
            game_id,
            CASE WHEN posteam_score_post > posteam_score THEN posteam
                 ELSE defteam
            END AS team
        FROM pbp
        WHERE safety = 1)
    GROUP BY game_id, team),
joined AS (
    SELECT
        teams.*,
        COALESCE(offense.tot_tds, 0.0) AS tot_tds,
        COALESCE(offense.tot_pass_tds, 0.0) AS tot_pass_tds,
        COALESCE(offense.tot_rush_tds, 0.0) AS tot_rush_tds,
        COALESCE(offense.tot_ret_tds, 0.0) AS tot_ret_tds,
        COALESCE(extra_pts.tot_extra_pts, 0) AS tot_extra_pts,
        COALESCE(field_goals.tot_field_goals, 0) AS tot_field_goals,
        COALESCE(two_pt_convs.tot_2pt_conv, 0) AS tot_2pt_conv,
        COALESCE(defense.tot_def_tds, 0) AS tot_def_tds,
        COALESCE(safeties.tot_safeties, 0) AS tot_safeties
    FROM teams
    LEFT JOIN offense
        ON offense.game_id = teams.game_id
            AND offense.posteam = teams.posteam
    LEFT JOIN extra_pts
        ON extra_pts.game_id = teams.game_id
            AND extra_pts.posteam = teams.posteam
    LEFT JOIN field_goals
        ON field_goals.game_id = teams.game_id
            AND field_goals.posteam = teams.posteam
    LEFT JOIN two_pt_convs
        ON two_pt_convs.game_id = teams.game_id
            AND two_pt_convs.posteam = teams.posteam
    LEFT JOIN defense
        ON defense.game_id = teams.game_id
            AND defense.team = teams.posteam
    LEFT JOIN safeties
        ON safeties.game_id = teams.game_id
            AND safeties.team = teams.posteam
)
SELECT *,
    (tot_pass_tds * 6
    + tot_rush_tds * 6
    + tot_ret_tds * 6
    + tot_extra_pts * 1
    + tot_field_goals * 3
    + tot_2pt_conv * 2
    + tot_def_tds * 6
    + tot_safeties * 2) AS score
FROM joined
ORDER BY game_id, posteam
"""

df_pbp = pd.read_sql(query, conn)
df_pbp.head(10)

Unnamed: 0,game_id,season,week,home_team,away_team,posteam,tot_tds,tot_pass_tds,tot_rush_tds,tot_ret_tds,tot_extra_pts,tot_field_goals,tot_2pt_conv,tot_def_tds,tot_safeties,score
0,1999_01_ARI_PHI,1999,1,PHI,ARI,ARI,2.0,1.0,1.0,0.0,1,4,0,0,0,25.0
1,1999_01_ARI_PHI,1999,1,PHI,ARI,PHI,3.0,2.0,1.0,0.0,3,1,0,0,0,24.0
2,1999_01_BUF_IND,1999,1,IND,BUF,BUF,1.0,1.0,0.0,0.0,0,2,1,0,0,14.0
3,1999_01_BUF_IND,1999,1,IND,BUF,IND,3.0,2.0,1.0,0.0,4,1,0,1,0,31.0
4,1999_01_CAR_NO,1999,1,NO,CAR,CAR,1.0,1.0,0.0,0.0,1,1,0,0,0,10.0
5,1999_01_CAR_NO,1999,1,NO,CAR,NO,1.0,1.0,0.0,0.0,1,2,0,1,0,19.0
6,1999_01_CIN_TEN,1999,1,TEN,CIN,CIN,4.0,2.0,2.0,0.0,1,2,2,0,0,35.0
7,1999_01_CIN_TEN,1999,1,TEN,CIN,TEN,4.0,3.0,1.0,0.0,4,2,0,0,1,36.0
8,1999_01_DAL_WAS,1999,1,WAS,DAL,DAL,6.0,5.0,1.0,0.0,5,0,0,0,0,41.0
9,1999_01_DAL_WAS,1999,1,WAS,DAL,WAS,4.0,2.0,2.0,0.0,3,2,1,0,0,35.0
