# Day 50

I'm adding onto my work from Day 46 and adjusting my query to pull data from as far back as 1999. I want to see who the historical leaders are for each condition:
- Teams that led at halftime and lost
- Teams that led at the end of the 3rd Quarter and lost
- Teams that trailed at halftime and won
- Teams that trailed at the end of the 3rd Quarter and won


The first two conditions lead to disappointment; the last two lead to anxious fans.

In [47]:
import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns

# Make seaborn's 'deep' palette the active color palette.
sns.set_palette('deep')

# Open a connection to the SQLite database file used by the queries below.
db_path = '../../data/db/database.db'
conn = sqlite3.connect(db_path)

## Query the Data

In [48]:
# SQL that returns one row per (game, team, quarter marker) holding the running
# score at that marker, the final score, the points scored in the period, and
# two flags:
#   win_loss   - 1 if the team's final score beat the opponent's, 0 if lower,
#                NULL on a tie
#   lead_trail - 1 if the team was ahead at the marker, 0 if behind,
#                NULL if level
# Only weeks 1-14 of each season are included.
query = """
WITH data AS (
    -- Get the score at the end of each quarter
    SELECT
        game_id,
        season,
        week,
        home_team,
        away_team,
        total_home_score,
        total_away_score,
        away_score,
        home_score,
        desc,
        /*
        Derive the quarter from the marker text itself. A ROW_NUMBER()
        without an ORDER BY numbers rows in an unspecified order, so the
        quarter (and the LAG below that depends on it) could be wrong.
        'END GAME' is treated as the 4th and final checkpoint.
        */
        CASE desc
            WHEN 'END QUARTER 1' THEN 1
            WHEN 'END QUARTER 2' THEN 2
            WHEN 'END QUARTER 3' THEN 3
            WHEN 'END GAME' THEN 4
        END AS quarter
    FROM pbp
    WHERE
        week <= 14
        AND desc IN ('END QUARTER 1', 'END QUARTER 2', 'END QUARTER 3', 'END GAME')
),
-- Stack the dataset to make it easier to work with:
-- one row from the home team's perspective, one from the away team's
stacked AS (
    WITH home_team AS (
        SELECT
            game_id,
            season,
            week,
            home_team AS team,
            total_home_score AS total_score,
            total_away_score AS total_opp_score,
            home_score AS score,
            away_score AS opp_score,
            desc,
            quarter
        FROM data
    ), away_team AS (
        SELECT
            game_id,
            season,
            week,
            away_team AS team,
            total_away_score AS total_score,
            total_home_score AS total_opp_score,
            away_score AS score,
            home_score AS opp_score,
            desc,
            quarter
        FROM data
    )
    -- UNION (rather than UNION ALL) also collapses exact duplicate rows
    SELECT *
    FROM home_team
    UNION
    SELECT *
    FROM away_team
)
SELECT
    *,
    /*
    Get the amount of points scored in the quarter
    If the result is NULL from the LAG function,
    COALESCE() will return the total_score value for the quarter
    */
    COALESCE(total_score - LAG(total_score, 1) OVER(PARTITION BY game_id, team ORDER BY quarter), total_score) AS points_scored,
    -- Final result, repeated on every row of the game
    CASE
        WHEN score > opp_score THEN 1
        WHEN score < opp_score THEN 0
        ELSE NULL
    END AS win_loss,
    -- Game state at this quarter marker
    CASE
        WHEN total_score > total_opp_score THEN 1
        WHEN total_score < total_opp_score THEN 0
        ELSE NULL
    END AS lead_trail
FROM stacked
-- Include quarter so rows within a team come back in a defined order
ORDER BY game_id, team, quarter
"""

# Run the query on the open connection and preview the first 15 rows.
df = pd.read_sql(sql=query, con=conn)
df.head(n=15)

Unnamed: 0,game_id,season,week,team,total_score,total_opp_score,score,opp_score,desc,quarter,points_scored,win_loss,lead_trail
0,1999_01_ARI_PHI,1999,1,ARI,0.0,21.0,25,24,END QUARTER 1,1,0.0,1.0,0.0
1,1999_01_ARI_PHI,1999,1,ARI,6.0,24.0,25,24,END QUARTER 2,2,6.0,1.0,0.0
2,1999_01_ARI_PHI,1999,1,ARI,12.0,24.0,25,24,END QUARTER 3,3,6.0,1.0,0.0
3,1999_01_ARI_PHI,1999,1,ARI,25.0,24.0,25,24,END GAME,4,13.0,1.0,1.0
4,1999_01_ARI_PHI,1999,1,PHI,21.0,0.0,24,25,END QUARTER 1,1,21.0,0.0,1.0
5,1999_01_ARI_PHI,1999,1,PHI,24.0,6.0,24,25,END QUARTER 2,2,3.0,0.0,1.0
6,1999_01_ARI_PHI,1999,1,PHI,24.0,12.0,24,25,END QUARTER 3,3,0.0,0.0,1.0
7,1999_01_ARI_PHI,1999,1,PHI,24.0,25.0,24,25,END GAME,4,0.0,0.0,0.0
8,1999_01_BUF_IND,1999,1,BUF,0.0,7.0,14,31,END QUARTER 1,1,0.0,0.0,0.0
9,1999_01_BUF_IND,1999,1,BUF,6.0,14.0,14,31,END QUARTER 2,2,6.0,0.0,0.0


This query produces a table at the game, team, quarter level. From here I can aggregate by quarter or by half. I'll need to create a flag that tells me if a team is winning at halftime and if the team won the game.

In [49]:
# Per season and team, count the games where the team was ahead at the end of
# the 2nd quarter (lead_trail == 1) but lost the game (win_loss == 0), then
# show the ten largest counts.
(
    df.query("desc == 'END QUARTER 2' & lead_trail == 1 & win_loss == 0")
    .groupby(['season', 'team'])['game_id']
    .count()
    .sort_values(ascending=False)
    .head(10)
)
    

season  team
2003    ATL     6
2017    IND     6
2013    HOU     5
2006    CAR     5
2016    LAC     5
2020    CIN     5
2007    WAS     5
2022    DEN     5
2020    LAC     5
2006    LV      5
Name: game_id, dtype: int64

The Falcons in 2003 and Colts in 2017 are tied for the most of these losses since 1999.

In [50]:
# List the 2003 Falcons games in which they led at halftime and lost.
atl_2003_blown_leads = "desc == 'END QUARTER 2' & lead_trail == 1 & win_loss == 0 & team == 'ATL' & season == 2003"
df.query(atl_2003_blown_leads)

Unnamed: 0,game_id,season,week,team,total_score,total_opp_score,score,opp_score,desc,quarter,points_scored,win_loss,lead_trail
6739,2003_02_WAS_ATL,2003,2,ATL,24.0,17.0,31,33,END QUARTER 2,2,21.0,0.0,1.0
7025,2003_05_MIN_ATL,2003,5,ATL,20.0,12.0,26,39,END QUARTER 2,2,14.0,0.0,1.0
7487,2003_09_PHI_ATL,2003,9,ATL,13.0,10.0,16,23,END QUARTER 2,2,13.0,0.0,1.0
7647,2003_11_ATL_NO,2003,11,ATL,20.0,3.0,20,23,END QUARTER 2,2,10.0,0.0,1.0
7879,2003_12_TEN_ATL,2003,12,ATL,21.0,14.0,31,38,END QUARTER 2,2,0.0,0.0,1.0
7903,2003_13_ATL_HOU,2003,13,ATL,7.0,3.0,13,17,END QUARTER 2,2,7.0,0.0,1.0


In [51]:
# List the 2017 Colts games in which they led at halftime and lost.
ind_2017_blown_leads = "desc == 'END QUARTER 2' & lead_trail == 1 & win_loss == 0 & team == 'IND' & season == 2017"
df.query(ind_2017_blown_leads)

Unnamed: 0,game_id,season,week,team,total_score,total_opp_score,score,opp_score,desc,quarter,points_scored,win_loss,lead_trail
29879,2017_02_ARI_IND,2017,2,IND,10.0,3.0,13,16,END QUARTER 2,2,0.0,0.0,1.0
30171,2017_04_IND_SEA,2017,4,IND,15.0,10.0,18,46,END QUARTER 2,2,13.0,0.0,1.0
30403,2017_06_IND_TEN,2017,6,IND,13.0,9.0,22,36,END QUARTER 2,2,10.0,0.0,1.0
30655,2017_08_IND_CIN,2017,8,IND,13.0,10.0,23,24,END QUARTER 2,2,13.0,0.0,1.0
30907,2017_10_PIT_IND,2017,10,IND,10.0,3.0,17,20,END QUARTER 2,2,10.0,0.0,1.0
31155,2017_12_TEN_IND,2017,12,IND,13.0,6.0,16,20,END QUARTER 2,2,13.0,0.0,1.0


Both of these teams had one huge blowout, but otherwise they kept it pretty close all the way to the end.

Let's find the opposite – the number of times a team has *trailed* at halftime and ended up winning the game.

In [52]:
# Per season and team, count the games where the team was behind at the end of
# the 2nd quarter (lead_trail == 0) but won the game (win_loss == 1), then
# show the ten largest counts.
(
    df.query("desc == 'END QUARTER 2' & lead_trail == 0 & win_loss == 1")
    .groupby(['season', 'team'])['game_id']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

season  team
2011    ARI     6
2019    SEA     6
2005    JAX     5
2008    IND     5
2012    IND     5
2011    CIN     4
        DEN     4
2022    KC      4
2001    CHI     4
2013    SEA     4
Name: game_id, dtype: int64

The Cardinals in 2011 and Seahawks in 2019 are the top teams here.

In [53]:
# List the 2011 Cardinals games in which they trailed at halftime and won.
ari_2011_comebacks = "desc == 'END QUARTER 2' & lead_trail == 0 & win_loss == 1 & team == 'ARI' & season == 2011"
df.query(ari_2011_comebacks)

Unnamed: 0,game_id,season,week,team,total_score,total_opp_score,score,opp_score,desc,quarter,points_scored,win_loss,lead_trail
19803,2011_01_CAR_ARI,2011,1,ARI,7.0,14.0,28,21,END QUARTER 2,2,0.0,1.0,0.0
20811,2011_09_STL_ARI,2011,9,ARI,3.0,9.0,19,13,END QUARTER 2,2,0.0,1.0,0.0
20827,2011_10_ARI_PHI,2011,10,ARI,7.0,14.0,21,17,END QUARTER 2,2,7.0,1.0,0.0
21067,2011_12_ARI_STL,2011,12,ARI,3.0,10.0,23,20,END QUARTER 2,2,0.0,1.0,0.0
21227,2011_13_DAL_ARI,2011,13,ARI,3.0,10.0,19,13,END QUARTER 2,2,0.0,1.0,0.0
21427,2011_14_SF_ARI,2011,14,ARI,7.0,12.0,21,19,END QUARTER 2,2,7.0,1.0,0.0


In [54]:
# List the 2019 Seahawks games in which they trailed at halftime and won.
sea_2019_comebacks = "desc == 'END QUARTER 2' & lead_trail == 0 & win_loss == 1 & team == 'SEA' & season == 2019"
df.query(sea_2019_comebacks)

Unnamed: 0,game_id,season,week,team,total_score,total_opp_score,score,opp_score,desc,quarter,points_scored,win_loss,lead_trail
33101,2019_01_CIN_SEA,2019,1,SEA,14.0,17.0,21,20,END QUARTER 2,2,14.0,1.0,0.0
33309,2019_02_SEA_PIT,2019,2,SEA,7.0,10.0,28,26,END QUARTER 2,2,7.0,1.0,0.0
33779,2019_06_SEA_CLE,2019,6,SEA,18.0,20.0,32,28,END QUARTER 2,2,12.0,1.0,0.0
34127,2019_09_TB_SEA,2019,9,SEA,13.0,21.0,40,34,END QUARTER 2,2,6.0,1.0,0.0
34247,2019_10_SEA_SF,2019,10,SEA,7.0,10.0,27,24,END QUARTER 2,2,7.0,1.0,0.0
34531,2019_13_MIN_SEA,2019,13,SEA,10.0,17.0,37,30,END QUARTER 2,2,3.0,1.0,0.0


Both teams kept it pretty close.

Lastly, I'll look to see teams that were leading at the end of the 3rd quarter and managed to lose the game. Not good.

In [55]:
# Per season and team, count the games where the team was ahead at the end of
# the 3rd quarter (lead_trail == 1) but lost the game (win_loss == 0), then
# show the ten largest counts.
(
    df.query("desc == 'END QUARTER 3' & lead_trail == 1 & win_loss == 0")
    .groupby(['season', 'team'])['game_id']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

season  team
2017    IND     5
2011    PHI     5
1999    LV      4
2013    DET     4
2009    WAS     4
2020    LAC     4
2001    DET     4
2012    LAC     4
2011    MIA     4
        MIN     4
Name: game_id, dtype: int64

The 2017 Colts end up on the top of this list too. They ended up 4-12 that year.

Can't finish without checking the opposite...the teams that were losing at the end of the 3rd and came back to win the game.

In [56]:
# Per season and team, count the games where the team was behind at the end of
# the 3rd quarter (lead_trail == 0) but won the game (win_loss == 1), then
# show the ten largest counts.
(
    df.query("desc == 'END QUARTER 3' & lead_trail == 0 & win_loss == 1")
    .groupby(['season', 'team'])['game_id']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

season  team
2011    ARI     5
2006    SEA     4
2021    LAC     4
2000    NYJ     4
2010    ATL     4
2001    CHI     4
2005    DAL     4
2011    CIN     4
        DAL     4
        DEN     4
Name: game_id, dtype: int64

The 2011 Cardinals are on top of this one too. They ended up winning 8 games that season. So 5 of their 8 wins came in the 4th quarter! Interesting.