# Day 20

For each game, get the touchdowns (pass, rush, rec, special teams) and then see if any of those are significant different than the mean


I've heard analysts talk about the 2022 NFL Season being boring and/or low scoring. One metric we can look at is win margin, which is calculated as winning team's points - losing team's points. I want to see how the average win margin of games through Week 9 stacks up against previous seasons. If there is a significant difference in close games, maybe that can help to build a case that this NFL season isn't the most entertaining from an offensive standpoint.

In [1]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
import pingouin

# Create database connection
conn = sqlite3.connect('../../data/db/database.db')

In [3]:
pd.read_sql("""
SELECT game_id, season, week, home_team, away_team, posteam, defteam, COUNT(*) AS tot_extra_pts 
FROM pbp
WHERE extra_point_attempt = 1 AND extra_point_result = 'good'
GROUP BY game_id, posteam;
""", conn)

Unnamed: 0,game_id,season,week,home_team,away_team,posteam,defteam,tot_extra_pts
0,1999_01_ARI_PHI,1999,1,PHI,ARI,ARI,PHI,1
1,1999_01_ARI_PHI,1999,1,PHI,ARI,PHI,ARI,3
2,1999_01_BUF_IND,1999,1,IND,BUF,IND,BUF,4
3,1999_01_CAR_NO,1999,1,NO,CAR,CAR,NO,1
4,1999_01_CAR_NO,1999,1,NO,CAR,NO,CAR,1
...,...,...,...,...,...,...,...,...
11230,2022_10_MIN_BUF,2022,10,BUF,MIN,MIN,BUF,3
11231,2022_10_NO_PIT,2022,10,PIT,NO,NO,PIT,1
11232,2022_10_NO_PIT,2022,10,PIT,NO,PIT,NO,2
11233,2022_10_SEA_TB,2022,10,TB,SEA,SEA,TB,1


In [2]:
query = """
WITH home_games AS (
    SELECT
        game_id,
        season,
        week,
        home_team AS team,
        home_score AS score
    FROM schedules
    WHERE week <= 9
        AND game_type = 'REG'
), away_games AS (
    SELECT
        game_id,
        season,
        week,
        away_team AS team,
        away_score AS score
    FROM schedules
    WHERE week <= 9
        AND game_type = 'REG'
), stacked AS (
    SELECT *
    FROM home_games
    UNION ALL
    SELECT *
    FROM away_games
), data AS (
    SELECT
        s.game_id,
        s.season,
        s.week,
        s.team,
        s.score,
        weekly_totals.tot_pass_tds,
        weekly_totals.tot_rush_tds,
        weekly_totals.tot_rec_tds, -- Will be equal to pass tds but keeping in just in case
        weekly_totals.tot_st_tds
        --(s.score / weekly_totals.tot_pass_tds + weekly_totals.tot_rush_tds) AS score_off_td_pct
    FROM stacked s
    LEFT JOIN (
        SELECT
            season,
            week,
            recent_team,
            SUM(passing_tds) AS tot_pass_tds,
            SUM(rushing_tds) AS tot_rush_tds,
            SUM(receiving_tds) AS tot_rec_tds,
            SUM(special_teams_tds) AS tot_st_tds
        FROM weekly
        WHERE week <= 9
        GROUP BY season, week, recent_team
    ) AS weekly_totals
        ON weekly_totals.season = s.season
            AND weekly_totals.week = s.week
            AND weekly_totals.recent_team = s.team
)
SELECT *
FROM data
"""

df = pd.read_sql(query, conn)

In [6]:
df

Unnamed: 0,game_id,season,week,team,score,tot_pass_tds,tot_rush_tds,tot_rec_tds,tot_st_tds
0,1999_01_MIN_ATL,1999,1,ATL,14.0,1.0,1.0,1.0,0.0
1,1999_01_KC_CHI,1999,1,CHI,20.0,2.0,0.0,2.0,0.0
2,1999_01_PIT_CLE,1999,1,CLE,0.0,0.0,0.0,0.0,0.0
3,1999_01_OAK_GB,1999,1,GB,28.0,4.0,0.0,4.0,0.0
4,1999_01_BUF_IND,1999,1,IND,31.0,2.0,1.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...
6297,2022_09_MIN_WAS,2022,9,MIN,20.0,2.0,0.0,2.0,0.0
6298,2022_09_SEA_ARI,2022,9,SEA,31.0,2.0,2.0,2.0,0.0
6299,2022_09_LA_TB,2022,9,LA,13.0,1.0,0.0,1.0,0.0
6300,2022_09_TEN_KC,2022,9,TEN,17.0,0.0,2.0,0.0,0.0


### Play by Play Data

In [69]:
import nfl_data_py as nfl

cols = [
    'game_id',
    'season',
    'week',
    'home_team',
    'away_team',
    'posteam',
    'defteam',
    'touchdown',
    'pass_touchdown',
    'rush_touchdown',
    'return_touchdown',
    'extra_point_attempt',
    'extra_point_result',
    'two_point_attempt',
    'field_goal_attempt',
    'field_goal_result',
    'two_point_conv_result',
    'safety',
    'success',
    'td_team',
    'desc']

pbp = nfl.import_pbp_data(range(1999, 2023), cols, downcast=True, cache=False, alt_path=None)

1999 done.
2000 done.
2001 done.
2002 done.
2003 done.
2004 done.
2005 done.
2006 done.
2007 done.
2008 done.
2009 done.
2010 done.
2011 done.
2012 done.
2013 done.
2014 done.
2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
Downcasting floats.


  plays.loc[:, cols] = plays.loc[:, cols].astype(numpy.float32)


In [70]:
pbp = pbp[cols]

In [71]:
pbp.to_sql('pbp', conn, index=False, if_exists='replace')

1124923

In [9]:
# Only keep Weeks 1-9
pbp = pbp[pbp['week'] <= 9]
pbp

Unnamed: 0,game_id,week,home_team,away_team,posteam,defteam,touchdown,pass_touchdown,rush_touchdown,return_touchdown,...,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense
0,1999_01_ARI_PHI,1,PHI,ARI,PHI,ARI,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1,1999_01_ARI_PHI,1,PHI,ARI,PHI,ARI,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2,1999_01_ARI_PHI,1,PHI,ARI,PHI,ARI,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3,1999_01_ARI_PHI,1,PHI,ARI,PHI,ARI,0.0,0.0,0.0,0.0,...,,,,,,,,,,
4,1999_01_ARI_PHI,1,PHI,ARI,PHI,ARI,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1122572,2022_09_TEN_KC,9,KC,TEN,TEN,KC,0.0,0.0,0.0,0.0,...,PISTOL,"1 RB, 1 TE, 3 WR",6.0,"3 DL, 3 LB, 5 DB",5.0,45280;52546;35493;38629;43334;47846;54534;5306...,00-0033386;00-0029413;00-0032764;00-0037754;00...,00-0036374;00-0027662;00-0035625;00-0038043;00...,11.0,11.0
1122573,2022_09_TEN_KC,9,KC,TEN,TEN,KC,0.0,0.0,0.0,0.0,...,SHOTGUN,"1 RB, 1 TE, 3 WR",5.0,"3 DL, 2 LB, 6 DB",4.0,52546;54596;35493;38629;47846;54534;53063;5460...,00-0037617;00-0029413;00-0037754;00-0036171;00...,00-0036374;00-0027662;00-0035625;00-0038043;00...,11.0,11.0
1122574,2022_09_TEN_KC,9,KC,TEN,,,0.0,0.0,0.0,0.0,...,,,,,,,,,0.0,0.0
1122575,2022_09_TEN_KC,9,KC,TEN,TEN,KC,0.0,0.0,0.0,0.0,...,EMPTY,"1 RB, 1 TE, 3 WR",4.0,"3 DL, 2 LB, 6 DB",4.0,45280;52546;35493;38629;47846;54534;43334;5306...,00-0033386;00-0029413;00-0037754;00-0032764;00...,00-0036374;00-0027662;00-0035625;00-0038043;00...,11.0,11.0


In [30]:
# Subset columns
pbp = pbp[cols]
pbp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 564656 entries, 0 to 1122576
Data columns (total 21 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   game_id                564656 non-null  object 
 1   season                 564656 non-null  int64  
 2   week                   564656 non-null  int32  
 3   home_team              564656 non-null  object 
 4   away_team              564656 non-null  object 
 5   posteam                526028 non-null  object 
 6   defteam                526028 non-null  object 
 7   touchdown              538809 non-null  float32
 8   pass_touchdown         538809 non-null  float32
 9   rush_touchdown         538809 non-null  float32
 10  return_touchdown       538809 non-null  float32
 11  extra_point_attempt    538809 non-null  float32
 12  extra_point_result     14504 non-null   object 
 13  two_point_attempt      538809 non-null  float32
 14  field_goal_attempt     538809 non-n

In [31]:
# Drop null posession team or defensive team since these are non-play records (timeouts, end of quarter, etc)
pbp[(pbp['posteam'].isna()) | (pbp['defteam'].isna())]['desc'].value_counts().head(10)

END QUARTER 2                  3139
END GAME                       3139
GAME                           2745
END QUARTER 4                   192
END QUARTER 1                    13
END QUARTER 3                    11
Timeout #3 by IND at 00:03.      10
Timeout at 11:37.                 9
Timeout at 10:13.                 9
Name: desc, dtype: int64

In [32]:
pbp.dropna(subset=['posteam', 'defteam'], inplace=True)
pbp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 526028 entries, 0 to 1122575
Data columns (total 21 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   game_id                526028 non-null  object 
 1   season                 526028 non-null  int64  
 2   week                   526028 non-null  int32  
 3   home_team              526028 non-null  object 
 4   away_team              526028 non-null  object 
 5   posteam                526028 non-null  object 
 6   defteam                526028 non-null  object 
 7   touchdown              518211 non-null  float32
 8   pass_touchdown         518211 non-null  float32
 9   rush_touchdown         518211 non-null  float32
 10  return_touchdown       518211 non-null  float32
 11  extra_point_attempt    518211 non-null  float32
 12  extra_point_result     14504 non-null   object 
 13  two_point_attempt      518211 non-null  float32
 14  field_goal_attempt     518211 non-n

In [33]:
# Clean fields
pbp['extra_point_fixed'] = pbp['extra_point_result'].apply(lambda x: 1 if x=='good' else 0)
pbp['two_point_conv_fixed'] = pbp['two_point_conv_result'].apply(lambda x: 1 if x=='success' else 0)
pbp['field_goal_fixed'] = pbp['field_goal_result'].apply(lambda x: 1 if x=='made' else 0)

In [57]:
# Clean blanks from posteam

def posteam_fix(row):
    teams = []
    teams.append(row['home_team'])
    teams.append(row['away_team'])

    if row['posteam'] == '':
        teams.remove(row['defteam'])
        return teams[0]
    else:
        return row['posteam']

pbp['posteam_fixed'] = pbp.apply(lambda row: posteam_fix(row), axis=1)

In [58]:
# Aggregate
metrics = [
    'pass_touchdown', 
    'rush_touchdown', 
    'return_touchdown', 
    'extra_point_fixed', 
    'two_point_conv_fixed', 
    'field_goal_fixed',
    'safety']

agg_pbp = pbp.groupby(['game_id', 'season', 'week', 'posteam_fixed'])[metrics].sum().reset_index()

In [59]:
agg_pbp['score'] = (agg_pbp['pass_touchdown'] * 6
                + agg_pbp['rush_touchdown'] * 6
                + agg_pbp['return_touchdown'] * 6
                + agg_pbp['extra_point_fixed'] * 1
                + agg_pbp['two_point_conv_fixed'] * 2
                + agg_pbp['field_goal_fixed'] * 3
                + agg_pbp['safety'] * 2)

In [60]:
# Join datasets
merged = agg_pbp.reset_index(drop=True).merge(
    right=df[['game_id', 'season', 'week', 'team', 'score']].reset_index(drop=True),
    how='left',
    left_on=['game_id', 'posteam_fixed'],
    right_on=['game_id', 'team']
    # on=['game_id', 'posteam']
)
merged.head()

Unnamed: 0,game_id,season_x,week_x,posteam_fixed,pass_touchdown,rush_touchdown,return_touchdown,extra_point_fixed,two_point_conv_fixed,field_goal_fixed,safety,score_x,season_y,week_y,team,score_y
0,1999_01_ARI_PHI,1999,1,ARI,1.0,1.0,0.0,1,0,4,0.0,25.0,1999.0,1.0,ARI,25.0
1,1999_01_ARI_PHI,1999,1,PHI,2.0,1.0,0.0,3,0,1,0.0,24.0,1999.0,1.0,PHI,24.0
2,1999_01_BUF_IND,1999,1,BUF,1.0,0.0,1.0,0,1,2,0.0,20.0,1999.0,1.0,BUF,14.0
3,1999_01_BUF_IND,1999,1,IND,2.0,1.0,0.0,4,0,1,0.0,25.0,1999.0,1.0,IND,31.0
4,1999_01_CAR_NO,1999,1,CAR,1.0,0.0,1.0,1,0,1,0.0,16.0,1999.0,1.0,CAR,10.0


In [61]:
merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6296 entries, 0 to 6295
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   game_id               6296 non-null   object 
 1   season_x              6296 non-null   int64  
 2   week_x                6296 non-null   int64  
 3   posteam_fixed         6296 non-null   object 
 4   pass_touchdown        6296 non-null   float32
 5   rush_touchdown        6296 non-null   float32
 6   return_touchdown      6296 non-null   float32
 7   extra_point_fixed     6296 non-null   int64  
 8   two_point_conv_fixed  6296 non-null   int64  
 9   field_goal_fixed      6296 non-null   int64  
 10  safety                6296 non-null   float32
 11  score_x               6296 non-null   float64
 12  season_y              5838 non-null   float64
 13  week_y                5838 non-null   float64
 14  team                  5838 non-null   object 
 15  score_y              

In [63]:
merged[merged['score_x'] != merged['score_y']]

Unnamed: 0,game_id,season_x,week_x,posteam_fixed,pass_touchdown,rush_touchdown,return_touchdown,extra_point_fixed,two_point_conv_fixed,field_goal_fixed,safety,score_x,season_y,week_y,team,score_y
2,1999_01_BUF_IND,1999,1,BUF,1.0,0.0,1.0,0,1,2,0.0,20.0,1999.0,1.0,BUF,14.0
3,1999_01_BUF_IND,1999,1,IND,2.0,1.0,0.0,4,0,1,0.0,25.0,1999.0,1.0,IND,31.0
4,1999_01_CAR_NO,1999,1,CAR,1.0,0.0,1.0,1,0,1,0.0,16.0,1999.0,1.0,CAR,10.0
5,1999_01_CAR_NO,1999,1,NO,1.0,0.0,0.0,1,0,2,0.0,13.0,1999.0,1.0,NO,19.0
6,1999_01_CIN_TEN,1999,1,CIN,2.0,2.0,0.0,1,2,2,1.0,37.0,1999.0,1.0,CIN,35.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6278,2022_09_IND_NE,2022,9,IND,0.0,0.0,1.0,0,0,1,0.0,9.0,2022.0,9.0,IND,3.0
6279,2022_09_IND_NE,2022,9,NE,1.0,0.0,0.0,2,0,4,0.0,20.0,2022.0,9.0,NE,26.0
6287,2022_09_MIA_CHI,2022,9,MIA,3.0,1.0,0.0,5,0,0,0.0,29.0,2022.0,9.0,MIA,35.0
6292,2022_09_SEA_ARI,2022,9,ARI,2.0,0.0,0.0,3,0,0,0.0,15.0,2022.0,9.0,ARI,21.0


In [66]:
df.sort_values(['game_id', 'team']).head(10)

Unnamed: 0,game_id,season,week,team,score,tot_pass_tds,tot_rush_tds,tot_rec_tds,tot_st_tds
3159,1999_01_ARI_PHI,1999,1,ARI,25.0,1.0,1.0,1.0,0.0
8,1999_01_ARI_PHI,1999,1,PHI,24.0,2.0,1.0,2.0,0.0
3161,1999_01_BAL_STL,1999,1,BAL,10.0,,,,
10,1999_01_BAL_STL,1999,1,STL,27.0,,,,
3155,1999_01_BUF_IND,1999,1,BUF,14.0,1.0,0.0,1.0,0.0
4,1999_01_BUF_IND,1999,1,IND,31.0,2.0,1.0,2.0,0.0
3157,1999_01_CAR_NO,1999,1,CAR,10.0,1.0,0.0,1.0,0.0
6,1999_01_CAR_NO,1999,1,NO,19.0,1.0,0.0,1.0,1.0
3163,1999_01_CIN_TEN,1999,1,CIN,35.0,2.0,2.0,2.0,0.0
12,1999_01_CIN_TEN,1999,1,TEN,36.0,3.0,1.0,3.0,0.0


In [68]:
df['team'].unique()

array(['ATL', 'CHI', 'CLE', 'GB', 'IND', 'JAX', 'NO', 'NYJ', 'PHI', 'SEA',
       'STL', 'TB', 'TEN', 'WAS', 'DEN', 'BAL', 'BUF', 'CAR', 'CIN',
       'DET', 'KC', 'MIA', 'MIN', 'NE', 'NYG', 'SF', 'DAL', 'OAK', 'PIT',
       'SD', 'ARI', 'HOU', 'LA', 'LAC', 'LV'], dtype=object)

In [None]:
Go back and look at all processing done to play-by-play and try to figure this out

## Check Work

In [24]:
query = """
WITH offense AS (
    SELECT
        game_id,
        season,
        week,
        home_team,
        away_team,
        posteam,
        SUM(touchdown) AS tot_tds,
        SUM(pass_touchdown) AS tot_pass_tds,
        SUM(rush_touchdown) AS tot_rush_tds,
        SUM(return_touchdown) AS tot_ret_tds
    FROM pbp
    WHERE posteam IS NOT NULL 
        AND posteam <> ""
        AND posteam = td_team
    GROUP BY game_id, posteam), 
extra_pts AS (
    SELECT
        game_id,
        posteam,
        COUNT(*) AS tot_extra_pts
    FROM pbp
    WHERE extra_point_attempt = 1 AND extra_point_result = 'good'
    GROUP BY game_id, posteam),
field_goals AS (
    SELECT 
        game_id,
        posteam,
        COUNT(*) AS tot_field_goals
    FROM pbp
    WHERE field_goal_attempt = 1 AND field_goal_result = 'made'
    GROUP BY game_id, posteam), 
two_pt_convs AS (
    SELECT 
        game_id,
        posteam,
        COUNT(*) AS tot_2pt_conv
    FROM pbp
    WHERE two_point_attempt = 1 AND two_point_conv_result = 'success'
    GROUP BY game_id, posteam),
-- Counts defensive TDs and punt/kickoff return TDs
defense AS (
    SELECT
        game_id,
        td_team AS team,
        COUNT(*) AS tot_def_tds
    FROM pbp
    WHERE td_team = defteam
    GROUP BY game_id, td_team),
safeties AS (
    SELECT
        game_id,
        defteam AS team,
        COUNT(*) AS tot_safeties
    FROM pbp
    WHERE safety = 1
    GROUP BY game_id, defteam),
joined AS (
    SELECT 
        offense.*,
        CASE WHEN tot_extra_pts IS NULL THEN 0
        ELSE tot_extra_pts
        END AS tot_extra_pts,
        CASE WHEN tot_field_goals IS NULL THEN 0
        ELSE tot_field_goals
        END AS tot_field_goals,
        CASE WHEN tot_2pt_conv IS NULL THEN 0
        ELSE tot_2pt_conv
        END AS tot_2pt_conv,
        CASE WHEN tot_def_tds IS NULL THEN 0
        ELSE tot_def_tds
        END AS tot_def_tds,
        CASE WHEN tot_safeties IS NULL THEN 0
        ELSE tot_safeties
        END AS tot_safeties
    FROM offense
    LEFT JOIN extra_pts
        ON extra_pts.game_id = offense.game_id
            AND extra_pts.posteam = offense.posteam
    LEFT JOIN field_goals
        ON field_goals.game_id = offense.game_id
            AND field_goals.posteam = offense.posteam
    LEFT JOIN two_pt_convs
        ON two_pt_convs.game_id = offense.game_id
            AND two_pt_convs.posteam = offense.posteam
    LEFT JOIN defense
        ON defense.game_id = offense.game_id
            AND defense.team = offense.posteam
    LEFT JOIN safeties
        ON safeties.game_id = offense.game_id
            AND safeties.team = offense.posteam
)
SELECT *,
    (tot_pass_tds * 6
    + tot_rush_tds * 6
    + tot_ret_tds * 6
    + tot_extra_pts * 1
    + tot_field_goals * 3
    + tot_2pt_conv * 2
    + tot_def_tds * 6
    + tot_safeties * 2) AS score
FROM joined
"""

df_pbp = pd.read_sql(query, conn)
df_pbp.head()

Unnamed: 0,game_id,season,week,home_team,away_team,posteam,tot_tds,tot_pass_tds,tot_rush_tds,tot_ret_tds,tot_extra_pts,tot_field_goals,tot_2pt_conv,tot_def_tds,tot_safeties,score
0,1999_01_ARI_PHI,1999,1,PHI,ARI,ARI,2.0,1.0,1.0,0.0,1,4,0,0,0,25.0
1,1999_01_ARI_PHI,1999,1,PHI,ARI,PHI,3.0,2.0,1.0,0.0,3,1,0,0,0,24.0
2,1999_01_BUF_IND,1999,1,IND,BUF,BUF,1.0,1.0,0.0,0.0,0,2,1,0,0,14.0
3,1999_01_BUF_IND,1999,1,IND,BUF,IND,3.0,2.0,1.0,0.0,4,1,0,1,0,31.0
4,1999_01_CAR_NO,1999,1,NO,CAR,CAR,1.0,1.0,0.0,0.0,1,1,0,0,0,10.0


In [37]:
query = """
WITH home_games AS (
    SELECT
        game_id,
        season,
        week,
        home_team AS team,
        home_score AS score
    FROM schedules
    --WHERE week <= 9
    --    AND game_type = 'REG'
), away_games AS (
    SELECT
        game_id,
        season,
        week,
        away_team AS team,
        away_score AS score
    FROM schedules
    --WHERE week <= 9
    --    AND game_type = 'REG'
), stacked AS (
    SELECT *
    FROM home_games
    UNION ALL
    SELECT *
    FROM away_games
)
SELECT *
FROM stacked
"""

df_schedules = pd.read_sql(query, conn)
df_schedules.head(20)

Unnamed: 0,game_id,season,week,team,score
0,1999_01_MIN_ATL,1999,1,ATL,14.0
1,1999_01_KC_CHI,1999,1,CHI,20.0
2,1999_01_PIT_CLE,1999,1,CLE,0.0
3,1999_01_OAK_GB,1999,1,GB,28.0
4,1999_01_BUF_IND,1999,1,IND,31.0
5,1999_01_SF_JAX,1999,1,JAX,41.0
6,1999_01_CAR_NO,1999,1,NO,19.0
7,1999_01_NE_NYJ,1999,1,NYJ,28.0
8,1999_01_ARI_PHI,1999,1,PHI,24.0
9,1999_01_DET_SEA,1999,1,SEA,20.0


In [30]:
merged = df_pbp.merge(
    df_schedules,
    'left',
    left_on=['game_id', 'posteam'],
    right_on=['game_id', 'team']
)

merged

Unnamed: 0,game_id,season_x,week_x,home_team,away_team,posteam,tot_tds,tot_pass_tds,tot_rush_tds,tot_ret_tds,tot_extra_pts,tot_field_goals,tot_2pt_conv,tot_def_tds,tot_safeties,score_x,season_y,week_y,team,score_y
0,1999_01_ARI_PHI,1999,1,PHI,ARI,ARI,2.0,1.0,1.0,0.0,1,4,0,0,0,25.0,1999.0,1.0,ARI,25.0
1,1999_01_ARI_PHI,1999,1,PHI,ARI,PHI,3.0,2.0,1.0,0.0,3,1,0,0,0,24.0,1999.0,1.0,PHI,24.0
2,1999_01_BUF_IND,1999,1,IND,BUF,BUF,1.0,1.0,0.0,0.0,0,2,1,0,0,14.0,1999.0,1.0,BUF,14.0
3,1999_01_BUF_IND,1999,1,IND,BUF,IND,3.0,2.0,1.0,0.0,4,1,0,1,0,31.0,1999.0,1.0,IND,31.0
4,1999_01_CAR_NO,1999,1,NO,CAR,CAR,1.0,1.0,0.0,0.0,1,1,0,0,0,10.0,1999.0,1.0,CAR,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11508,2022_10_MIN_BUF,2022,10,BUF,MIN,MIN,3.0,1.0,2.0,0.0,3,2,0,1,0,33.0,2022.0,10.0,MIN,33.0
11509,2022_10_NO_PIT,2022,10,PIT,NO,NO,1.0,1.0,0.0,0.0,1,1,0,0,0,10.0,2022.0,10.0,NO,10.0
11510,2022_10_NO_PIT,2022,10,PIT,NO,PIT,2.0,0.0,2.0,0.0,2,2,0,0,0,20.0,2022.0,10.0,PIT,20.0
11511,2022_10_SEA_TB,2022,10,TB,SEA,SEA,2.0,2.0,0.0,0.0,1,1,0,0,0,16.0,2022.0,10.0,SEA,16.0


In [34]:
merged[merged.isnull()]

Unnamed: 0,game_id,season_x,week_x,home_team,away_team,posteam,tot_tds,tot_pass_tds,tot_rush_tds,tot_ret_tds,tot_extra_pts,tot_field_goals,tot_2pt_conv,tot_def_tds,tot_safeties,score_x,season_y,week_y,team,score_y
0,,,,,,,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11508,,,,,,,,,,,,,,,,,,,,
11509,,,,,,,,,,,,,,,,,,,,
11510,,,,,,,,,,,,,,,,,,,,
11511,,,,,,,,,,,,,,,,,,,,


In [29]:
merged[abs(merged['score_x'] - merged['score_y']) == 2]

Unnamed: 0,game_id,season_x,week_x,home_team,away_team,posteam,tot_tds,tot_pass_tds,tot_rush_tds,tot_ret_tds,tot_extra_pts,tot_field_goals,tot_2pt_conv,tot_def_tds,tot_safeties,score_x,season_y,week_y,team,score_y
96,1999_04_PHI_NYG,1999,4,NYG,PHI,NYG,1.0,1.0,0.0,0.0,1,3,0,0,1,18.0,1999.0,4.0,NYG,16.0
514,2000_03_ATL_CAR,2000,3,CAR,ATL,ATL,1.0,0.0,1.0,0.0,1,2,0,0,0,13.0,2000.0,3.0,ATL,15.0
515,2000_03_ATL_CAR,2000,3,CAR,ATL,CAR,1.0,1.0,0.0,0.0,1,1,0,0,1,12.0,2000.0,3.0,CAR,10.0
857,2000_16_OAK_SEA,2000,16,SEA,LV,SEA,3.0,2.0,1.0,0.0,1,2,0,0,0,25.0,2000.0,16.0,SEA,27.0
1249,2001_14_DAL_SEA,2001,14,SEA,DAL,SEA,2.0,0.0,2.0,0.0,3,2,0,1,0,27.0,2001.0,14.0,SEA,29.0
1918,2003_03_NO_TEN,2003,3,TEN,NO,NO,1.0,1.0,0.0,0.0,1,1,0,0,0,10.0,2003.0,3.0,NO,12.0
1919,2003_03_NO_TEN,2003,3,TEN,NO,TEN,3.0,2.0,1.0,0.0,3,2,0,0,1,29.0,2003.0,3.0,TEN,27.0
4950,2009_08_STL_DET,2009,8,DET,LA,DET,1.0,0.0,1.0,0.0,0,0,1,0,0,8.0,2009.0,8.0,DET,10.0
8039,2015_13_CAR_NO,2015,13,NO,CAR,NO,4.0,3.0,1.0,0.0,4,0,1,1,0,36.0,2015.0,13.0,NO,38.0
8233,2016_02_BAL_CLE,2016,2,CLE,BAL,BAL,2.0,2.0,0.0,0.0,2,3,0,0,0,23.0,2016.0,2.0,BAL,25.0


In [35]:
merged[merged['score_x'] != merged['score_y']]

Unnamed: 0,game_id,season_x,week_x,home_team,away_team,posteam,tot_tds,tot_pass_tds,tot_rush_tds,tot_ret_tds,tot_extra_pts,tot_field_goals,tot_2pt_conv,tot_def_tds,tot_safeties,score_x,season_y,week_y,team,score_y
22,1999_01_OAK_GB,1999,1,GB,LV,LV,3.0,0.0,3.0,0.0,3,1,0,0,0,24.0,,,,
41,1999_02_OAK_MIN,1999,2,MIN,LV,LV,2.0,1.0,1.0,0.0,1,3,0,0,0,22.0,,,,
46,1999_02_SD_CIN,1999,2,CIN,LAC,LAC,2.0,2.0,0.0,0.0,2,4,1,1,0,34.0,,,,
53,1999_03_ATL_STL,1999,3,LA,ATL,LA,5.0,3.0,2.0,0.0,5,0,0,0,0,35.0,,,,
55,1999_03_CHI_OAK,1999,3,LV,CHI,LV,3.0,2.0,1.0,0.0,3,1,0,0,0,24.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10992,2021_11_HOU_TEN,2021,11,TEN,HOU,TEN,2.0,1.0,0.0,0.0,1,0,0,0,0,7.0,2021.0,11.0,TEN,13.0
11032,2021_12_SEA_WAS,2021,12,WAS,SEA,SEA,2.0,2.0,0.0,0.0,1,0,0,0,0,13.0,2021.0,12.0,SEA,15.0
11133,2021_16_JAX_NYJ,2021,16,NYJ,JAX,JAX,2.0,0.0,1.0,0.0,0,3,0,0,0,15.0,2021.0,16.0,JAX,21.0
11151,2021_17_ATL_BUF,2021,17,BUF,ATL,ATL,1.0,0.0,1.0,0.0,1,2,0,0,0,13.0,2021.0,17.0,ATL,15.0


In [21]:
df_schedules[df_schedules['game_id'] == '1999_01_OAK_GB']

Unnamed: 0,game_id,season,week,team,score
3,1999_01_OAK_GB,1999,1,GB,28.0
6412,1999_01_OAK_GB,1999,1,OAK,24.0


In [None]:
df_schedules.

query 