In [1]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import time
from IPython.display import display, HTML
# Inject a CSS rule that sets the notebook's `.container` element to 100% width
# (presumably so wide DataFrame outputs are not clipped -- confirm against the notebook theme).
display(HTML("<style>.container { width:100% !important; }</style>"))

In [25]:
# Remove the cap on displayed columns so wide frames are shown in full.
pd.options.display.max_columns = None

def clean_stat_types(s):
    """Turn a display label into a slug: lower-case it and swap each space for '-'.

    Example: 'Kickoff Returns' -> 'kickoff-returns'.
    """
    lowered = s.lower()
    # Splitting on a literal ' ' keeps empty pieces, so consecutive spaces
    # still become consecutive hyphens.
    return '-'.join(lowered.split(' '))

# Top-level stat groupings.
side_of_ball = ['offense', 'defense', 'special-teams']

# Per-grouping stat-type labels, normalised to slugs with clean_stat_types.
offense_stat_types = [
    clean_stat_types(label)
    for label in ('Passing', 'Rushing', 'Receiving', 'Scoring', 'Downs', 'Year')
]
defense_stat_types = [
    clean_stat_types(label)
    for label in (
        'Passing', 'Rushing', 'Receiving', 'Scoring', 'Tackles',
        'Downs', 'Fumbles', 'Interceptions', 'Year',
    )
]
special_teams_stat_types = [
    clean_stat_types(label)
    for label in (
        'Field Goals', 'Scoring', 'Kickoffs', 'Kickoff Returns',
        'Punting', 'Punt Returns',
    )
]

# One list per line, matching four separate print() calls.
print(
    side_of_ball,
    offense_stat_types,
    defense_stat_types,
    special_teams_stat_types,
    sep='\n',
)

['offense', 'defense', 'special-teams']
['passing', 'rushing', 'receiving', 'scoring', 'downs', 'year']
['passing', 'rushing', 'receiving', 'scoring', 'tackles', 'downs', 'fumbles', 'interceptions', 'year']
['field-goals', 'scoring', 'kickoffs', 'kickoff-returns', 'punting', 'punt-returns']


In [3]:
def get_raw_data(selected_side, stat_type, year, season_type='reg'):
    """Download one NFL.com team-stats table and return it as a DataFrame.

    Parameters
    ----------
    selected_side : str
        URL segment for the side of the ball, e.g. 'offense'.
    stat_type : str
        URL segment for the stat category, e.g. 'passing'.
    year : str or int
        Season year; interpolated into the URL as-is.
    season_type : str, default 'reg'
        URL segment that follows the year. The default reproduces the
        previously hard-coded value; any other value is passed through to
        the URL unchanged (not validated here).

    Returns
    -------
    pandas.DataFrame
        The first table ``pd.read_html`` finds at the URL.

    Raises
    ------
    Exception
        Any error raised while fetching or parsing is printed and then
        re-raised unchanged.
    """
    url = (
        'https://www.nfl.com/stats/team-stats/'
        f'{selected_side}/{stat_type}/{year}/{season_type}/all'
    )

    try:
        raw_data = pd.read_html(url)[0]
    except Exception as e:
        print(f"An error occurred: {e}")
        # Bare `raise` re-raises the active exception with its original traceback.
        raise

    return raw_data

In [45]:
# Season and inter-request pause are named once instead of being repeated on every call.
OFFENSE_SEASON = '2023'
REQUEST_PAUSE_SECONDS = 1

# Fetch each offense table in turn, sleeping between (not before) requests.
raw_offense_frames = {}
for request_number, offense_stat in enumerate(
    ('passing', 'rushing', 'scoring', 'downs', 'receiving')
):
    if request_number > 0:
        time.sleep(REQUEST_PAUSE_SECONDS)
    raw_offense_frames[offense_stat] = get_raw_data('offense', offense_stat, OFFENSE_SEASON)

# Keep the per-table names that the following cells reference.
raw_offense_passing = raw_offense_frames['passing']
raw_offense_rushing = raw_offense_frames['rushing']
raw_offense_scoring = raw_offense_frames['scoring']
raw_offense_downs = raw_offense_frames['downs']
raw_offense_receiving = raw_offense_frames['receiving']

In [47]:
# Replacement column names for each raw offense table, in table column order.
offense_passing_column_names = [
    'team', 'pass_atts', 'cmps', 'cmp_pct', 'pass_ypa', 'pass_yds',
    'pass_tds', 'pass_ints', 'passer_rating', 'pass_first_downs',
    'pass_first_down_pct', 'pass_20_plus', 'pass_40_plus', 'pass_long',
    'pass_sacks', 'pass_sacks_Y',
]
offense_rushing_column_names = [
    'team', 'rush_atts', 'rush_yds', 'rush_ypa', 'rush_tds',
    'rush_20_plus', 'rush_40_plus', 'rush_long', 'rush_first_downs',
    'rush_first_down_pct', 'rush_fumbles',
]
offense_receiving_column_names = [
    'team', 'rec', 'rec_yds', 'yds_per_rec', 'rec_tds', 'rec_20_plus',
    'rec_40_plus', 'rec_long', 'rec_first_downs', 'rec_first_down_pct',
    'rec_fumbles',
]
offense_scoring_column_names = [
    'team', 'rush_tds_scoring', 'rec_tds_scoring', 'tot_tds', 'two_pt_conv',
]
offense_downs_column_names = [
    'team', 'third_down_atts', 'third_down_md', 'fourth_down_atts',
    'fourth_down_md', 'rec_first_downs', 'rec_first_down_pct',
    'rush_first_downs', 'rush_first_down_pct', 'scrimmage_plays',
]

# Relabel the raw frames in place.
raw_offense_passing.columns = offense_passing_column_names
raw_offense_rushing.columns = offense_rushing_column_names
raw_offense_receiving.columns = offense_receiving_column_names
raw_offense_scoring.columns = offense_scoring_column_names
raw_offense_downs.columns = offense_downs_column_names

# Drop the first-down columns that the rushing/receiving tables already provide.
# NOTE(review): this rebinds raw_offense_downs to a 6-column frame, so re-running
# this cell without refetching fails when the 10 downs names are assigned again.
raw_offense_downs = raw_offense_downs.drop(
    columns=['rec_first_downs', 'rec_first_down_pct', 'rush_first_downs', 'rush_first_down_pct']
)

In [53]:
def join_team_offense_data(*df):
    """Merge any number of team-level DataFrames on their shared 'team' column.

    Frames are merged left to right with ``pd.merge(..., on='team')``, which
    uses pandas' default inner join, so a team missing from any frame is
    absent from the result.

    Parameters
    ----------
    *df : pandas.DataFrame
        One or more frames, each containing a 'team' column.

    Returns
    -------
    pandas.DataFrame
        The merged frame. When only one frame is passed it is returned as-is.

    Raises
    ------
    ValueError
        If no frames are passed.
    """
    if not df:
        raise ValueError("join_team_offense_data needs at least one DataFrame")

    # Fold over every frame supplied instead of hard-coding exactly five.
    merged = df[0]
    for frame in df[1:]:
        merged = pd.merge(merged, frame, on='team')
    return merged

In [54]:
# Combine the five renamed offense tables into a single frame keyed on 'team'.
all_offense_team_data = join_team_offense_data(
    raw_offense_passing,
    raw_offense_rushing,
    raw_offense_receiving,
    raw_offense_scoring,
    raw_offense_downs,
)

In [59]:
# Bare expression: the notebook renders the merged frame as this cell's output.
all_offense_team_data

Unnamed: 0,team,pass_atts,cmps,cmp_pct,pass_ypa,pass_yds,pass_tds,pass_ints,passer_rating,pass_first_downs,pass_first_down_pct,pass_20_plus,pass_40_plus,pass_long,pass_sacks,pass_sacks_Y,rush_atts,rush_yds,rush_ypa,rush_tds,rush_20_plus,rush_40_plus,rush_long,rush_first_downs,rush_first_down_pct,rush_fumbles,rec,rec_yds,yds_per_rec,rec_tds,rec_20_plus,rec_40_plus,rec_long,rec_first_downs,rec_first_down_pct,rec_fumbles,rush_tds_scoring,rec_tds_scoring,tot_tds,two_pt_conv,third_down_atts,third_down_md,fourth_down_atts,fourth_down_md,scrimmage_plays
0,Commanders Commanders,636,407,64.0,6.6,4174,24,21,81.6,190,29.9,48,5,51T,65,449,359,1592,4.4,14,8,0,29,99,27.6,6,407,4174,10.3,24,48,5,51,190,46.7,5,14,24,39,3,216,77,29,16,1060
1,Chiefs Chiefs,635,421,66.3,6.9,4383,28,17,89.6,216,34.0,52,8,67T,28,195,417,1784,4.3,9,10,1,48T,107,25.7,2,421,4383,10.4,28,52,8,67,216,51.3,7,9,28,39,0,220,96,20,10,1080
2,Chargers Chargers,632,409,64.7,6.8,4312,24,8,91.8,204,32.3,55,7,79,43,355,431,1642,3.8,11,9,3,55T,89,20.6,6,409,4312,10.5,24,55,7,79,204,49.9,8,11,24,36,1,237,91,32,15,1106
3,Vikings Vikings,631,424,67.2,7.4,4700,30,19,92.4,220,34.9,74,7,62T,47,341,393,1553,4.0,7,5,0,31,79,20.1,9,424,4700,11.1,30,74,7,62,220,51.9,4,7,30,39,1,223,85,29,17,1071
4,Browns Browns,624,355,56.9,6.4,4011,24,23,73.7,173,27.7,53,15,75,45,318,518,2017,3.9,15,9,3,69,121,23.4,9,355,4011,11.3,24,53,15,75,173,48.7,5,15,24,42,6,253,80,32,18,1187
5,Jaguars Jaguars,620,412,66.4,7.1,4377,22,14,89.3,199,32.1,51,10,65T,41,251,453,1646,3.6,17,9,1,62T,98,21.6,5,412,4377,10.6,22,51,10,65,199,48.3,8,17,22,41,3,228,87,29,13,1114
6,Bengals Bengals,615,420,68.3,6.9,4257,27,14,93.0,208,33.8,46,10,80,50,362,383,1527,4.0,12,7,1,44,90,23.5,1,420,4257,10.1,27,46,10,80,208,49.5,3,12,27,41,1,218,82,20,9,1048
7,Cowboys Cowboys,614,428,69.7,7.6,4660,36,10,104.6,229,37.3,64,7,92T,40,263,468,1920,4.1,14,12,1,46,113,24.2,4,428,4660,10.9,36,64,7,92,229,53.5,6,14,36,57,4,230,111,23,13,1122
8,Lions Lions,606,408,67.3,7.6,4606,30,12,98.1,228,37.6,70,9,70T,31,205,500,2311,4.6,27,15,3,75,124,24.8,7,408,4606,11.3,30,70,9,70,228,55.9,4,27,30,58,3,224,93,40,21,1137
9,Saints Saints,606,406,67.0,7.0,4225,28,11,94.8,199,32.8,52,11,58T,35,235,480,1742,3.6,13,4,0,29,112,23.3,6,406,4225,10.4,28,52,11,58T,199,49.0,0,13,28,44,4,235,91,19,9,1121
