In [1]:
import requests
from bs4 import BeautifulSoup 

import pandas as pd
import numpy as np
import re

In [2]:
# HTTP headers sent with every page request in the scraping cell: a
# browser-style User-Agent string.
# NOTE(review): presumably needed because the site rejects the default
# python-requests User-Agent -- not verified here.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"
    ),
}

In [3]:
# Scrape per-season offensive team stats from footballdb.com (one request per
# season) and stack them into a single long-format frame, `team_data`, with
# the columns Team, PassYPG, RunYPG, TotalYPG, PPG and Year.  The stat columns
# hold the cell text exactly as scraped (strings, not numbers).
years = np.arange(2013, 2022)  # seasons 2013 through 2021 inclusive

# Layout assumed when indexing the flat list of <td> cells on each page:
# every team occupies CELLS_PER_TEAM consecutive cells, and the offsets below
# select the cells this notebook keeps.
TEAMS_PER_SEASON = 32
CELLS_PER_TEAM = 10
TEAM_CELL, PPG_CELL, RUN_CELL, PASS_CELL, TOTAL_CELL = 0, 3, 5, 7, 9

# One DataFrame per season; concatenated once after the loop so earlier
# seasons are not re-copied on every iteration and no empty seed frame is
# needed.
season_frames = []

for year in years:
    year_page = f"https://www.footballdb.com/stats/teamstat.html?group=O&cat=T&yr={year}&lg=NFL"
    # The timeout keeps a stalled connection from hanging the notebook.
    year_page_tree = requests.get(year_page, headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    year_page_tree.raise_for_status()
    year_page_soup = BeautifulSoup(year_page_tree.content, 'html.parser')

    year_team_data = year_page_soup.find_all("td")

    # The indexing below reads cells 0 .. TEAMS_PER_SEASON * CELLS_PER_TEAM - 1;
    # report an unexpected page layout clearly rather than via an IndexError.
    cells_needed = TEAMS_PER_SEASON * CELLS_PER_TEAM
    if len(year_team_data) < cells_needed:
        raise ValueError(
            f"Season {year}: expected at least {cells_needed} <td> cells, "
            f"found {len(year_team_data)}"
        )

    year_team_list = []
    year_run_list = []
    year_pass_list = []
    year_total_list = []
    year_ppg_list = []

    for i in range(TEAMS_PER_SEASON):
        first_cell = i * CELLS_PER_TEAM

        # The team cell's text is split on whitespace: with exactly two tokens
        # the second one is used, otherwise the third.  The team label is the
        # first run in that token that starts with a capital letter.  For some
        # teams this yields only a fragment ("San", "Team"); those labels are
        # corrected in a later cell.
        team_name_str = year_team_data[first_cell + TEAM_CELL].text.split()
        name_token = team_name_str[1] if len(team_name_str) == 2 else team_name_str[2]
        team_name = re.findall('[A-Z][^A-Z]*', name_token)[0]
        year_team_list.append(team_name)

        year_run_list.append(year_team_data[first_cell + RUN_CELL].text)
        year_pass_list.append(year_team_data[first_cell + PASS_CELL].text)
        year_total_list.append(year_team_data[first_cell + TOTAL_CELL].text)
        year_ppg_list.append(year_team_data[first_cell + PPG_CELL].text)

    year_df = pd.DataFrame({"Team": year_team_list,
                            "PassYPG": year_pass_list,
                            "RunYPG": year_run_list,
                            "TotalYPG": year_total_list,
                            "PPG": year_ppg_list,
                            "Year": year})  # scalar is broadcast to every row

    season_frames.append(year_df)

# Single concatenation; ignore_index gives one continuous 0..n-1 row index.
team_data = pd.concat(season_frames, ignore_index=True)

In [4]:
# Distinct team labels produced by the scrape, in order of first appearance.
pd.unique(team_data["Team"])

array(['Broncos', 'Eagles', 'Packers', 'Saints', 'Chargers', 'Lions',
       'Patriots', 'Bears', 'Redskins', 'Bengals', 'Texans', 'Cardinals',
       'Vikings', 'Falcons', 'Colts', 'Cowboys', 'Browns', 'Seahawks',
       'Bills', 'Steelers', 'Chiefs', 'Titans', 'Raiders', 'San', 'Jets',
       'Panthers', 'Dolphins', 'Giants', 'Ravens', 'Rams', 'Jaguars',
       'Buccaneers', 'Team'], dtype=object)

In [6]:
# Relabel teams: "San" and "Team" are fragments left by the name parsing in
# the scraping cell (they appear in the unique() output above), and
# "Redskins" is folded into the same "WFT" label as "Team".
team_label_fixes = {"Redskins": "WFT", "San": "49ers", "Team": "WFT"}
squads = team_data["Team"].replace(to_replace=team_label_fixes)
team_data["Team"] = squads

In [7]:
# Distinct team labels after the relabelling step, in order of first appearance.
pd.unique(team_data["Team"])

array(['Broncos', 'Eagles', 'Packers', 'Saints', 'Chargers', 'Lions',
       'Patriots', 'Bears', 'WFT', 'Bengals', 'Texans', 'Cardinals',
       'Vikings', 'Falcons', 'Colts', 'Cowboys', 'Browns', 'Seahawks',
       'Bills', 'Steelers', 'Chiefs', 'Titans', 'Raiders', '49ers',
       'Jets', 'Panthers', 'Dolphins', 'Giants', 'Ravens', 'Rams',
       'Jaguars', 'Buccaneers'], dtype=object)

In [8]:
# Write the combined table to team_data.csv in the working directory,
# UTF-8 encoded and without the row index column.
team_data.to_csv(
    path_or_buf="team_data.csv",
    index=False,
    encoding='utf-8',
)