In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
#reusable URLs to help us get the different categories that we want. 

pro_football_reference_main = 'https://www.pro-football-reference.com/years/'
most_recent_season = '2023'

passing_stats = '/passing.htm'
rushing_stats = '/rushing.htm'
receiving_stats = '/receiving.htm'
fantasy_stats = '/fantasy.htm'


def get_stats(url):
    """Downloads the web page and we will return a beautiful soup doc."""
    response = requests.get(url)
        
    # verify that we are receiving the correct response
    
    if response.status_code != 200:
        raise Exception(f'Unable to download page{url}')
        
    # Get the HTML from the page
    page_content = response.text
    
    # create our bs4 doc
    doc = BeautifulSoup(response.text, 'html.parser')
    return doc

In [3]:
qb_stats = get_stats(pro_football_reference_main + most_recent_season + passing_stats)
qb_stats.find('title')


<title>2023 NFL Passing | Pro-Football-Reference.com</title>

In [11]:
def get_all_qbs(qb_stats):
    qbs = qb_stats.find('tbody').find_all('tr')
    # print(qbs)
    all_qbs_data = []
    for qb in qbs:
        
        try:
            all_stats = {
                'name': qb.find('td').find('a').text,
                'team': qb.find(attrs = {'data-stat' : 'pos'}).text,
                'pos' :     qb.find(attrs = {'data-stat' : 'team_name_abbr'}).text,
                'comp_pct': qb.find(attrs = {'data-stat' : 'pass_cmp_pct'}).text,
                'pass_yds': qb.find(attrs = {'data-stat' : 'pass_yds'}).text,
                'pass_td' : qb.find(attrs = {'data-stat' : 'pass_td'}).text,
                'pass_int' : qb.find(attrs = {'data-stat' : 'pass_int'}).text,
                'pass_td_%' : qb.find(attrs = {'data-stat': 'pass_td_pct'}).text,
                'QB_Rating' : qb.find(attrs = {'data-stat' : 'qbr'}).text,
                'Fourth_Qtr_Comebacks': qb.find(attrs = {'data-stat' : 'comebacks'}).text  
        }
        
        except:
            all_qbs_data.append({})
            print('No data here')
        all_qbs_data.append(all_stats)
    return all_qbs_data

In [22]:
get_all_qbs(qb_stats)

No data here


[{'name': 'Tua Tagovailoa',
  'team': 'QB',
  'pos': 'MIA',
  'comp_pct': '69.3',
  'pass_yds': '4624',
  'pass_td': '29',
  'pass_int': '14',
  'pass_td_%': '5.2',
  'QB_Rating': '60.8',
  'Fourth_Qtr_Comebacks': '2'},
 {'name': 'Jared Goff',
  'team': 'QB',
  'pos': 'DET',
  'comp_pct': '67.3',
  'pass_yds': '4575',
  'pass_td': '30',
  'pass_int': '12',
  'pass_td_%': '5.0',
  'QB_Rating': '60.3',
  'Fourth_Qtr_Comebacks': '2'},
 {'name': 'Dak Prescott',
  'team': 'QB',
  'pos': 'DAL',
  'comp_pct': '69.5',
  'pass_yds': '4516',
  'pass_td': '36',
  'pass_int': '9',
  'pass_td_%': '6.1',
  'QB_Rating': '72.7',
  'Fourth_Qtr_Comebacks': '2'},
 {'name': 'Josh Allen',
  'team': 'QB',
  'pos': 'BUF',
  'comp_pct': '66.5',
  'pass_yds': '4306',
  'pass_td': '29',
  'pass_int': '18',
  'pass_td_%': '5.0',
  'QB_Rating': '69.6',
  'Fourth_Qtr_Comebacks': '2'},
 {'name': 'Brock Purdy',
  'team': 'QB',
  'pos': 'SFO',
  'comp_pct': '69.4',
  'pass_yds': '4280',
  'pass_td': '31',
  'pass_int

In [25]:
def write_csv(items, path):
    #open the file in write mode
    print(items)
    with open(path, 'w') as f:
        #return nothing if there's nothing to write
        if len(items) == 0:
            return
        
        #write the headers in the first line of csv
        headers = list(items[0].keys())
        f.write(','.join(headers) +'\n')
        
        #write one item per line from our dictionary
        for item in items:
            values = []
            for header in headers:
                values.append(str(item.get(header, '')))
            f.write(','.join(values) + '\n')

In [14]:
write_csv(get_all_qbs(qb_stats), '../data/2023_qb_stats.csv')

No data here


In [15]:
team_stats = get_stats(pro_football_reference_main + most_recent_season)
team_stats.find('title')


<title>2023 NFL Standings &amp; Team Stats | Pro-Football-Reference.com</title>

In [44]:
def get_all_teams(team_stats):
    afc_teams = team_stats.find("div", {"id": "all_AFC"}).find('tbody').find_all('tr')
    print(afc_teams)

    nfc_teams = team_stats.find("div", {"id": "all_NFC"}).find('tbody').find_all('tr')
    print(nfc_teams)

    all_teams_data = []
    for team in afc_teams:
        try:
            all_stats = {
                'team': team.find('th', attrs={"data-stat": "team"}).find('a').text,
                'wins' :     team.find(attrs = {'data-stat' : 'wins'}).text,
                'win_loss_perc': team.find(attrs = {'data-stat' : 'win_loss_perc'}).text,
                'points': team.find(attrs = {'data-stat' : 'points'}).text,
                'points_opp' : team.find(attrs = {'data-stat' : 'points_opp'}).text,
                'points_diff' : team.find(attrs = {'data-stat' : 'points_diff'}).text,
                'mov' : team.find(attrs = {'data-stat': 'mov'}).text,
                'sos_total' : team.find(attrs = {'data-stat' : 'sos_total'}).text,
                'srs_total': team.find(attrs = {'data-stat' : 'srs_total'}).text ,
                'srs_offense': team.find(attrs = {'data-stat' : 'srs_offense'}).text,
                'srs_defense': team.find(attrs = {'data-stat' : 'srs_defense'}).text 
        }
        
        except:
            all_stats = {
                'team': None,
                'wins' :     None,
                'win_loss_perc': None,
                'points': None,
                'points_opp' : None,
                'points_diff' : None,
                'mov' : None,
                'sos_total' : None,
                'srs_total': None,
                'srs_offense': None,
                'srs_defense': None 
            }
            # all_stats = None
            print('No data here')
        all_teams_data.append(all_stats)
    for team in nfc_teams:
        try:
            all_stats = {
                'team': team.find('th', attrs={"data-stat": "team"}).find('a').text,
                'wins' :     team.find(attrs = {'data-stat' : 'wins'}).text,
                'win_loss_perc': team.find(attrs = {'data-stat' : 'win_loss_perc'}).text,
                'points': team.find(attrs = {'data-stat' : 'points'}).text,
                'points_opp' : team.find(attrs = {'data-stat' : 'points_opp'}).text,
                'points_diff' : team.find(attrs = {'data-stat' : 'points_diff'}).text,
                'mov' : team.find(attrs = {'data-stat': 'mov'}).text,
                'sos_total' : team.find(attrs = {'data-stat' : 'sos_total'}).text,
                'srs_total': team.find(attrs = {'data-stat' : 'srs_total'}).text ,
                'srs_offense': team.find(attrs = {'data-stat' : 'srs_offense'}).text,
                'srs_defense': team.find(attrs = {'data-stat' : 'srs_defense'}).text 
        }
        
        except:
            all_stats = {
                'team': None,
                'wins' :     None,
                'win_loss_perc': None,
                'points': None,
                'points_opp' : None,
                'points_diff' : None,
                'mov' : None,
                'sos_total' : None,
                'srs_total': None,
                'srs_offense': None,
                'srs_defense': None 
            }
            # all_stats = None
            print('No data here')
        all_teams_data.append(all_stats)
    return all_teams_data

In [45]:
get_all_teams(team_stats)

[<tr class="thead onecell"><td class="right left" colspan="12" data-stat="onecell"> AFC East</td></tr>, <tr><th class="left" csk="1" data-stat="team" scope="row"><a href="/teams/buf/2023.htm">Buffalo Bills</a>*</th><td class="right" data-stat="wins">11</td><td class="right" data-stat="losses">6</td><td class="right" data-stat="win_loss_perc">.647</td><td class="right" data-stat="points">451</td><td class="right" data-stat="points_opp">311</td><td class="right" data-stat="points_diff">140</td><td class="right" data-stat="mov">8.2</td><td class="right" data-stat="sos_total">-1.8</td><td class="right" data-stat="srs_total">6.5</td><td class="right" data-stat="srs_offense">4.1</td><td class="right" data-stat="srs_defense">2.3</td></tr>, <tr><th class="left" csk="2" data-stat="team" scope="row"><a href="/teams/mia/2023.htm">Miami Dolphins</a>+</th><td class="right" data-stat="wins">11</td><td class="right" data-stat="losses">6</td><td class="right" data-stat="win_loss_perc">.647</td><td cla

[{'team': None,
  'wins': None,
  'win_loss_perc': None,
  'points': None,
  'points_opp': None,
  'points_diff': None,
  'mov': None,
  'sos_total': None,
  'srs_total': None,
  'srs_offense': None,
  'srs_defense': None},
 {'team': 'Buffalo Bills',
  'wins': '11',
  'win_loss_perc': '.647',
  'points': '451',
  'points_opp': '311',
  'points_diff': '140',
  'mov': '8.2',
  'sos_total': '-1.8',
  'srs_total': '6.5',
  'srs_offense': '4.1',
  'srs_defense': '2.3'},
 {'team': 'Miami Dolphins',
  'wins': '11',
  'win_loss_perc': '.647',
  'points': '496',
  'points_opp': '391',
  'points_diff': '105',
  'mov': '6.2',
  'sos_total': '-1.8',
  'srs_total': '4.4',
  'srs_offense': '7.6',
  'srs_defense': '-3.1'},
 {'team': 'New York Jets',
  'wins': '7',
  'win_loss_perc': '.412',
  'points': '268',
  'points_opp': '355',
  'points_diff': '-87',
  'mov': '-5.1',
  'sos_total': '-0.6',
  'srs_total': '-5.8',
  'srs_offense': '-6.7',
  'srs_defense': '0.9'},
 {'team': 'New England Patriots',


In [38]:
write_csv(get_all_teams(team_stats), '../data/2023_team_stats.csv')

No data here
No data here
No data here
No data here
[{'team': None, 'wins': None, 'win_loss_perc': None, 'points': None, 'points_opp': None, 'points_diff': None, 'mov': None, 'sos_total': None, 'srs_total': None, 'srs_offense': None, 'srs_defense': None}, {'team': 'Buffalo Bills', 'wins': '11', 'win_loss_perc': '.647', 'points': '451', 'points_opp': '311', 'points_diff': '140', 'mov': '8.2', 'sos_total': '-1.8', 'srs_total': '6.5', 'srs_offense': '4.1', 'srs_defense': '2.3'}, {'team': 'Miami Dolphins', 'wins': '11', 'win_loss_perc': '.647', 'points': '496', 'points_opp': '391', 'points_diff': '105', 'mov': '6.2', 'sos_total': '-1.8', 'srs_total': '4.4', 'srs_offense': '7.6', 'srs_defense': '-3.1'}, {'team': 'New York Jets', 'wins': '7', 'win_loss_perc': '.412', 'points': '268', 'points_opp': '355', 'points_diff': '-87', 'mov': '-5.1', 'sos_total': '-0.6', 'srs_total': '-5.8', 'srs_offense': '-6.7', 'srs_defense': '0.9'}, {'team': 'New England Patriots', 'wins': '4', 'win_loss_perc': '.