In [1]:
import csv

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# URL building blocks: a full page address is base + season + page suffix.

# Base address of the per-season pages.
pro_football_reference_main = "https://www.pro-football-reference.com/years/"
# Season whose pages are requested.
most_recent_season = "2023"

# Page suffixes, one per statistics category.
passing_stats = "/passing.htm"
rushing_stats = "/rushing.htm"
receiving_stats = "/receiving.htm"
fantasy_stats = "/fantasy.htm"


def get_stats(url, timeout=30):
    """Download a web page and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        BeautifulSoup: The page HTML parsed with the ``html.parser`` backend.

    Raises:
        Exception: If the server answers with any status code other than 200.
        requests.exceptions.RequestException: Propagated from ``requests.get``
            on connection failures or when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is treated as a failed download.
    if response.status_code != 200:
        raise Exception(
            f'Unable to download page {url} (status code {response.status_code})'
        )

    return BeautifulSoup(response.text, 'html.parser')

In [3]:
# Download and parse the passing page for the configured season, then show the
# page <title> as a quick check that the expected page came back.
qb_stats = get_stats(f'{pro_football_reference_main}{most_recent_season}{passing_stats}')
qb_stats.find('title')


<title>2023 NFL Passing | Pro-Football-Reference.com</title>

In [7]:
def get_all_qbs(qb_stats):
    """Extract per-player passing stats from a parsed passing-stats page.

    Every ``<tr>`` inside the first ``<tbody>`` of ``qb_stats`` is read, and
    the cells are looked up by their ``data-stat`` attribute.

    Args:
        qb_stats: Parsed document exposing ``find`` / ``find_all`` (for
            example the BeautifulSoup object returned by ``get_stats``).

    Returns:
        list[dict]: One entry per table row, in page order. A row that parses
        yields a dict of strings with the keys ``name``, ``pos``,
        ``comp_pct``, ``pass_yds``, ``pass_td``, ``pass_int``, ``pass_td_%``,
        ``QB_Rating`` and ``Fourth_Qtr_Comebacks``. A row that is missing any
        of the expected cells yields a single empty dict as a placeholder.
    """
    # Output key -> value of the cell's ``data-stat`` attribute.
    stat_columns = {
        'pos': 'pos',
        'comp_pct': 'pass_cmp_pct',
        'pass_yds': 'pass_yds',
        'pass_td': 'pass_td',
        'pass_int': 'pass_int',
        'pass_td_%': 'pass_td_pct',
        'QB_Rating': 'qbr',
        'Fourth_Qtr_Comebacks': 'comebacks',
    }

    all_qbs_data = []
    for row in qb_stats.find('tbody').find_all('tr'):
        try:
            player = {'name': row.find('td').find('a').text}
            for key, data_stat in stat_columns.items():
                player[key] = row.find(attrs={'data-stat': data_stat}).text
        except AttributeError:
            # ``find`` returns None when a tag is absent, so the attribute
            # access above fails on rows that are not player rows. Record
            # exactly one placeholder; never re-append the previous player.
            print('No data here')
            player = {}
        all_qbs_data.append(player)
    return all_qbs_data

In [8]:
def write_csv(items, path):
    """Write a list of dicts to ``path`` as a CSV file.

    The header row is taken from the keys of the first non-empty dict. Empty
    dicts are skipped: they carry no data, and an empty first item would
    otherwise produce an empty header and a file of blank lines.

    Args:
        items: Dicts to write, one per CSV row. Keys missing from a dict are
            written as empty fields; keys not present in the header are
            ignored.
        path: Destination file. It is created or truncated even when there is
            nothing to write, in which case it is left empty.

    Returns:
        None
    """
    rows = [item for item in items if item]

    # newline='' hands line-ending control to the csv module; utf-8 keeps
    # non-ASCII text writable regardless of the platform default encoding.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        # Nothing to write: leave the (now empty) file in place.
        if not rows:
            return

        headers = list(rows[0].keys())

        # csv.writer quotes fields containing commas, quotes or newlines,
        # which plain ','.join() does not.
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(headers)
        for row in rows:
            writer.writerow([str(row.get(header, '')) for header in headers])

In [9]:
# Name the output after the configured season so the file name always matches
# the season that was actually scraped.
write_csv(get_all_qbs(qb_stats), f'{most_recent_season}_qb_stats.csv')

No data here


In [10]:
def get_all_teams(team_stats):
    """Extract per-row stats from the parsed page passed in as ``team_stats``.

    NOTE(review): this is an early draft. It still looks up the passing-table
    columns (``pass_yds``, ``qbr``, ...), and a later cell in this notebook
    defines ``get_all_teams`` again with team columns; that later definition
    replaces this one once it runs.

    Args:
        team_stats: Parsed document exposing ``find`` / ``find_all``. The
            rows are read from this argument, not from a notebook global.

    Returns:
        list[dict]: One entry per ``<tr>`` of the first ``<tbody>``, in page
        order. A row that parses yields a dict of strings; a row missing any
        expected cell yields a single empty dict as a placeholder.
    """
    # Output key -> value of the cell's ``data-stat`` attribute.
    stat_columns = {
        'pos': 'pos',
        'comp_pct': 'pass_cmp_pct',
        'pass_yds': 'pass_yds',
        'pass_td': 'pass_td',
        'pass_int': 'pass_int',
        'pass_td_%': 'pass_td_pct',
        'QB_Rating': 'qbr',
        'Fourth_Qtr_Comebacks': 'comebacks',
    }

    all_rows_data = []
    for row in team_stats.find('tbody').find_all('tr'):
        try:
            record = {'name': row.find('td').find('a').text}
            for key, data_stat in stat_columns.items():
                record[key] = row.find(attrs={'data-stat': data_stat}).text
        except AttributeError:
            # ``find`` returns None when a tag is absent. Record a single
            # placeholder instead of re-appending the previous row's dict.
            print('No data here')
            record = {}
        all_rows_data.append(record)
    return all_rows_data

In [10]:
# Download and parse the season overview page, then show the page <title> as a
# quick check that the expected page came back.
team_stats = get_stats(f'{pro_football_reference_main}{most_recent_season}')
team_stats.find('title')


<title>2023 NFL Standings &amp; Team Stats | Pro-Football-Reference.com</title>

In [81]:
def get_all_teams(team_stats):
    """Extract per-team stats from a parsed standings page.

    Every ``<tr>`` inside the first ``<tbody>`` of ``team_stats`` is read, and
    the cells are looked up by their ``data-stat`` attribute.

    NOTE(review): only the first ``<tbody>`` is read. If the page holds more
    than one table, the later ones are ignored; confirm that is intended.

    Args:
        team_stats: Parsed document exposing ``find`` / ``find_all`` (for
            example the BeautifulSoup object returned by ``get_stats``).

    Returns:
        list[dict]: One entry per table row, in page order. A row that parses
        yields a dict of strings with the keys ``team``, ``wins``,
        ``win_loss_perc``, ``points``, ``points_opp``, ``points_diff``,
        ``mov``, ``sos_total``, ``srs_total``, ``srs_offense`` and
        ``srs_defense``. A row missing any expected cell yields an empty dict.
    """
    # For these columns the output key equals the ``data-stat`` value.
    stat_columns = (
        'wins',
        'win_loss_perc',
        'points',
        'points_opp',
        'points_diff',
        'mov',
        'sos_total',
        'srs_total',
        'srs_offense',
        'srs_defense',
    )

    all_teams_data = []
    for row in team_stats.find('tbody').find_all('tr'):
        try:
            team = {'team': row.find('th', attrs={'data-stat': 'team'}).find('a').text}
            for column in stat_columns:
                team[column] = row.find(attrs={'data-stat': column}).text
        except AttributeError:
            # ``find`` returns None when a tag is absent, so rows that are not
            # team rows fail here. Only that failure is treated as "no data";
            # anything else (including KeyboardInterrupt) propagates.
            team = {}
            print('No data here')
        all_teams_data.append(team)
    return all_teams_data

In [82]:
# Preview the parsed rows; rows that could not be parsed appear as empty dicts.
get_all_teams(team_stats)

No data here
No data here
No data here
No data here


[{},
 {'team': 'Buffalo Bills',
  'wins': '11',
  'win_loss_perc': '.647',
  'points': '451',
  'points_opp': '311',
  'points_diff': '140',
  'mov': '8.2',
  'sos_total': '-1.8',
  'srs_total': '6.5',
  'srs_offense': '4.1',
  'srs_defense': '2.3'},
 {'team': 'Miami Dolphins',
  'wins': '11',
  'win_loss_perc': '.647',
  'points': '496',
  'points_opp': '391',
  'points_diff': '105',
  'mov': '6.2',
  'sos_total': '-1.8',
  'srs_total': '4.4',
  'srs_offense': '7.6',
  'srs_defense': '-3.1'},
 {'team': 'New York Jets',
  'wins': '7',
  'win_loss_perc': '.412',
  'points': '268',
  'points_opp': '355',
  'points_diff': '-87',
  'mov': '-5.1',
  'sos_total': '-0.6',
  'srs_total': '-5.8',
  'srs_offense': '-6.7',
  'srs_defense': '0.9'},
 {'team': 'New England Patriots',
  'wins': '4',
  'win_loss_perc': '.235',
  'points': '236',
  'points_opp': '366',
  'points_diff': '-130',
  'mov': '-7.6',
  'sos_total': '-0.2',
  'srs_total': '-7.9',
  'srs_offense': '-8.5',
  'srs_defense': '0.6'

In [None]:
# Name the output after the configured season so the file name always matches
# the season that was actually scraped.
write_csv(get_all_teams(team_stats), f'{most_recent_season}_team_stats.csv')