In [None]:
"""
Script to fetch NFL player names from pro-football-reference.com,
collect the names that are shown in bold into a list, and write them
to nfl_players.txt.
"""

from bs4 import BeautifulSoup as soup
import requests
import time

def get_html(url, timeout=30):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.
            requests applies no timeout unless one is passed, so without
            it a stalled connection would block the script indefinitely.

    Returns:
        The response body decoded to ``str`` (``response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Turn 4xx/5xx answers into exceptions
    return response.text

def extract_names(html, names):
    """Append the bold-formatted player link texts found in *html* to *names*.

    Only ``<a href=...>`` elements inside ``<div id="div_players">`` are
    considered. A link counts as bold when it contains a ``<b>``/``<strong>``
    descendant or when its direct parent is ``<b>``/``<strong>``.
    NOTE(review): presumably the site uses bold to mark a subset of players
    (e.g. active ones) -- confirm against the live page.

    Args:
        html: Markup of one player index page.
        names: List of name strings collected so far. It is extended in
            place; names already present are not added again.

    Returns:
        The same *names* list, returned unchanged when the page has no
        ``div_players`` container.
    """
    page_soup = soup(html, 'html.parser')
    div_players = page_soup.find('div', {'id': 'div_players'})
    if div_players is None:
        return names

    # Track membership in a set so each duplicate check is O(1) instead of a
    # linear scan of the ever-growing list; the list still keeps the order.
    seen = set(names)

    for link in div_players.find_all('a', href=True):
        name = link.text.strip()
        if not name or name in seen:
            continue

        # Bold either via a <b>/<strong> inside the link, or by the link
        # itself being wrapped directly in one.
        bold_child = link.find(['b', 'strong'])
        parent = link.parent
        parent_bold = parent is not None and parent.name in ('b', 'strong')

        if bold_child or parent_bold:
            seen.add(name)
            names.append(name)
    return names

In [13]:
alphabet_capitalized = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

base_url = 'https://www.pro-football-reference.com/players/'
names = []

# Visit one index page per letter (.../players/A/, .../players/B/, ...),
# accumulating the bold player names from each page into `names`.
for letter in alphabet_capitalized:
    url = f"{base_url}{letter}/"
    print(f"Fetching links from: {url}")
    print("Getting letter: ", letter)
    html = get_html(url)
    extract_names(html, names)
    time.sleep(4)  # Be polite and avoid hammering the server

# Write as UTF-8 explicitly: the platform default encoding may be unable to
# represent non-ASCII characters in names and would otherwise make the
# output file differ between machines.
with open('nfl_players.txt', 'w', encoding='utf-8') as f:
    f.writelines(f"{name}\n" for name in sorted(names))


Fetching links from: https://www.pro-football-reference.com/players/A/
Getting letter:  A
Fetching links from: https://www.pro-football-reference.com/players/B/
Getting letter:  B
Fetching links from: https://www.pro-football-reference.com/players/C/
Getting letter:  C
Fetching links from: https://www.pro-football-reference.com/players/D/
Getting letter:  D
Fetching links from: https://www.pro-football-reference.com/players/E/
Getting letter:  E
Fetching links from: https://www.pro-football-reference.com/players/F/
Getting letter:  F
Fetching links from: https://www.pro-football-reference.com/players/G/
Getting letter:  G
Fetching links from: https://www.pro-football-reference.com/players/H/
Getting letter:  H
Fetching links from: https://www.pro-football-reference.com/players/I/
Getting letter:  I
Fetching links from: https://www.pro-football-reference.com/players/J/
Getting letter:  J
Fetching links from: https://www.pro-football-reference.com/players/K/
Getting letter:  K
Fetching l