In [3]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [7]:
def scrape_box_scores(start_year, end_year):
    box_scores = []
    for year in range(start_year, end_year + 1):
        url = f"https://www.pro-football-reference.com/years/{year}/games.htm"
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all box score links for the given year
        for link in soup.find_all('a', href=True):
            if '/boxscores/' in link['href'] and 'htm' in link['href']:
                game_url = f"https://www.pro-football-reference.com{link['href']}"
                game_date = link.find_previous('th').text.strip() if link.find_previous('th') else "Unknown Date"
                box_scores.append((game_url, game_date))
    return box_scores




In [19]:
def scrape_game_details(game_url):
    response = requests.get(game_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Collect home and away team information
    game_data = {
        'home_team': None,
        'away_team': None,
        'home_snap_counts': [],
        'away_snap_counts': [],
        'home_receiving_stats': [],
        'away_receiving_stats': [],
        'home_rushing_stats': [],
        'away_rushing_stats': [],
        'home_passing_stats': [],
        'away_passing_stats': []
    }

    # Find team names
    team_names = soup.find_all('strong')
    if len(team_names) >= 2:
        game_data['away_team'] = team_names[0].text.strip()
        game_data['home_team'] = team_names[1].text.strip()

    # Find snap counts
    home_snap_count_table = soup.find('table', {'id': 'home_snap_counts'})
    away_snap_count_table = soup.find('table', {'id': 'away_snap_counts'})
    if home_snap_count_table:
        rows = home_snap_count_table.find_all('tr')[1:]
        for row in rows:
            cells = row.find_all('td')
            if len(cells) > 0:
                team = cells[1].text.strip()
                player = cells[0].text.strip()
                snaps = cells[2].text.strip()
                if team == game_data['home_team']:
                    game_data['home_snap_counts'].append((player, snaps))
                else:
                    game_data['away_snap_counts'].append((player, snaps))

    # Find passing, rushing, and receiving stats
    stats_tables = {'passing': 'passing', 'rushing': 'rushing', 'receiving': 'receiving'}
    for stat_type, table_id in stats_tables.items():
        table = soup.find('table', {'id': table_id})
        if table:
            rows = table.find_all('tr')[1:]
            for row in rows:
                cells = row.find_all('td')
                if len(cells) > 0:
                    player = row.find('th').text.strip()
                    team = cells[0].text.strip()
                    stats = [cell.text.strip() for cell in cells[1:]]
                    if team == game_data['home_team']:
                        game_data[f'home_{stat_type}_stats'].append((player, stats))
                    else:
                        game_data[f'away_{stat_type}_stats'].append((player, stats))

    return game_data

In [8]:
data = scrape_box_scores(2023,2023)

In [26]:
game_data = scrape_game_details(data[0][0])

In [27]:
game_data

{'home_team': 'Detroit Lions',
 'away_team': 'Detroit Lions at Kansas City Chiefs - September 7th, 2023',
 'home_snap_counts': [],
 'away_snap_counts': [],
 'home_receiving_stats': [],
 'away_receiving_stats': [],
 'home_rushing_stats': [],
 'away_rushing_stats': [],
 'home_passing_stats': [],
 'away_passing_stats': []}

In [None]:
# Convert to a DataFrame
df = pd.DataFrame(data, columns=['Player', 'Team', 'Age', ...])  # Use appropriate column names

# Save to a CSV or database
df.to_csv('nfl_player_stats.csv', index=False)