In [1]:
# All necessary imports
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import time
import os
import io

In [10]:
def scrape_player_gamelog(player_id, name):
    """
    Scrape a player's game log table from Pro Football Reference.

    Builds the gamelog URL from ``player_id`` (its first character is used as
    the directory segment of the URL), downloads the page and parses the
    table whose id is ``stats`` into a DataFrame.

    Args:
        player_id: Pro Football Reference player ID, e.g. ``"AlleNi00"``.
        name: Player name; only used in progress and error messages.

    Returns:
        A pandas DataFrame with repeated header rows and rows that have no
        ``Rk`` value removed, or ``None`` when the table is missing or any
        error occurs while fetching or parsing the page.
    """
    # Construct URL using player ID
    url = f'https://www.pro-football-reference.com/players/{player_id[0]}/{player_id}/gamelog/'
    print(f"Attempting to scrape: {url}")

    try:
        # Use the response as a context manager so the HTTP connection is
        # always closed, even if parsing the page raises.
        with urlopen(url) as response:
            soup = BeautifulSoup(response, 'html.parser')

        # Find the regular season table (it has id='stats')
        table_html = soup.find('table', {'id': 'stats'})

        if table_html is None:
            print(f"Could not find stats table for {name}")
            return None

        # Wrap the markup in StringIO: passing a literal HTML string to
        # read_html is deprecated in recent pandas versions.
        df = pd.read_html(io.StringIO(str(table_html)))[0]

        # Flatten multi-level column headers by keeping only the innermost level
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(-1)

        # Drop rows that merely repeat the header inside the table body
        df = df[df['Rk'] != 'Rk']

        # Drop rows where Rk is NaN (usually indicates section breaks)
        df = df.dropna(subset=['Rk'])

        print(f"Successfully found game log with {len(df)} rows")
        return df

    except Exception as e:
        # Deliberate best-effort: one failing player must not abort a whole
        # roster run, so report the problem and signal failure with None.
        print(f"Error scraping data for {name}: {str(e)}")
        return None

def process_roster(roster_file, output_folder, delay=3):
    """
    Scrape and save the game log of every player listed in a roster CSV.

    The roster must contain a ``Player`` column (display name) and a
    ``Player-additional`` column (player ID passed to
    ``scrape_player_gamelog``). Each successfully scraped game log is written
    to ``<output_folder>/<sanitised name>_gamelog.csv``. Progress and a final
    success/failure summary are printed.

    Args:
        roster_file: Path to the roster CSV file.
        output_folder: Directory for the per-player CSV files; created if it
            does not exist.
        delay: Seconds to pause after a scrape attempt before moving on to
            the next player, to avoid overwhelming the server. Defaults to 3;
            a value of 0 or less disables the pause.

    Returns:
        None.
    """
    # Create output folder if it doesn't exist (exist_ok guards against the
    # folder appearing between the check and the creation).
    if not os.path.exists(output_folder):
        os.makedirs(output_folder, exist_ok=True)
        print(f"Created output folder: {output_folder}")

    # Read roster file
    print(f"Reading roster file: {roster_file}")
    roster = pd.read_csv(roster_file)
    total = len(roster)
    print(f"Found {total} players in roster")

    # Initialize counters
    successful = 0
    failed = 0

    # Process each player. The running position comes from enumerate rather
    # than the DataFrame index so the "N of M" message does not depend on the
    # index being a default 0-based range.
    for position, (_, row) in enumerate(roster.iterrows(), start=1):
        player_id = row['Player-additional']
        name = row['Player']

        print(f"\nProcessing player {position} of {total}: {name}")

        # Skip if no player ID (no request is made, so no pause is needed)
        if pd.isna(player_id):
            print(f"No ID found for {name}, skipping...")
            failed += 1
            continue

        # Scrape player's game log
        game_log = scrape_player_gamelog(player_id, name)

        if game_log is not None:
            # Keep only letters, digits, spaces, hyphens and underscores so
            # the name is safe to use as a filename.
            safe_name = "".join(ch for ch in name if ch.isalnum() or ch in ' -_')
            filename = os.path.join(output_folder, f"{safe_name}_gamelog.csv")

            # Save to CSV
            game_log.to_csv(filename, index=False)
            print(f"Saved game log to: {filename}")
            successful += 1
        else:
            failed += 1

        # Pause only when another player follows; waiting after the final
        # player would just delay the summary.
        if delay > 0 and position < total:
            print(f"Waiting {delay} seconds before next player...")
            time.sleep(delay)

    # Print summary
    print(f"\nProcessing complete!")
    print(f"Successfully processed: {successful} players")
    print(f"Failed to process: {failed} players")

# Example usage: scrape and save game logs for every player on one team's roster.
# Running this cell makes one web request per rostered player that has an ID, with pauses between players.
team_name = "Commanders" # Team nickname (Bears, Eagles, Commanders, etc.); used to build both paths below

roster_file = f'Rosters/{team_name}_roster.csv'  # Roster CSV to read; must contain 'Player' and 'Player-additional' columns
output_folder = f'Player_Logs/{team_name}'  # Folder for the per-player game log CSVs; created if it does not exist

process_roster(roster_file, output_folder)

Created output folder: Player_Logs/Commanders
Reading roster file: Rosters/Commanders_roster.csv
Found 60 players in roster

Processing player 1 of 60: Nick Allegretti
Attempting to scrape: https://www.pro-football-reference.com/players/A/AlleNi00/gamelog/
Successfully found game log with 94 rows
Saved game log to: Player_Logs/Commanders\Nick Allegretti_gamelog.csv
Waiting 3 seconds before next player...

Processing player 2 of 60: Dorance Armstrong Jr.
Attempting to scrape: https://www.pro-football-reference.com/players/A/ArmsDo00/gamelog/
Successfully found game log with 110 rows
Saved game log to: Player_Logs/Commanders\Dorance Armstrong Jr_gamelog.csv
Waiting 3 seconds before next player...

Processing player 3 of 60: John Bates
Attempting to scrape: https://www.pro-football-reference.com/players/B/BateJo00/gamelog/
Successfully found game log with 62 rows
Saved game log to: Player_Logs/Commanders\John Bates_gamelog.csv
Waiting 3 seconds before next player...

Processing player 4 o