In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import os

def scrape_f1_standings(year):
    """Fetch the driver standings table for one season from formula1.com.

    Args:
        year: Season year; interpolated into the results-archive URL.

    Returns:
        A list of ``[position, driver, nationality, car, points]`` rows,
        each value being the stripped text of the corresponding table cell.
        The list is empty when the response status is not successful, when
        the page has no ``resultsarchive-table`` table, or when that table
        has no usable data rows.

    Raises:
        requests.exceptions.RequestException: Network-level failures raised
            by ``requests.get`` (including timeouts) are not caught here.
    """
    url = f"https://www.formula1.com/en/results.html/{year}/drivers.html"
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would freeze a multi-year scraping run.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # An error page carries no standings; report it as "no data".
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='resultsarchive-table')
    if table is None:
        return []

    data = []
    for row in table.find_all('tr')[1:]:  # Skip the header row
        cols = row.find_all('td')
        # Cells 1..5 are read below, so a row needs at least six cells;
        # shorter rows (spacers, malformed markup) are skipped instead of
        # raising IndexError.
        if len(cols) < 6:
            continue
        data.append([cell.text.strip() for cell in cols[1:6]])

    return data

# Create a directory to store the CSV files. exist_ok=True makes this a
# no-op when the directory is already there, without the race-prone
# check-then-create sequence.
os.makedirs('f1_standings', exist_ok=True)

# Scrape data for years from 1950 to 2023 (inclusive)
for year in range(1950, 2024):  # range() excludes the stop value
    print(f"Scraping data for {year}...")
    year_data = scrape_f1_standings(year)

    if year_data:
        # Create a DataFrame for the year
        df = pd.DataFrame(year_data, columns=['Position', 'Driver', 'Nationality', 'Car', 'Points'])

        # The driver cell is expected to read "First\nLast\nCode". n=2 caps
        # the split at three pieces and reindex pads it to exactly three
        # columns, so a row with more or fewer newlines cannot trigger a
        # column-count mismatch.
        name_parts = (
            df['Driver']
            .str.split('\n', n=2, expand=True)
            .reindex(columns=range(3))
        )

        # Combine first name (piece 0) and last name (piece 1) into
        # 'Full Name'; the driver code (piece 2) is not written out.
        df['Full Name'] = name_parts[0] + ' ' + name_parts[1]

        # Keep only the output columns, in their final order
        columns_order = ['Position', 'Full Name', 'Nationality', 'Car', 'Points']
        df = df[columns_order]

        # Save to a CSV file
        filename = f'f1_standings/f1_driver_standings_{year}.csv'
        df.to_csv(filename, index=False)
        print(f"Saved data for {year} to {filename}")
    else:
        print(f"No data found for {year}")

    # Add a delay to avoid overwhelming the server
    time.sleep(1)

print("Scraping complete!")

Scraping data for 1950...
Saved data for 1950 to f1_standings/f1_driver_standings_1950.csv
Scraping data for 1951...
Saved data for 1951 to f1_standings/f1_driver_standings_1951.csv
Scraping data for 1952...
Saved data for 1952 to f1_standings/f1_driver_standings_1952.csv
Scraping data for 1953...
Saved data for 1953 to f1_standings/f1_driver_standings_1953.csv
Scraping data for 1954...
Saved data for 1954 to f1_standings/f1_driver_standings_1954.csv
Scraping data for 1955...
Saved data for 1955 to f1_standings/f1_driver_standings_1955.csv
Scraping data for 1956...
Saved data for 1956 to f1_standings/f1_driver_standings_1956.csv
Scraping data for 1957...
Saved data for 1957 to f1_standings/f1_driver_standings_1957.csv
Scraping data for 1958...
Saved data for 1958 to f1_standings/f1_driver_standings_1958.csv
Scraping data for 1959...
Saved data for 1959 to f1_standings/f1_driver_standings_1959.csv
Scraping data for 1960...
Saved data for 1960 to f1_standings/f1_driver_standings_1960.csv