In [None]:
from bs4 import BeautifulSoup
import aiohttp
import asyncio
import os
import json
import time
import sys

# Make the project's ``src`` directory importable. The path is built from the
# current working directory (<cwd>/../src), so the imports below only resolve
# when the kernel's working directory is a direct child of the project root.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "src")))
from utils.path import get_project_root
from utils.f1_shared import ssl_context, head, base_url, years, test_function

# Directory constants, built under whatever get_project_root() returns.
# NOTE(review): CHECKPOINTS_DIR is not referenced by the cells visible here —
# confirm whether another cell or notebook relies on it.
PROJECT_ROOT = get_project_root()
DATA_DIR = os.path.join(PROJECT_ROOT, "data", "f1_fastest_laps")
CHECKPOINTS_DIR = os.path.join(PROJECT_ROOT, "data", "f1_checkpoints")

In [None]:
async def scrape_fastest_laps(session, year):
    """Scrape the fastest-lap results table for a single season.

    Requests ``{base_url}/en/results/{year}/fastest-laps`` and parses the
    first ``<table class="f1-table-with-data">`` found in the response.

    Args:
        session: Open HTTP client session exposing an aiohttp-style
            ``session.get(url, headers=...)`` async context manager.
        year: Season year interpolated into the results URL.

    Returns:
        ``{"headers": [...], "data": [[grand_prix, driver, car, time], ...]}``
        on success. ``None`` when the response status is not 200 or when the
        page contains no usable results table (table or its body missing).
        Rows with fewer than four cells are skipped.
    """
    url = f"{base_url}/en/results/{year}/fastest-laps"

    async with session.get(url, headers=head) as response:
        if response.status != 200:
            print(f"Failed to load {url}. Status: {response.status}")
            return None

        html = await response.text()

    # The body has been fully read, so parsing does not need the response
    # context to stay open.
    soup = BeautifulSoup(html, 'lxml')

    table = soup.find('table', class_='f1-table-with-data')
    tbody = table.find('tbody') if table is not None else None

    if tbody is None:
        # Covers both "no table at all" and "table without a body".
        print(f"No fastest lap data found for {year}")
        return None

    # A table without <thead> yields an empty header list instead of crashing.
    thead = table.find('thead')
    headers = (
        [header.text.strip() for header in thead.find_all('th')]
        if thead is not None
        else []
    )

    data = []
    for row in tbody.find_all('tr'):
        cols = row.find_all('td')

        # Placeholder / malformed rows do not carry the four expected cells.
        if len(cols) < 4:
            continue

        grand_prix = cols[0].text.strip()

        # Driver name: newer pages split the name across responsive spans.
        driver_cell = cols[1]
        first_name_span = driver_cell.select_one('span.max-desktop\\:hidden')
        last_name_span = driver_cell.select_one('span.max-tablet\\:hidden')

        if first_name_span and last_name_span:
            first_name = first_name_span.text.strip()
            last_name = last_name_span.text.strip()
            driver_name = f"{first_name} {last_name}"
        else:
            # For older pages without responsive spans
            driver_name = driver_cell.text.strip()

        car = cols[2].text.strip()

        # Named lap_time (not `time`) so the imported `time` module is not
        # shadowed inside this function.
        lap_time = cols[3].text.strip()

        data.append([grand_prix, driver_name, car, lap_time])

    return {
        "headers": headers,
        "data": data,
    }

In [None]:
async def collect_fastest_laps_data(start_year=years[0], end_year=years[-1]):
    """Collect fastest-lap data for a range of seasons and save it as JSON.

    For every year in ``start_year..end_year`` (inclusive) this calls
    ``scrape_fastest_laps`` and, when data is returned, writes it to
    ``<DATA_DIR>/fastest_laps/<year>_fastest_lap.json``. Years are fetched
    sequentially over one shared ``aiohttp.ClientSession``.

    Args:
        start_year: First season to fetch (defaults to ``years[0]``).
        end_year: Last season to fetch, inclusive (defaults to ``years[-1]``).

    Returns:
        None. Progress and a final elapsed-time summary are printed.
    """
    connector = aiohttp.TCPConnector(ssl=ssl_context)
    timeout = aiohttp.ClientTimeout(total=60)
    # Monotonic clock: elapsed time is unaffected by system clock adjustments.
    start_time = time.perf_counter()

    async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
        # Create directories
        # NOTE(review): DATA_DIR already ends in "f1_fastest_laps", so files
        # land in .../f1_fastest_laps/fastest_laps/ — kept as-is because
        # other code may read from that location.
        fastest_laps_dir = os.path.join(DATA_DIR, "fastest_laps")
        os.makedirs(fastest_laps_dir, exist_ok=True)

        for year in range(start_year, end_year + 1):
            print(f"Fetching fastest lap data for {year}...")
            data = await scrape_fastest_laps(session, year)

            if not data:
                print(f"No data available for {year}")
                continue

            # Save to JSON file
            file_path = os.path.join(fastest_laps_dir, f"{year}_fastest_lap.json")

            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)

            print(f"Saved fastest lap data for {year} with {len(data['data'])} entries")

    # Report the total once, after every year has been processed (previously
    # this was printed inside the loop, before each year was even saved).
    total_time = time.perf_counter() - start_time
    print(f"\nCompleted fastest laps data collection in {total_time:.2f} seconds")

In [None]:
if __name__ == "__main__":
    # Collect fastest lap data using the function's default year range.
    # NOTE: top-level `await` only works where the interpreter supports it
    # (e.g. an IPython/Jupyter kernel); in a plain Python script this line is
    # a SyntaxError and would need asyncio.run(...) instead.
    await collect_fastest_laps_data()