In [1]:
# Install the third-party packages this notebook imports (selenium, pandas,
# webdriver-manager, beautifulsoup4). Restart the kernel afterwards if pip
# reports that packages were updated.
%pip install selenium pandas webdriver-manager beautifulsoup4

Note: you may need to restart the kernel to use updated packages.




In [2]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from webdriver_manager.chrome import ChromeDriverManager

def _load_all_rows(driver):
    """Click the 'Load more' button repeatedly until no further rows appear.

    Stops when the button can no longer be found/clicked, when the row count
    does not grow within the wait window, or when two consecutive counts are
    equal. Progress is printed after every pass.

    Args:
        driver: The Selenium WebDriver showing the leaderboard page.
    """
    previous_row_count = 0

    while True:
        try:
            current_row_count = len(driver.find_elements(By.CLASS_NAME, "row"))
            print(f"Rows loaded so far: {current_row_count}")

            # Safety check: a click that added nothing means we are done.
            if current_row_count == previous_row_count and current_row_count > 0:
                print("No new data loaded after click. Stopping.")
                break

            previous_row_count = current_row_count

            load_more_btn = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, ".load-more-button"))
            )

            # Click through JavaScript so an overlapping element cannot
            # intercept the click.
            driver.execute_script("arguments[0].scrollIntoView();", load_more_btn)
            driver.execute_script("arguments[0].click();", load_more_btn)

            # Wait until the number of rows actually increases.
            try:
                WebDriverWait(driver, 10).until(
                    lambda d: len(d.find_elements(By.CLASS_NAME, "row")) > current_row_count
                )
            except Exception:
                # `Exception` (not a bare except) so that KeyboardInterrupt
                # still propagates and is not misreported as a timeout.
                print("Timed out waiting for new rows. Assuming end of list.")
                break

        except Exception:
            print("No more 'Load more' buttons found or end of data reached.")
            break


def _parse_leaderboard(html, kart_name):
    """Extract leaderboard records from the fully loaded page HTML.

    Args:
        html: Page source of the leaderboard page.
        kart_name: Value stored in the 'Kart Type' field of every record.

    Returns:
        A list of dicts with the keys 'Position', 'Name', 'Date', 'Max km/h',
        'Max G', 'Best Time', 'Profile URL' and 'Kart Type'. Podium entries
        come first, with an integer position and empty 'Max km/h' / 'Max G';
        table rows follow with the position text as found in the page.
    """
    soup = BeautifulSoup(html, 'html.parser')
    records = []

    # --- Podium (1st, 2nd, 3rd) ---
    podium = soup.find('div', class_='track_podium')
    if podium:
        for rank, css_class in enumerate(('first', 'second', 'third'), start=1):
            item = podium.find('a', class_=css_class)
            if not item:
                continue
            try:
                name = item.find('div', class_='name').get_text(strip=True)
                time_val = item.find('div', class_='time').get_text(strip=True)
                date = item.find('div', class_='date').get_text(strip=True)
            except AttributeError:
                # One of the expected child divs is missing; skip this entry.
                continue

            records.append({
                'Position': rank,
                'Name': name,
                'Date': date,
                'Max km/h': '',
                'Max G': '',
                'Best Time': time_val,
                'Profile URL': item.get('href'),
                'Kart Type': kart_name
            })

    def text_of(tag):
        # Stripped text of an optional tag; "" when the tag was not found.
        return tag.get_text(strip=True) if tag else ""

    # --- Table rows (4 onwards) ---
    for row in soup.find_all('div', class_='row'):
        try:
            pos_div = row.find('div', class_='position')
            if not pos_div:
                # Not a leaderboard row (no position cell).
                continue

            # The time is an anchor; prefer its inner <span> when present.
            time_a = row.find('a', class_='time')
            if time_a:
                best_time = text_of(time_a.find('span') or time_a)
            else:
                best_time = ""

            name_link = row.find('a', class_='name-date')

            records.append({
                'Position': pos_div.get_text(strip=True),
                'Name': text_of(row.find('div', class_='name')),
                'Date': text_of(row.find('div', class_='date')),
                'Max km/h': text_of(row.find('div', class_='max-km-h')),
                'Max G': text_of(row.find('div', class_='max-g')),
                'Best Time': best_time,
                'Profile URL': name_link.get('href') if name_link else "",
                'Kart Type': kart_name
            })
        except AttributeError:
            continue

    return records


def scrape_racefacer(kart_id, kart_name):
    """
    Scrape RaceFacer data for a specific kart type and save it to a CSV file.

    Opens the track page in Chrome, selects the kart and the 'All time'
    period, loads every leaderboard row, then writes the de-duplicated result
    to ``data_2f2f_islamabad_<kart_name lowercased>.csv`` in the current
    working directory. The browser is always closed, even when an error
    occurs part-way through.

    Args:
        kart_id: The value attribute of the kart option (e.g., "1099")
        kart_name: The display name of the kart (e.g., "SR5")

    Returns:
        None. If the kart cannot be selected the function prints the error
        and returns without writing a file.
    """
    # 1. Setup the Browser (Chrome)
    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")  # Uncomment this line if you don't want to see the browser window
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        url = "https://www.racefacer.com/en/karting-tracks/pakistan/2f2fislamabad"
        print(f"\n{'='*60}")
        print(f"Scraping {kart_name} (ID: {kart_id})")
        print(f"{'='*60}")
        print(f"Opening {url}...")
        driver.get(url)

        # Allow initial load
        time.sleep(3)

        # Select the specific kart type; without it the data would be for the
        # wrong kart, so a failure here aborts the scrape.
        print(f"Selecting '{kart_name}' from kart dropdown...")
        try:
            kart_select = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.NAME, "kart_id"))
            )
            Select(kart_select).select_by_value(kart_id)
            print(f"✓ Selected '{kart_name}' successfully")

            # Wait for page to reload/update with selected kart data
            time.sleep(5)
            print("✓ Page updated with kart-specific data")
        except Exception as e:
            print(f"✗ Could not select kart type: {e}")
            return  # the finally clause below closes the browser

        # Select "All time" from the Period dropdown; best effort only.
        print("Selecting 'All time' from Period dropdown...")
        try:
            period_select = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.NAME, "period"))
            )
            Select(period_select).select_by_value('all')
            print("✓ Selected 'All time' successfully")

            # Wait for page to reload/update with all-time data
            time.sleep(5)
            print("✓ Page updated with all-time data")
        except Exception as e:
            print(f"✗ Could not set period to 'All time': {e}")
            print("Proceeding with default period (Year)...")

        # 2. Loop to load all data
        _load_all_rows(driver)

        # 3. Parse the fully loaded HTML
        print("Parsing final data...")
        data = _parse_leaderboard(driver.page_source, kart_name)
    finally:
        # Always release the Chrome/chromedriver processes.
        driver.quit()

    # 4. Save to CSV
    if not data:
        print("No data found.")
        return

    # Clean duplicates just in case
    df = pd.DataFrame(data).drop_duplicates(subset=['Position', 'Name', 'Best Time'])

    # Create filename based on kart name
    filename = f'data_2f2f_islamabad_{kart_name.lower()}.csv'
    df.to_csv(filename, index=False)
    print(f"Success! Scraped {len(df)} rows. Saved to {filename}")
    print(df.head())

if __name__ == "__main__":
    # Entry point: scrape only the SR5 kart leaderboard.
    scrape_racefacer(kart_id="1099", kart_name="SR5")


Scraping SR5 (ID: 1099)
Opening https://www.racefacer.com/en/karting-tracks/pakistan/2f2fislamabad...
Selecting 'SR5' from kart dropdown...
✓ Selected 'SR5' successfully
✓ Page updated with kart-specific data
Selecting 'All time' from Period dropdown...
✓ Selected 'All time' successfully
✓ Page updated with all-time data
Rows loaded so far: 6
Rows loaded so far: 11
Rows loaded so far: 16
Rows loaded so far: 21
Rows loaded so far: 26
Rows loaded so far: 31
Rows loaded so far: 36
Rows loaded so far: 41
Rows loaded so far: 46
Rows loaded so far: 51
Rows loaded so far: 56
Rows loaded so far: 61
Rows loaded so far: 66
Rows loaded so far: 71
Rows loaded so far: 76
Rows loaded so far: 81
Rows loaded so far: 86
Rows loaded so far: 91
Rows loaded so far: 96
Rows loaded so far: 100
Rows loaded so far: 101
Rows loaded so far: 106
Rows loaded so far: 111
Rows loaded so far: 116
Rows loaded so far: 121
Rows loaded so far: 131
Rows loaded so far: 136
Rows loaded so far: 141
Rows loaded so far: 146
