In [None]:

!pip install selenium pandas webdriver-manager beautifulsoup4

Collecting webdriver-manager
  Using cached webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Using cached webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)
Installing collected packages: webdriver-manager
Successfully installed webdriver-manager-4.0.2



[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import time
import os
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

def _build_driver():
    """Create the Chrome WebDriver used for scraping.

    Headless flags are added only when the ``CI`` or ``GITHUB_ACTIONS``
    environment variable is set; otherwise Chrome starts with default options.
    """
    options = webdriver.ChromeOptions()
    if os.environ.get('CI') or os.environ.get('GITHUB_ACTIONS'):
        for flag in (
            "--headless=new",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            "--window-size=1920,1080",
        ):
            options.add_argument(flag)
    return webdriver.Chrome(
        service=Service(ChromeDriverManager().install()), options=options
    )


def _find_load_more_button(driver, timeout=3):
    """Return the first clickable 'Load more' element, or None if none match.

    Candidate selectors are tried in order; each one is waited on for up to
    ``timeout`` seconds. Selectors starting with ``//`` are treated as XPath,
    everything else as CSS.
    """
    # Imported locally so this block does not depend on a change to the
    # file-level import list.
    from selenium.common.exceptions import WebDriverException

    selectors = (
        ".load-more-button",
        "button.load-more",
        ".load-more",
        "//button[contains(text(), 'Load more')]",
        "//a[contains(text(), 'Load more')]",
    )
    for selector in selectors:
        by = By.XPATH if selector.startswith("//") else By.CSS_SELECTOR
        try:
            button = WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable((by, selector))
            )
        except WebDriverException:
            # A timeout (or any other WebDriver-level failure) for this
            # selector just means "try the next one". KeyboardInterrupt and
            # non-Selenium errors are deliberately NOT swallowed here.
            continue
        print(f"Found button with selector: {selector}")
        return button
    return None


def _load_all_rows(driver, max_no_change=3):
    """Click 'Load more' repeatedly until no further rows appear.

    Stops when no button can be found, when the number of ``.row`` elements
    has stayed the same for ``max_no_change`` consecutive checks, or when any
    exception is raised inside the loop (best effort: the caller then parses
    whatever has been loaded so far).
    """
    previous_row_count = 0
    no_change_count = 0

    while True:
        try:
            current_row_count = len(driver.find_elements(By.CLASS_NAME, "row"))
            print(f"Rows loaded so far: {current_row_count}")

            # Safety check: a click that adds no rows counts as a failed attempt.
            if current_row_count == previous_row_count and current_row_count > 0:
                no_change_count += 1
                print(f"No new data loaded after click (attempt {no_change_count}/{max_no_change}).")
                if no_change_count >= max_no_change:
                    print("Stopping - reached maximum attempts with no new data.")
                    break
            else:
                no_change_count = 0

            previous_row_count = current_row_count

            load_more_btn = _find_load_more_button(driver)
            if load_more_btn is None:
                print("No 'Load more' button found with any selector.")
                break

            # Click through JavaScript after centring the button in the viewport.
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", load_more_btn)
            time.sleep(0.5)
            driver.execute_script("arguments[0].click();", load_more_btn)
            print("Clicked 'Load more' button")

            # Give the page time to append the new rows.
            time.sleep(2)

        except Exception as e:
            print(f"Exception in load loop: {e}")
            break


def _text_of(node):
    """Return the stripped text of a BeautifulSoup node, or '' when it is None."""
    return node.get_text(strip=True) if node is not None else ""


def _parse_leaderboard(page_html):
    """Extract leaderboard entries from the page HTML.

    Returns a list of dicts with the keys ``Position``, ``Name``, ``Date``,
    ``Max km/h``, ``Max G``, ``Best Time`` and ``Profile URL``. Podium entries
    (found under ``div.track_podium``) come first with integer positions 1-3
    and empty speed/G fields; entries from ``div.row`` elements follow, with
    the position kept as the text found on the page.
    """
    soup = BeautifulSoup(page_html, 'html.parser')
    data = []

    # --- Podium (1st, 2nd, 3rd) ---
    podium = soup.find('div', class_='track_podium')
    if podium is not None:
        for rank, cls in enumerate(('first', 'second', 'third'), start=1):
            item = podium.find('a', class_=cls)
            if item is None:
                continue
            name_div = item.find('div', class_='name')
            time_div = item.find('div', class_='time')
            date_div = item.find('div', class_='date')
            # A podium slot missing any of its three fields is skipped entirely.
            if name_div is None or time_div is None or date_div is None:
                continue
            data.append({
                'Position': rank,
                'Name': _text_of(name_div),
                'Date': _text_of(date_div),
                'Max km/h': '',
                'Max G': '',
                'Best Time': _text_of(time_div),
                'Profile URL': item.get('href'),
            })

    # --- Table rows ---
    for row in soup.find_all('div', class_='row'):
        pos_div = row.find('div', class_='position')
        if pos_div is None:
            # Rows without a position cell are skipped.
            continue

        time_a = row.find('a', class_='time')
        if time_a is not None:
            # Prefer the inner <span> text; fall back to the whole link text.
            time_span = time_a.find('span')
            best_time = _text_of(time_span if time_span is not None else time_a)
        else:
            best_time = ""

        name_link = row.find('a', class_='name-date')

        data.append({
            'Position': _text_of(pos_div),
            'Name': _text_of(row.find('div', class_='name')),
            'Date': _text_of(row.find('div', class_='date')),
            'Max km/h': _text_of(row.find('div', class_='max-km-h')),
            'Max G': _text_of(row.find('div', class_='max-g')),
            'Best Time': best_time,
            'Profile URL': name_link.get('href') if name_link is not None else "",
        })

    return data


def scrape_racefacer(
    url="https://www.racefacer.com/en/karting-tracks/pakistan/apexautodromepakistan",
    output_path='data_apex.csv',
):
    """Scrape a RaceFacer track leaderboard and save it as a CSV file.

    Opens ``url`` in Chrome, clicks 'Load more' until no further rows load,
    parses the podium and table rows from the resulting HTML, drops duplicate
    entries (same Position, Name and Best Time) and writes the result to
    ``output_path``.

    Args:
        url: Leaderboard page to scrape. Defaults to the Apex Autodrome page.
        output_path: Destination CSV path. Defaults to ``data_apex.csv``.

    Returns:
        None. Progress and errors are reported via ``print``; exceptions
        raised after the browser has started are caught, printed with a
        traceback, and not re-raised.
    """
    driver = _build_driver()

    try:
        print(f"Opening {url}...")
        driver.get(url)

        # Allow initial load
        time.sleep(5)

        _load_all_rows(driver)

        # Parse while the browser is still open so page_source is available.
        print("Parsing final data...")
        data = _parse_leaderboard(driver.page_source)

        if data:
            df = pd.DataFrame(data)
            df.drop_duplicates(subset=['Position', 'Name', 'Best Time'], inplace=True)

            df.to_csv(output_path, index=False)
            print(f"Success! Scraped {len(df)} rows. Saved to {output_path}")
            print(df.head(10))
        else:
            print("No data found.")

    except Exception as e:
        print(f"Fatal error during scraping: {e}")
        import traceback
        traceback.print_exc()

    finally:
        # Always close the driver; a failure to quit is reported, not raised.
        try:
            driver.quit()
            print("Browser closed successfully")
        except Exception:
            print("Browser already closed")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    scrape_racefacer()