In [1]:
# Check which selenium version is installed
import selenium
selenium.__version__

'4.9.0'

In [2]:
# selenium 4
from selenium import webdriver
from selenium.webdriver.edge.service import Service as EdgeService
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.webdriver.edge.webdriver import WebDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time


In [3]:

import csv

# Start an Edge session; the value returned by EdgeChromiumDriverManager().install()
# is handed to the Edge service
edge_service = EdgeService(EdgeChromiumDriverManager().install())
driver = webdriver.Edge(service=edge_service)

# Load the US racing race-card page and maximize the browser window
driver.get("https://www.racingpost.com/racecards/us-racing")
driver.maximize_window()

# Wait up to 10 seconds for the iframe to be present in the DOM, then enter it
iframe_locator = (By.XPATH, "/html/body/div[3]/div/main/div/iframe")
iframe = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(iframe_locator)
)
driver.switch_to.frame(iframe)
print("Switched to main iframe")

# Function to get fresh buttons from shadow DOM
def get_fresh_buttons():
    """Collect every race button in the current frame together with its track name.

    Runs a script in the browser that visits each ``div.ssg-module`` element
    exposing a ``shadowRoot``, walks the track blocks
    (``div.ssg-module-1jbtlky``) inside it and pairs every race button
    (``button.ssg-module-1wckp3v``) with the text of the track's name label
    (``span.ssg-module-5e06qd``).

    Returns:
        A list with one ``{'trackName': ..., 'button': ...}`` entry per race
        button. ``trackName`` is the label's text content, or ``''`` when the
        track block has no name label; ``button`` is the element handle that
        callers click.
    """
    return driver.execute_script('''
        const elements = [];
        const shadowHosts = document.querySelectorAll('div.ssg-module');
        shadowHosts.forEach(shadowHost => {
            const shadowRoot = shadowHost.shadowRoot;
            if (!shadowRoot) {
                return;
            }
            const tracks = shadowRoot.querySelectorAll('div.ssg-module-1jbtlky');
            tracks.forEach(track => {
                // A track block without a name label must not abort the whole scan,
                // so fall back to an empty name instead of dereferencing null.
                const nameElement = track.querySelector('span.ssg-module-5e06qd');
                const trackName = nameElement ? nameElement.textContent : '';
                const buttons = track.querySelectorAll('button.ssg-module-1wckp3v');
                buttons.forEach(button => {
                    elements.push({trackName: trackName, button: button});
                });
            });
        });
        return elements;
    ''')

# Get initial list of race buttons (one {'trackName', 'button'} entry per button,
# as built by get_fresh_buttons)
race_buttons = get_fresh_buttons()
# Flag read by the click loop in the next cell so that at most one race button is clicked
clicked = False
# NOTE(review): this pause runs after the buttons were already collected, so it does not
# give the page extra time to render before the query above — confirm whether it should
# come before get_fresh_buttons().
time.sleep(10)

Switched to main iframe


In [4]:

# Track whose first race button should be opened; it is compared verbatim
# against the track name text scraped from the page.
TARGET_TRACK = "Belterra Park"

# A later cell builds the CSV file names from `track_name`, so pin it to the
# requested track instead of leaving it at whichever track the loop visited last.
track_name = TARGET_TRACK

# Click the first race button listed under TARGET_TRACK
for race in race_buttons:
    # `clicked` also guards against clicking again when this cell is re-run
    if clicked:
        break
    if race['trackName'] != TARGET_TRACK:
        continue

    button = race['button']
    print(f"Attempting to click race button for track: {track_name}")
    # Scroll button into view
    driver.execute_script("arguments[0].scrollIntoView();", button)
    # Click the button
    button.click()
    time.sleep(5)  # wait for the new content to load
    clicked = True

# Make a missing track visible instead of silently continuing with the wrong page
if not clicked:
    print(f"No race button found for track: {TARGET_TRACK}")
        

Attempting to click race button for track: Belterra Park


In [5]:
# Function to re-enter the iframe, click the details button and collect the buttons under div.ssg-module-rd2reb from the shadow DOM
def get_fresh_page():
    """Re-enter the page's iframe and return the buttons found in its shadow DOM.

    The driver is first reset to the top-level document, then waits up to
    10 seconds for the iframe to be present and switches into it. The script
    run in the browser clicks the details button of every shadow root that has
    one and returns all ``div.ssg-module-rd2reb > button`` elements as one
    flat list.
    """
    collect_buttons_js = '''
        const shadowHosts = document.querySelectorAll('div.ssg-module');
        return Array.from(shadowHosts).map(shadowHost => {
            const shadowRoot = shadowHost.shadowRoot;
            if (shadowRoot) {
                const detailsButton = shadowRoot.querySelector('div.ssg-module-1ilgfmd > div.ssg-module-1kmgbcx > button');
                if (detailsButton) {
                 detailsButton.click();
                }
                return Array.from(shadowRoot.querySelectorAll('div.ssg-module-rd2reb > button'));
            }
            return [];
        }).flat();
    '''

    # Leave whatever frame is currently selected before looking the iframe up again
    driver.switch_to.default_content()

    frame_locator = (By.XPATH, "/html/body/div[3]/div/main/div/iframe")
    frame_present = EC.presence_of_element_located(frame_locator)
    race_frame = WebDriverWait(driver, 10).until(frame_present)
    driver.switch_to.frame(race_frame)

    return driver.execute_script(collect_buttons_js)

In [6]:
def get_horse_data():
    """Scrape ID, name and raw table cells for every horse block in the current frame.

    The script run in the browser visits each ``div.ssg-module`` element that
    exposes a ``shadowRoot`` and, for every ``div.ssg-module-y3rnh0`` element
    inside it, looks up three things using ``nth-child(index + 3)`` selectors,
    where ``index`` is the position of the horse block among the matches:

    * ``horseID``   - trimmed text of the ID element, or ``''`` if not found
    * ``horseName`` - trimmed text of the name element, or ``''`` if not found
    * ``tableData`` - trimmed text of every matched table cell except the
      first four matches, as one flat list

    Returns:
        The list produced by the script: one
        ``{'horseID', 'horseName', 'tableData'}`` entry per horse block.

    NOTE(review): inside the script the first ``// Select horse name`` comment
    actually precedes the horse ID lookup, and the inner ``forEach`` callback
    reuses the parameter name ``index`` of the outer one.
    """
    # Browser-side scraper; see the docstring for the shape of its result
    script = '''
        const elements = [];
        const shadowHosts = document.querySelectorAll('div.ssg-module');
        shadowHosts.forEach(shadowHost => {
            const shadowRoot = shadowHost.shadowRoot;
            if (shadowRoot) {           
                const horses = shadowRoot.querySelectorAll('div.ssg-module-y3rnh0');                 
                horses.forEach((horse, index) => {
                    const nthChildIndex = index + 3;

                    // Select horse name
                    const horseIDElement = horse.querySelector(`div.ssg-module-1ilgfmd > div:nth-child(${nthChildIndex}) > div.ssg-module-1mx1a43 > div.ssg-module-gryhqf > div.ssg-module-15btxpu > div.ssg-module-3bxiye > div`);
                    const horseID = horseIDElement ? horseIDElement.textContent.trim() : '';

                    // Select horse name
                    const horseNameElement = horse.querySelector(`div.ssg-module-1ilgfmd > div:nth-child(${nthChildIndex}) > div.ssg-module-1mx1a43 > div.ssg-module-gryhqf > div.ssg-module-15btxpu > div.ssg-module-12dzwx5 > span`);
                    const horseName = horseNameElement ? horseNameElement.textContent.trim() : '';

                    // Get table data
                    const tableRows = horse.querySelectorAll(`div > div > div.ssg-module-10d5d1z > div.ssg-module-1ilgfmd > div:nth-child(${nthChildIndex}) > div.ssg-module-1goivjb > div.ssg-module-1vr8bhw > div > div[class*='ssg-module-'] > div[class*='ssg-module-']`);
                    const tableData = [];
                    tableRows.forEach((row, index) => {
                        if (index >= 4) {
                            const rowData = row.textContent.trim();
                            tableData.push(rowData);
                        }
                    });

                    elements.push({horseID: horseID, horseName: horseName, tableData: tableData});
                });
            }
        });
        return elements;
    '''
    
    # Despite the name, `races` holds the per-horse entries returned by the script
    races = driver.execute_script(script)
    return races

In [7]:
def write_horse_data_to_csv(track_name, time_info, data):
    """Write the structured form data of one race to a CSV file.

    The file is created in the current working directory and named
    ``<track_name>_<time_info>.csv``, with colons removed from ``time_info``
    and spaces replaced by underscores.

    Args:
        track_name: Track name used as the file-name prefix.
        time_info: Race time label, e.g. ``"9:15 PM"``.
        data: Iterable of dicts with optional ``'horseID'``, ``'horseName'``
            and ``'tableData'`` keys. ``'tableData'`` is expected to map
            column names to lists of values, as produced by
            ``structure_horse_data``.

    For a horse whose ``tableData`` is a mapping containing ``'Date'``, one row
    is written per date entry. Columns that are missing, or shorter than the
    ``'Date'`` column, are written as empty strings instead of raising
    ``IndexError``. Any other horse gets a single row holding only its ID and
    name.
    """
    output_file = f"{track_name}_{time_info.replace(':', '').replace(' ', '_')}.csv"

    table_fields = ['Date', 'Course', 'Surface', 'Class', 'Dist.', 'Gng', 'Wgt.',
                    'Finish Pos.', 'Winner / 2nd', 'Jky', 'SP']
    fieldnames = ['horseID', 'horseName'] + table_fields

    def cell(table, column, index):
        # Tolerate absent columns and columns with fewer values than 'Date'
        values = table.get(column) or []
        return values[index] if index < len(values) else ''

    # Writing data to CSV
    with open(output_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()

        for entry in data:
            identity = {
                'horseID': entry.get('horseID', ''),
                'horseName': entry.get('horseName', ''),
            }
            table = entry.get('tableData')

            if isinstance(table, dict) and 'Date' in table:
                for i in range(len(table['Date'])):
                    row = dict(identity)
                    row.update({column: cell(table, column, i) for column in table_fields})
                    writer.writerow(row)
            else:
                # No structured table (or no Date column): one row with ID and name only
                row = dict(identity)
                row.update(dict.fromkeys(table_fields, ''))
                writer.writerow(row)

    print(f"CSV file '{output_file}' has been created successfully.")

In [8]:
def structure_horse_data(data, num_headers=11):
    """Turn each horse's flat list of table cells into a column-oriented dict.

    Args:
        data: List of dicts as returned by ``get_horse_data``. Entries are
            modified in place.
        num_headers: Number of leading cells that are column headers; the
            remaining cells are read row by row with that many columns.
            Defaults to 11.

    Returns:
        The same ``data`` list. For every entry with a non-empty flat
        ``'tableData'`` list, that list is replaced by
        ``{header: [value, ...]}``. Trailing cells that do not fill a complete
        row are dropped. Entries with missing or empty ``'tableData'``, or
        whose ``'tableData'`` is already a dict, are left untouched, which
        makes the function safe to call more than once.

    Raises:
        ValueError: If ``num_headers`` is smaller than 1.
    """
    if num_headers < 1:
        raise ValueError("num_headers must be at least 1")

    for entry in data:
        table_data = entry.get('tableData', [])  # tolerate a missing 'tableData'

        # Nothing scraped, or already structured by an earlier call
        if not table_data or isinstance(table_data, dict):
            continue

        # Extract headers and rows
        headers = table_data[:num_headers]
        rows = table_data[num_headers:]

        # Initialize the dictionary with headers as keys and empty lists as values
        structured_data = {header: [] for header in headers}

        # Fewer cells than num_headers means fewer columns
        num_columns = len(headers)
        num_rows = len(rows) // num_columns
        for i in range(num_rows):
            row_cells = rows[i * num_columns:(i + 1) * num_columns]
            for header, value in zip(headers, row_cells):
                structured_data[header].append(value)

        # Replace the flat list with the structured mapping
        entry['tableData'] = structured_data

    return data

In [9]:
# Re-enter the iframe and collect the buttons returned by get_fresh_page();
# the loop in the next cell clicks each of them in turn
listButton=get_fresh_page()

In [10]:

# Click every collected button in turn and export the horse data shown afterwards
for button in listButton:
    try:
        # Reading the label happens inside the try block as well, so that one
        # failing element is reported and skipped instead of aborting the loop.
        button_text = button.text
        combined_button = button_text.replace('\n', ' ')
        # The first two whitespace-separated tokens of the label are used in the file name
        time_info = " ".join(combined_button.split()[:2])

        # Click the button
        driver.execute_script("arguments[0].click();", button)
        # Wait for some time for the new content to load
        time.sleep(10)
        horse_data = get_horse_data()
        structured_data = structure_horse_data(horse_data)
        write_horse_data_to_csv(track_name, time_info, structured_data)
    except Exception as e:
        # Best effort: report the failure and carry on with the next button
        print(f'Error during iteration: {e}')

CSV file 'Belterra Park_915_PM.csv' has been created successfully.
CSV file 'Belterra Park_945_PM.csv' has been created successfully.
CSV file 'Belterra Park_1015_PM.csv' has been created successfully.
CSV file 'Belterra Park_1045_PM.csv' has been created successfully.
CSV file 'Belterra Park_1115_PM.csv' has been created successfully.
CSV file 'Belterra Park_1145_PM.csv' has been created successfully.
CSV file 'Belterra Park_1215_AM.csv' has been created successfully.
CSV file 'Belterra Park_1245_AM.csv' has been created successfully.


In [11]:
# Shut down the WebDriver session once all races have been exported
driver.quit()