In [1]:
# Selenium imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys  # Importing Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

# Other imports
from dotenv import load_dotenv
import os
import time
import logging
from bs4 import BeautifulSoup
import pandas as pd


In [2]:
from dotenv import load_dotenv
import os

# Load variables from .env file
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify that credentials are loaded
print(f"Email: {email}")
print(f"Password: {'*' * len(password) if password else 'Not Found'}")


Email: neil.treat@gmail.com
Password: *************


scrape table

In [None]:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

from dotenv import load_dotenv
import os
import time
import logging
from bs4 import BeautifulSoup
import pandas as pd

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Configure logging
logging.basicConfig(filename='scraper.log', level=logging.INFO,
                    format='%(asctime)s:%(levelname)s:%(message)s')

def setup_driver():
    chrome_options = Options()
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--start-maximized")
    
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

def perform_login(driver, wait):
    driver.get("https://app.utrsports.net/login")
    logging.info("Navigated to login page")
    
    # Wait for email field and enter credentials
    email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))
    email_field.clear()
    email_field.send_keys(email)
    
    password_field = driver.find_element(By.ID, "passwordInput")
    password_field.clear()
    password_field.send_keys(password)
    
    # Click sign in
    sign_in_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
    )
    sign_in_button.click()
    logging.info("Clicked sign in button")
    
    # Wait for and click continue button
    continue_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
    )
    continue_button.click()
    logging.info("Login completed successfully")

def navigate_to_stats(driver, wait, stats_url):
    driver.get(stats_url)
    logging.info(f"Navigated to stats page: {stats_url}")
    # Wait for page load
    time.sleep(5)

def find_show_all_link(driver, wait):
    """Try multiple strategies to find the 'Show all' link"""
    # List of possible XPath and CSS selectors to try
    selectors = [
        (By.XPATH, "//a[text()='Show all']"),
        (By.XPATH, "//a[contains(text(), 'Show all')]"),
        (By.XPATH, "//div[contains(@class, 'mt32')]//a[text()='Show all']"),
        (By.CSS_SELECTOR, "a[href*='show-all']"),  # If the link contains 'show-all' in href
        (By.LINK_TEXT, "Show all"),
        (By.PARTIAL_LINK_TEXT, "Show all")
    ]
    
    # Try each selector
    for by, selector in selectors:
        try:
            elements = driver.find_elements(by, selector)
            if elements:
                # Print information about found elements
                print(f"Found {len(elements)} elements with selector {selector}")
                for idx, elem in enumerate(elements):
                    try:
                        print(f"Element {idx + 1}:")
                        print(f"  Text: {elem.text}")
                        print(f"  Is displayed: {elem.is_displayed()}")
                        print(f"  Location: {elem.location}")
                        print(f"  HTML: {elem.get_attribute('outerHTML')}")
                    except:
                        continue
                return elements
        except Exception as e:
            continue
    
    return []

def click_show_all(driver, wait):
    # Scroll to bottom to ensure all content is loaded
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
    
    # Find the Show all link
    show_all_elements = find_show_all_link(driver, wait)
    
    if not show_all_elements:
        # If no elements found, take a screenshot and log the page source
        driver.save_screenshot("no_show_all_found.png")
        with open("page_source.html", "w", encoding="utf-8") as f:
            f.write(driver.page_source)
        raise Exception("No 'Show all' link found. Screenshot and page source saved.")
    
    # Try to click the last "Show all" link
    show_all_link = show_all_elements[-1]
    
    # Try multiple click methods
    click_methods = [
        lambda: show_all_link.click(),  # Regular click
        lambda: ActionChains(driver).move_to_element(show_all_link).click().perform(),  # Action chains click
        lambda: driver.execute_script("arguments[0].click();", show_all_link)  # JavaScript click
    ]
    
    for click_method in click_methods:
        try:
            # Scroll the element into view
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", show_all_link)
            time.sleep(1)
            
            # Try to click
            click_method()
            logging.info("Successfully clicked 'Show all' link")
            time.sleep(3)  # Wait for content to load
            return True
        except Exception as e:
            logging.warning(f"Click method failed: {str(e)}")
            continue
    
    raise Exception("Failed to click 'Show all' link with all methods")

import pandas as pd
from bs4 import BeautifulSoup

def extract_utr_data(html_content):
    # Create BeautifulSoup object
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Find all history items
    history_items = soup.find_all('div', class_='newStatsTabContent__historyItem__1Nb0C')
    
    # Lists to store data
    dates = []
    ratings = []
    
    # Extract data from each history item
    for item in history_items:
        # Extract date
        date = item.find('div', class_='newStatsTabContent__historyItemDate__jFJyD').text.strip()
        
        # Extract rating
        rating = item.find('div', class_='newStatsTabContent__historyItemRating__GQUXw').text.strip()
        
        # Append to lists
        dates.append(date)
        ratings.append(float(rating))
    
    # Create DataFrame
    df = pd.DataFrame({
        'Date': pd.to_datetime(dates),
        'UTR_Rating': ratings
    })
    
    # Sort by date
    df = df.sort_values('Date')
    
    return df


def main():
    driver = None
    try:
        driver = setup_driver()
        wait = WebDriverWait(driver, 20)
        stats_url = "https://app.utrsports.net/profiles/247320?t=6"
        
        perform_login(driver, wait)
        navigate_to_stats(driver, wait, stats_url)
        
        # Take a screenshot before attempting to click Show all
        driver.save_screenshot("before_show_all.png")
        
        # Get the initial page source to check if we need to click "Show all"
        initial_html = driver.page_source
        initial_soup = BeautifulSoup(initial_html, 'html.parser')
        show_all_link = initial_soup.find('a', string='Show all')
        
        if show_all_link:
            click_show_all(driver, wait)
            # Wait for new content to load
            time.sleep(3)
        
        # Get the final page source after clicking "Show all" (if needed)
        html_content = driver.page_source
        
        # Create the DataFrame
        df = extract_utr_data(html_content)

        # Basic statistics
        stats = {
            'Latest Rating': df['UTR_Rating'].iloc[-1],
            'Highest Rating': df['UTR_Rating'].max(),
            'Lowest Rating': df['UTR_Rating'].min(),
            'Average Rating': df['UTR_Rating'].mean(),
            'Total Records': len(df),
            'Date Range': f"{df['Date'].min().strftime('%Y-%m-%d')} to {df['Date'].max().strftime('%Y-%m-%d')}"
        }

        # Print some basic statistics
        print("\nUTR Statistics:")
        print("-" * 50)
        for key, value in stats.items():
            if isinstance(value, float):
                print(f"{key}: {value:.2f}")
            else:
                print(f"{key}: {value}")

        # Save to CSV
        output_file = 'utr_history.csv'
        df.to_csv(output_file, index=False)
        print(f"\nData saved to {output_file}")
        
        # Save raw HTML for debugging if needed
        with open("raw_data.html", "w", encoding="utf-8") as f:
            f.write(html_content)
        
    except Exception as e:
        logging.error(f"An error occurred: {e}")
        if driver:
            driver.save_screenshot(f"error_{int(time.time())}.png")
            # Save the page source when error occurs
            with open(f"error_page_{int(time.time())}.html", "w", encoding="utf-8") as f:
                f.write(driver.page_source)
        raise e
    finally:
        if driver:
            driver.quit()

if __name__ == "__main__":
    main()

Found 1 elements with selector //a[text()='Show all']
Element 1:
  Text: Show all
  Is displayed: True
  Location: {'x': 1124, 'y': 2576}
  HTML: <a href="#" class="underline">Show all</a>

UTR Statistics:
--------------------------------------------------
Latest Rating: 16.21
Highest Rating: 16.35
Lowest Rating: 11.06
Average Rating: 14.86
Total Records: 435
Date Range: 2016-06-27 to 2024-11-11

Data saved to utr_history.csv


now need to integrate player search

In [5]:
# --- Complete Revised Script ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")  # Start maximized for better visibility

# Uncomment the next line to run Chrome in headless mode after successful debugging
# chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Check if "Next" button is present and clickable
        try:
            next_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Next']"))  # Replace with actual XPath
            )
            next_button.click()
            logging.info("Clicked the 'Next' button.")
            print("Clicked the 'Next' button.")
            page += 1
            time.sleep(3)  # Wait for the next page to load
        except Exception as e:
            logging.info("No more pages to scrape or 'Next' button not found.")
            print("No more pages to scrape or 'Next' button not found.")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def main():
    """Main function to execute the scraping workflow."""
    base_url = "https://app.utrsports.net"  # Define the base URL

    try:
        # Navigate to the login page
        driver.get(login_url)
        logging.info("Navigated to the login page.")
        print("Navigated to the login page.")

        # Allow the page to load completely
        time.sleep(3)  # Adjust based on your internet speed

        # Locate the email input field
        try:
            email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))  # Replace with actual ID
            email_field.clear()
            email_field.send_keys(email)
            logging.info("Entered email.")
            print("Entered email.")
        except Exception as e:
            logging.error(f"Email input field not found: {e}")
            driver.save_screenshot("error_email_field.png")
            print(f"Email input field not found: {e}")
            driver.quit()
            return

        # Locate the password input field
        try:
            password_field = driver.find_element(By.ID, "passwordInput")  # Replace with actual ID
            password_field.clear()
            password_field.send_keys(password)
            logging.info("Entered password.")
            print("Entered password.")
        except Exception as e:
            logging.error(f"Password input field not found: {e}")
            driver.save_screenshot("error_password_field.png")
            print(f"Password input field not found: {e}")
            driver.quit()
            return

        # Locate and click the login button
        try:
            sign_in_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", sign_in_button)
            sign_in_button.click()
            logging.info("Clicked the 'SIGN IN' button.")
            print("Clicked the 'SIGN IN' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.save_screenshot("error_click_sign_in.png")
            print(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.quit()
            return

        # Wait until the "Continue" button appears and click it
        try:
            continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
            )
            continue_button.click()
            logging.info("Clicked the 'Continue' button.")
            print("Clicked the 'Continue' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.save_screenshot("error_click_continue.png")
            print(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.quit()
            return

        # Navigate to the Player's Stats Page
        try:
            driver.get(stats_url)
            logging.info(f"Navigated to player's stats page: {stats_url}")
            print(f"Navigated to player's stats page: {stats_url}")

            # Allow the stats page to load
            time.sleep(5)  # Adjust based on your internet speed
        except Exception as e:
            logging.error(f"Error navigating to player's stats page: {e}")
            driver.save_screenshot("error_navigate_stats_page.png")
            print(f"Error navigating to player's stats page: {e}")
            driver.quit()
            return

        # Scrape all pages (if paginated)
        all_data = scrape_all_pages(driver, wait, base_url)

        # Save the data to CSV
        save_to_csv(all_data)

    except Exception as main_e:
        logging.error(f"An unexpected error occurred: {main_e}")
        driver.save_screenshot("unexpected_error.png")
        print(f"An unexpected error occurred: {main_e}")
    finally:
        # Close the WebDriver
        driver.quit()
        logging.info("WebDriver closed.")
        print("WebDriver closed.")

if __name__ == "__main__":
    main()


Navigated to the login page.
Entered email.
Entered password.
Clicked the 'SIGN IN' button.
Clicked the 'Continue' button.
Navigated to player's stats page: https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751
Scraping page 1.
Found 40 player cards on the current page.
Scraped player 1: {'Player Name': 'Jannik Sinner', 'Location': 'M • Italy', 'Singles UTR': '16.25', 'Doubles UTR': '15.26', 'Profile Link': 'https://app.utrsports.net/profiles/247320'}
Scraped player 2: {'Player Name': 'Carlos Alcaraz', 'Location': 'M • Spain', 'Singles UTR': '16.21', 'Doubles UTR': '14.91', 'Profile Link': 'https://app.utrsports.net/profiles/3569175'}
UTR not found for player 3: list index out of range
Scraped player 3: {'Player Name': 'Novak Djokovic', 'Location': 'M • Serbia', 'Singles UTR': 'N/A', 'Doubles UTR': 'N/A', 'Profile Link': 'https://app.utrsports.net/prof

In [None]:
# --- Complete Revised Script ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")  # Start maximized for better visibility

# Uncomment the next line to run Chrome in headless mode after successful debugging
# chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def click_load_more(driver, wait):
    """Click the 'Load More' button until it's no longer present."""
    while True:
        try:
            # Locate the 'Load More' button
            load_more_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[span[text()='Load More']]"))
            )
            # Scroll to the 'Load More' button to ensure it's in view
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)
            # Click the 'Load More' button
            load_more_button.click()
            logging.info("Clicked the 'Load More' button.")
            print("Clicked the 'Load More' button.")
            # Wait for new content to load
            time.sleep(3)  # Adjust based on your internet speed
        except Exception as e:
            logging.info("No more 'Load More' buttons to click or button not found.")
            print("No more 'Load More' buttons to click or button not found.")
            break

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Attempt to locate the "Load More" button
        try:
            click_load_more(driver, wait)
            page += 1
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def main():
    """Main function to execute the scraping workflow."""
    base_url = "https://app.utrsports.net"  # Define the base URL

    try:
        # Navigate to the login page
        driver.get(login_url)
        logging.info("Navigated to the login page.")
        print("Navigated to the login page.")

        # Allow the page to load completely
        time.sleep(3)  # Adjust based on your internet speed

        # Locate the email input field
        try:
            email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))  # Replace with actual ID
            email_field.clear()
            email_field.send_keys(email)
            logging.info("Entered email.")
            print("Entered email.")
        except Exception as e:
            logging.error(f"Email input field not found: {e}")
            driver.save_screenshot("error_email_field.png")
            print(f"Email input field not found: {e}")
            driver.quit()
            return

        # Locate the password input field
        try:
            password_field = driver.find_element(By.ID, "passwordInput")  # Replace with actual ID
            password_field.clear()
            password_field.send_keys(password)
            logging.info("Entered password.")
            print("Entered password.")
        except Exception as e:
            logging.error(f"Password input field not found: {e}")
            driver.save_screenshot("error_password_field.png")
            print(f"Password input field not found: {e}")
            driver.quit()
            return

        # Locate and click the login button
        try:
            sign_in_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", sign_in_button)
            sign_in_button.click()
            logging.info("Clicked the 'SIGN IN' button.")
            print("Clicked the 'SIGN IN' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.save_screenshot("error_click_sign_in.png")
            print(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.quit()
            return

        # Wait until the "Continue" button appears and click it
        try:
            continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
            )
            continue_button.click()
            logging.info("Clicked the 'Continue' button.")
            print("Clicked the 'Continue' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.save_screenshot("error_click_continue.png")
            print(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.quit()
            return

        # Navigate to the Player's Stats Page
        try:
            driver.get(stats_url)
            logging.info(f"Navigated to player's stats page: {stats_url}")
            print(f"Navigated to player's stats page: {stats_url}")

            # Allow the stats page to load
            time.sleep(5)  # Adjust based on your internet speed
        except Exception as e:
            logging.error(f"Error navigating to player's stats page: {e}")
            driver.save_screenshot("error_navigate_stats_page.png")
            print(f"Error navigating to player's stats page: {e}")
            driver.quit()
            return

        # Click the "Load More" button until all data is loaded
        try:
            click_load_more(driver, wait)
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")

        # Scrape all pages (now all data is loaded)
        all_data = scrape_current_page(driver, wait, base_url)

        # Save the data to CSV
        save_to_csv(all_data)

    except Exception as main_e:
        logging.error(f"An unexpected error occurred: {main_e}")
        driver.save_screenshot("unexpected_error.png")
        print(f"An unexpected error occurred: {main_e}")
    finally:
        # Close the WebDriver
        driver.quit()
        logging.info("WebDriver closed.")
        print("WebDriver closed.")

if __name__ == "__main__":
    main()


apply filters

In [None]:
# --- Complete Revised Script with Filter Interaction ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")  # Start maximized for better visibility

# Uncomment the next line to run Chrome in headless mode after successful debugging
# chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def apply_filter(driver, wait, filter_name, option):
    """
    Applies a filter on the UTR Sports website.

    :param driver: Selenium WebDriver instance.
    :param wait: WebDriverWait instance.
    :param filter_name: The name of the filter to apply (e.g., 'Gender').
    :param option: The option to select within the filter (e.g., 'Male').
    """
    try:
        # Locate the filter button by its visible text
        filter_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[contains(text(), '{filter_name}')]")
            )
        )
        filter_button.click()
        logging.info(f"Clicked the '{filter_name}' filter button.")
        print(f"Clicked the '{filter_name}' filter button.")

        # Wait for the filter options to appear
        # Using a more precise XPath to locate the option button directly
        option_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[text()='{option}']")
            )
        )
        option_button.click()
        logging.info(f"Selected '{option}' from '{filter_name}' filter.")
        print(f"Selected '{option}' from '{filter_name}' filter.")

        # Optional: Wait for the page to refresh/update after applying the filter
        time.sleep(3)  # Adjust based on your internet speed and website response

    except Exception as e:
        logging.error(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        print(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        driver.save_screenshot(f"error_apply_filter_{filter_name}_{option}.png")


def click_load_more(driver, wait):
    """Click the 'Load More' button until it's no longer present."""
    while True:
        try:
            # Locate the 'Load More' button
            load_more_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[span[text()='Load More']]"))
            )
            # Scroll to the 'Load More' button to ensure it's in view
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)
            # Click the 'Load More' button
            load_more_button.click()
            logging.info("Clicked the 'Load More' button.")
            print("Clicked the 'Load More' button.")
            # Wait for new content to load
            time.sleep(3)  # Adjust based on your internet speed
        except Exception as e:
            logging.info("No more 'Load More' buttons to click or button not found.")
            print("No more 'Load More' buttons to click or button not found.")
            break

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Attempt to locate the "Load More" button
        try:
            click_load_more(driver, wait)
            page += 1
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def main():
    """Main function to execute the scraping workflow."""
    base_url = "https://app.utrsports.net"  # Define the base URL

    try:
        # Navigate to the login page
        driver.get(login_url)
        logging.info("Navigated to the login page.")
        print("Navigated to the login page.")

        # Allow the page to load completely
        time.sleep(3)  # Adjust based on your internet speed

        # Locate the email input field
        try:
            email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))  # Replace with actual ID
            email_field.clear()
            email_field.send_keys(email)
            logging.info("Entered email.")
            print("Entered email.")
        except Exception as e:
            logging.error(f"Email input field not found: {e}")
            driver.save_screenshot("error_email_field.png")
            print(f"Email input field not found: {e}")
            driver.quit()
            return

        # Locate the password input field
        try:
            password_field = driver.find_element(By.ID, "passwordInput")  # Replace with actual ID
            password_field.clear()
            password_field.send_keys(password)
            logging.info("Entered password.")
            print("Entered password.")
        except Exception as e:
            logging.error(f"Password input field not found: {e}")
            driver.save_screenshot("error_password_field.png")
            print(f"Password input field not found: {e}")
            driver.quit()
            return

        # Locate and click the login button
        try:
            sign_in_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", sign_in_button)
            sign_in_button.click()
            logging.info("Clicked the 'SIGN IN' button.")
            print("Clicked the 'SIGN IN' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.save_screenshot("error_click_sign_in.png")
            print(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.quit()
            return

        # Wait until the "Continue" button appears and click it
        try:
            continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
            )
            continue_button.click()
            logging.info("Clicked the 'Continue' button.")
            print("Clicked the 'Continue' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.save_screenshot("error_click_continue.png")
            print(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.quit()
            return

        # Navigate to the Player's Stats Page
        try:
            driver.get(stats_url)
            logging.info(f"Navigated to player's stats page: {stats_url}")
            print(f"Navigated to player's stats page: {stats_url}")

            # Allow the stats page to load
            time.sleep(5)  # Adjust based on your internet speed
        except Exception as e:
            logging.error(f"Error navigating to player's stats page: {e}")
            driver.save_screenshot("error_navigate_stats_page.png")
            print(f"Error navigating to player's stats page: {e}")
            driver.quit()
            return

        # Apply Filters (e.g., Gender: Male)
        try:
            apply_filter(driver, wait, filter_name="Gender", option="Male")
            # Add more filters as needed by calling apply_filter with different parameters
            # Example:
            # apply_filter(driver, wait, filter_name="Age", option="18-25")
        except Exception as e:
            logging.error(f"Error applying filters: {e}")
            driver.save_screenshot("error_apply_filters.png")
            print(f"Error applying filters: {e}")

        # Click the "Load More" button until all data is loaded
        try:
            click_load_more(driver, wait)
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")

        # Scrape all pages (now all data is loaded)
        all_data = scrape_current_page(driver, wait, base_url)

        # Save the data to CSV
        save_to_csv(all_data)

    except Exception as main_e:
        logging.error(f"An unexpected error occurred: {main_e}")
        driver.save_screenshot("unexpected_error.png")
        print(f"An unexpected error occurred: {main_e}")
    finally:
        # Close the WebDriver
        driver.quit()
        logging.info("WebDriver closed.")
        print("WebDriver closed.")

if __name__ == "__main__":
    main()


adding radio button filters

In [None]:
# --- Complete Revised Script with "Gender" and "Segment" Filters ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")  # Start maximized for better visibility

# Uncomment the next line to run Chrome in headless mode after successful debugging
# chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def apply_filter(driver, wait, filter_name, option):
    """
    Applies a button-based filter on the UTR Sports website.

    :param driver: Selenium WebDriver instance.
    :param wait: WebDriverWait instance.
    :param filter_name: The name of the filter to apply (e.g., 'Gender').
    :param option: The option to select within the filter (e.g., 'Male').
    """
    try:
        # Locate the filter button by its visible text
        filter_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[contains(text(), '{filter_name}')]")
            )
        )
        filter_button.click()
        logging.info(f"Clicked the '{filter_name}' filter button.")
        print(f"Clicked the '{filter_name}' filter button.")

        # Wait for the filter options to appear
        # Assuming options are displayed as buttons within the filter dropdown/modal
        option_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[text()='{option}']")
            )
        )
        option_button.click()
        logging.info(f"Selected '{option}' from '{filter_name}' filter.")
        print(f"Selected '{option}' from '{filter_name}' filter.")

        # Optional: Wait for the page to refresh/update after applying the filter
        time.sleep(3)  # Adjust based on your internet speed and website response

    except Exception as e:
        logging.error(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        print(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        driver.save_screenshot(f"error_apply_filter_{filter_name}_{option}.png")

def apply_radio_filter(driver, wait, filter_name, option):
    """
    Applies a radio button filter on the UTR Sports website.

    :param driver: Selenium WebDriver instance.
    :param wait: WebDriverWait instance.
    :param filter_name: The name of the filter to apply (e.g., 'Segment').
    :param option: The radio button option to select within the filter (e.g., 'Pro').
    """
    try:
        # Locate the filter button by its visible text
        filter_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[contains(text(), '{filter_name}')]")
            )
        )
        filter_button.click()
        logging.info(f"Clicked the '{filter_name}' filter button.")
        print(f"Clicked the '{filter_name}' filter button.")

        # Wait for the filter modal or dropdown to appear
        wait.until(
            EC.visibility_of_element_located(
                (By.CLASS_NAME, "search__searchFilterModalBody__1EtSh")
            )
        )

        # Locate the radio button option by its label text
        option_label = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//label[contains(text(), '{option}')]")
            )
        )
        option_label.click()
        logging.info(f"Selected '{option}' from '{filter_name}' filter.")
        print(f"Selected '{option}' from '{filter_name}' filter.")

        # Optional: Wait for the page to refresh/update after applying the filter
        time.sleep(3)  # Adjust based on your internet speed and website response

    except Exception as e:
        logging.error(f"Failed to apply radio filter '{filter_name}' with option '{option}': {e}")
        print(f"Failed to apply radio filter '{filter_name}' with option '{option}': {e}")
        driver.save_screenshot(f"error_apply_radio_filter_{filter_name}_{option}.png")

def click_load_more(driver, wait):
    """Click the 'Load More' button until it's no longer present."""
    while True:
        try:
            # Locate the 'Load More' button
            load_more_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[span[text()='Load More']]"))
            )
            # Scroll to the 'Load More' button to ensure it's in view
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)
            # Click the 'Load More' button
            load_more_button.click()
            logging.info("Clicked the 'Load More' button.")
            print("Clicked the 'Load More' button.")
            # Wait for new content to load
            time.sleep(3)  # Adjust based on your internet speed
        except Exception as e:
            logging.info("No more 'Load More' buttons to click or button not found.")
            print("No more 'Load More' buttons to click or button not found.")
            break

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Attempt to locate the "Load More" button
        try:
            click_load_more(driver, wait)
            page += 1
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def main():
    """Main function to execute the scraping workflow."""
    base_url = "https://app.utrsports.net"  # Define the base URL

    try:
        # Navigate to the login page
        driver.get(login_url)
        logging.info("Navigated to the login page.")
        print("Navigated to the login page.")

        # Allow the page to load completely
        time.sleep(3)  # Adjust based on your internet speed

        # Locate the email input field
        try:
            email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))  # Replace with actual ID
            email_field.clear()
            email_field.send_keys(email)
            logging.info("Entered email.")
            print("Entered email.")
        except Exception as e:
            logging.error(f"Email input field not found: {e}")
            driver.save_screenshot("error_email_field.png")
            print(f"Email input field not found: {e}")
            driver.quit()
            return

        # Locate the password input field
        try:
            password_field = driver.find_element(By.ID, "passwordInput")  # Replace with actual ID
            password_field.clear()
            password_field.send_keys(password)
            logging.info("Entered password.")
            print("Entered password.")
        except Exception as e:
            logging.error(f"Password input field not found: {e}")
            driver.save_screenshot("error_password_field.png")
            print(f"Password input field not found: {e}")
            driver.quit()
            return

        # Locate and click the login button
        try:
            sign_in_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", sign_in_button)
            sign_in_button.click()
            logging.info("Clicked the 'SIGN IN' button.")
            print("Clicked the 'SIGN IN' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.save_screenshot("error_click_sign_in.png")
            print(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.quit()
            return

        # Wait until the "Continue" button appears and click it
        try:
            continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
            )
            continue_button.click()
            logging.info("Clicked the 'Continue' button.")
            print("Clicked the 'Continue' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.save_screenshot("error_click_continue.png")
            print(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.quit()
            return

        # Navigate to the Player's Stats Page
        try:
            driver.get(stats_url)
            logging.info(f"Navigated to player's stats page: {stats_url}")
            print(f"Navigated to player's stats page: {stats_url}")

            # Allow the stats page to load
            time.sleep(5)  # Adjust based on your internet speed
        except Exception as e:
            logging.error(f"Error navigating to player's stats page: {e}")
            driver.save_screenshot("error_navigate_stats_page.png")
            print(f"Error navigating to player's stats page: {e}")
            driver.quit()
            return

        # Apply Filters

        # 1. Apply Gender Filter (Existing Functionality)
        try:
            apply_filter(driver, wait, filter_name="Gender", option="Male")
            # Add more button-based filters as needed
        except Exception as e:
            logging.error(f"Error applying 'Gender' filter: {e}")
            driver.save_screenshot("error_apply_gender_filter.png")
            print(f"Error applying 'Gender' filter: {e}")

        # 2. Apply Segment Filter (New Functionality)
        try:
            apply_radio_filter(driver, wait, filter_name="Segment", option="Pro")
            # You can change 'Pro' to any other segment like 'College', 'High School', etc.
        except Exception as e:
            logging.error(f"Error applying 'Segment' filter: {e}")
            driver.save_screenshot("error_apply_segment_filter.png")
            print(f"Error applying 'Segment' filter: {e}")

        # Click the "Load More" button until all data is loaded
        try:
            click_load_more(driver, wait)
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")

        # Scrape all pages (now all data is loaded)
        all_data = scrape_current_page(driver, wait, base_url)

        # Save the data to CSV
        save_to_csv(all_data)

    except Exception as main_e:
        logging.error(f"An unexpected error occurred: {main_e}")
        driver.save_screenshot("unexpected_error.png")
        print(f"An unexpected error occurred: {main_e}")
    finally:
        # Close the WebDriver
        driver.quit()
        logging.info("WebDriver closed.")
        print("WebDriver closed.")

if __name__ == "__main__":
    main()
