In [1]:
# Selenium imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys  # Importing Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

# Other imports
from dotenv import load_dotenv
import os
import time
import logging
from bs4 import BeautifulSoup
import pandas as pd


In [2]:
from dotenv import load_dotenv
import os

# Load variables from .env file
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify that credentials are loaded
print(f"Email: {email}")
print(f"Password: {'*' * len(password) if password else 'Not Found'}")


Email: neil.treat@gmail.com
Password: *************


scrape table

In [37]:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

from dotenv import load_dotenv
import os
import time
import logging
from bs4 import BeautifulSoup
import pandas as pd

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Configure logging
logging.basicConfig(filename='scraper.log', level=logging.INFO,
                    format='%(asctime)s:%(levelname)s:%(message)s')

def setup_driver():
    chrome_options = Options()
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--start-maximized")
    
    # Uncomment the next line to run Chrome in headless mode after successful debugging
    #chrome_options.add_argument("--headless")  
    
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

def perform_login(driver, wait):
    driver.get("https://app.utrsports.net/login")
    logging.info("Navigated to login page")
    
    # Wait for email field and enter credentials
    email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))
    email_field.clear()
    email_field.send_keys(email)
    
    password_field = driver.find_element(By.ID, "passwordInput")
    password_field.clear()
    password_field.send_keys(password)
    
    # Click sign in
    sign_in_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
    )
    sign_in_button.click()
    logging.info("Clicked sign in button")
    
    # Wait for and click continue button
    continue_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
    )
    continue_button.click()
    logging.info("Login completed successfully")

def navigate_to_stats(driver, wait, stats_url):
    driver.get(stats_url)
    logging.info(f"Navigated to stats page: {stats_url}")
    # Wait for page load
    time.sleep(5)

def find_show_all_link(driver, wait):
    """Try multiple strategies to find the 'Show all' link"""
    # List of possible XPath and CSS selectors to try
    selectors = [
        (By.XPATH, "//a[text()='Show all']"),
        (By.XPATH, "//a[contains(text(), 'Show all')]"),
        (By.XPATH, "//div[contains(@class, 'mt32')]//a[text()='Show all']"),
        (By.CSS_SELECTOR, "a[href*='show-all']"),  # If the link contains 'show-all' in href
        (By.LINK_TEXT, "Show all"),
        (By.PARTIAL_LINK_TEXT, "Show all")
    ]
    
    # Try each selector
    for by, selector in selectors:
        try:
            elements = driver.find_elements(by, selector)
            if elements:
                # Print information about found elements
                print(f"Found {len(elements)} elements with selector {selector}")
                for idx, elem in enumerate(elements):
                    try:
                        print(f"Element {idx + 1}:")
                        print(f"  Text: {elem.text}")
                        print(f"  Is displayed: {elem.is_displayed()}")
                        print(f"  Location: {elem.location}")
                        print(f"  HTML: {elem.get_attribute('outerHTML')}")
                    except:
                        continue
                return elements
        except Exception as e:
            continue
    
    return []

def click_show_all(driver, wait):
    # Scroll to bottom to ensure all content is loaded
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
    
    # Find the Show all link
    show_all_elements = find_show_all_link(driver, wait)
    
    if not show_all_elements:
        # If no elements found, take a screenshot and log the page source
        driver.save_screenshot("no_show_all_found.png")
        with open("page_source.html", "w", encoding="utf-8") as f:
            f.write(driver.page_source)
        raise Exception("No 'Show all' link found. Screenshot and page source saved.")
    
    # Try to click the last "Show all" link
    show_all_link = show_all_elements[-1]
    
    # Try multiple click methods
    click_methods = [
        lambda: show_all_link.click(),  # Regular click
        lambda: ActionChains(driver).move_to_element(show_all_link).click().perform(),  # Action chains click
        lambda: driver.execute_script("arguments[0].click();", show_all_link)  # JavaScript click
    ]
    
    for click_method in click_methods:
        try:
            # Scroll the element into view
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", show_all_link)
            time.sleep(1)
            
            # Try to click
            click_method()
            logging.info("Successfully clicked 'Show all' link")
            time.sleep(3)  # Wait for content to load
            return True
        except Exception as e:
            logging.warning(f"Click method failed: {str(e)}")
            continue
    
    raise Exception("Failed to click 'Show all' link with all methods")

import pandas as pd
from bs4 import BeautifulSoup

def extract_utr_data(html_content):
    # Create BeautifulSoup object
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Find all history items
    history_items = soup.find_all('div', class_='newStatsTabContent__historyItem__1Nb0C')
    
    # Lists to store data
    dates = []
    ratings = []
    
    # Extract data from each history item
    for item in history_items:
        # Extract date
        date = item.find('div', class_='newStatsTabContent__historyItemDate__jFJyD').text.strip()
        
        # Extract rating
        rating = item.find('div', class_='newStatsTabContent__historyItemRating__GQUXw').text.strip()
        
        # Append to lists
        dates.append(date)
        ratings.append(float(rating))
    
    # Create DataFrame
    df = pd.DataFrame({
        'Date': pd.to_datetime(dates),
        'UTR_Rating': ratings
    })
    
    # Sort by date
    df = df.sort_values('Date')
    
    return df


def main():
    driver = None
    try:
        driver = setup_driver()
        wait = WebDriverWait(driver, 20)
        stats_url = "https://app.utrsports.net/profiles/247320?t=6"
        
        perform_login(driver, wait)
        navigate_to_stats(driver, wait, stats_url)
        
        # Take a screenshot before attempting to click Show all
        driver.save_screenshot("before_show_all.png")
        
        # Get the initial page source to check if we need to click "Show all"
        initial_html = driver.page_source
        initial_soup = BeautifulSoup(initial_html, 'html.parser')
        show_all_link = initial_soup.find('a', string='Show all')
        
        if show_all_link:
            click_show_all(driver, wait)
            # Wait for new content to load
            time.sleep(3)
        
        # Get the final page source after clicking "Show all" (if needed)
        html_content = driver.page_source
        
        # Create the DataFrame
        df = extract_utr_data(html_content)

        # Basic statistics
        stats = {
            'Latest Rating': df['UTR_Rating'].iloc[-1],
            'Highest Rating': df['UTR_Rating'].max(),
            'Lowest Rating': df['UTR_Rating'].min(),
            'Average Rating': df['UTR_Rating'].mean(),
            'Total Records': len(df),
            'Date Range': f"{df['Date'].min().strftime('%Y-%m-%d')} to {df['Date'].max().strftime('%Y-%m-%d')}"
        }

        # Print some basic statistics
        print("\nUTR Statistics:")
        print("-" * 50)
        for key, value in stats.items():
            if isinstance(value, float):
                print(f"{key}: {value:.2f}")
            else:
                print(f"{key}: {value}")

        # Save to CSV
        output_file = 'utr_history.csv'
        df.to_csv(output_file, index=False)
        print(f"\nData saved to {output_file}")
        
        # Save raw HTML for debugging if needed
        with open("raw_data.html", "w", encoding="utf-8") as f:
            f.write(html_content)
        
    except Exception as e:
        logging.error(f"An error occurred: {e}")
        if driver:
            driver.save_screenshot(f"error_{int(time.time())}.png")
            # Save the page source when error occurs
            with open(f"error_page_{int(time.time())}.html", "w", encoding="utf-8") as f:
                f.write(driver.page_source)
        raise e
    finally:
        if driver:
            driver.quit()

if __name__ == "__main__":
    main()

TimeoutException: Message: 
Stacktrace:
0   chromedriver                        0x000000010357bac4 cxxbridge1$str$ptr + 3651580
1   chromedriver                        0x0000000103574314 cxxbridge1$str$ptr + 3620940
2   chromedriver                        0x0000000102fdc4b4 cxxbridge1$string$len + 89224
3   chromedriver                        0x0000000103020898 cxxbridge1$string$len + 368748
4   chromedriver                        0x000000010305a0fc cxxbridge1$string$len + 604368
5   chromedriver                        0x00000001030150b0 cxxbridge1$string$len + 321668
6   chromedriver                        0x0000000103015d00 cxxbridge1$string$len + 324820
7   chromedriver                        0x0000000103546e08 cxxbridge1$str$ptr + 3435328
8   chromedriver                        0x000000010354a120 cxxbridge1$str$ptr + 3448408
9   chromedriver                        0x000000010352e17c cxxbridge1$str$ptr + 3333812
10  chromedriver                        0x000000010354a9e0 cxxbridge1$str$ptr + 3450648
11  chromedriver                        0x000000010351f988 cxxbridge1$str$ptr + 3274432
12  chromedriver                        0x00000001035650f4 cxxbridge1$str$ptr + 3558956
13  chromedriver                        0x0000000103565270 cxxbridge1$str$ptr + 3559336
14  chromedriver                        0x0000000103573f88 cxxbridge1$str$ptr + 3620032
15  libsystem_pthread.dylib             0x000000019fa6df94 _pthread_start + 136
16  libsystem_pthread.dylib             0x000000019fa68d34 thread_start + 8


now need to integrate player search

In [5]:
# --- Complete Revised Script ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")  # Start maximized for better visibility

# Uncomment the next line to run Chrome in headless mode after successful debugging
# chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Check if "Next" button is present and clickable
        try:
            next_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Next']"))  # Replace with actual XPath
            )
            next_button.click()
            logging.info("Clicked the 'Next' button.")
            print("Clicked the 'Next' button.")
            page += 1
            time.sleep(3)  # Wait for the next page to load
        except Exception as e:
            logging.info("No more pages to scrape or 'Next' button not found.")
            print("No more pages to scrape or 'Next' button not found.")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def main():
    """Main function to execute the scraping workflow."""
    base_url = "https://app.utrsports.net"  # Define the base URL

    try:
        # Navigate to the login page
        driver.get(login_url)
        logging.info("Navigated to the login page.")
        print("Navigated to the login page.")

        # Allow the page to load completely
        time.sleep(3)  # Adjust based on your internet speed

        # Locate the email input field
        try:
            email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))  # Replace with actual ID
            email_field.clear()
            email_field.send_keys(email)
            logging.info("Entered email.")
            print("Entered email.")
        except Exception as e:
            logging.error(f"Email input field not found: {e}")
            driver.save_screenshot("error_email_field.png")
            print(f"Email input field not found: {e}")
            driver.quit()
            return

        # Locate the password input field
        try:
            password_field = driver.find_element(By.ID, "passwordInput")  # Replace with actual ID
            password_field.clear()
            password_field.send_keys(password)
            logging.info("Entered password.")
            print("Entered password.")
        except Exception as e:
            logging.error(f"Password input field not found: {e}")
            driver.save_screenshot("error_password_field.png")
            print(f"Password input field not found: {e}")
            driver.quit()
            return

        # Locate and click the login button
        try:
            sign_in_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", sign_in_button)
            sign_in_button.click()
            logging.info("Clicked the 'SIGN IN' button.")
            print("Clicked the 'SIGN IN' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.save_screenshot("error_click_sign_in.png")
            print(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.quit()
            return

        # Wait until the "Continue" button appears and click it
        try:
            continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
            )
            continue_button.click()
            logging.info("Clicked the 'Continue' button.")
            print("Clicked the 'Continue' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.save_screenshot("error_click_continue.png")
            print(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.quit()
            return

        # Navigate to the Player's Stats Page
        try:
            driver.get(stats_url)
            logging.info(f"Navigated to player's stats page: {stats_url}")
            print(f"Navigated to player's stats page: {stats_url}")

            # Allow the stats page to load
            time.sleep(5)  # Adjust based on your internet speed
        except Exception as e:
            logging.error(f"Error navigating to player's stats page: {e}")
            driver.save_screenshot("error_navigate_stats_page.png")
            print(f"Error navigating to player's stats page: {e}")
            driver.quit()
            return

        # Scrape all pages (if paginated)
        all_data = scrape_all_pages(driver, wait, base_url)

        # Save the data to CSV
        save_to_csv(all_data)

    except Exception as main_e:
        logging.error(f"An unexpected error occurred: {main_e}")
        driver.save_screenshot("unexpected_error.png")
        print(f"An unexpected error occurred: {main_e}")
    finally:
        # Close the WebDriver
        driver.quit()
        logging.info("WebDriver closed.")
        print("WebDriver closed.")

if __name__ == "__main__":
    main()


Navigated to the login page.
Entered email.
Entered password.
Clicked the 'SIGN IN' button.
Clicked the 'Continue' button.
Navigated to player's stats page: https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751
Scraping page 1.
Found 40 player cards on the current page.
Scraped player 1: {'Player Name': 'Jannik Sinner', 'Location': 'M • Italy', 'Singles UTR': '16.25', 'Doubles UTR': '15.26', 'Profile Link': 'https://app.utrsports.net/profiles/247320'}
Scraped player 2: {'Player Name': 'Carlos Alcaraz', 'Location': 'M • Spain', 'Singles UTR': '16.21', 'Doubles UTR': '14.91', 'Profile Link': 'https://app.utrsports.net/profiles/3569175'}
UTR not found for player 3: list index out of range
Scraped player 3: {'Player Name': 'Novak Djokovic', 'Location': 'M • Serbia', 'Singles UTR': 'N/A', 'Doubles UTR': 'N/A', 'Profile Link': 'https://app.utrsports.net/prof

In [21]:
# --- Complete Revised Script ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")  # Start maximized for better visibility

# Uncomment the next line to run Chrome in headless mode after successful debugging
#chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def click_load_more(driver, wait):
    """Click the 'Load More' button until it's no longer present."""
    while True:
        try:
            # Locate the 'Load More' button
            load_more_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[span[text()='Load More']]"))
            )
            # Scroll to the 'Load More' button to ensure it's in view
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)
            # Click the 'Load More' button
            load_more_button.click()
            logging.info("Clicked the 'Load More' button.")
            print("Clicked the 'Load More' button.")
            # Wait for new content to load
            time.sleep(3)  # Adjust based on your internet speed
        except Exception as e:
            logging.info("No more 'Load More' buttons to click or button not found.")
            print("No more 'Load More' buttons to click or button not found.")
            break

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Attempt to locate the "Load More" button
        try:
            click_load_more(driver, wait)
            page += 1
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def main():
    """Main function to execute the scraping workflow."""
    base_url = "https://app.utrsports.net"  # Define the base URL

    try:
        # Navigate to the login page
        driver.get(login_url)
        logging.info("Navigated to the login page.")
        print("Navigated to the login page.")

        # Allow the page to load completely
        time.sleep(3)  # Adjust based on your internet speed

        # Locate the email input field
        try:
            email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))  # Replace with actual ID
            email_field.clear()
            email_field.send_keys(email)
            logging.info("Entered email.")
            print("Entered email.")
        except Exception as e:
            logging.error(f"Email input field not found: {e}")
            driver.save_screenshot("error_email_field.png")
            print(f"Email input field not found: {e}")
            driver.quit()
            return

        # Locate the password input field
        try:
            password_field = driver.find_element(By.ID, "passwordInput")  # Replace with actual ID
            password_field.clear()
            password_field.send_keys(password)
            logging.info("Entered password.")
            print("Entered password.")
        except Exception as e:
            logging.error(f"Password input field not found: {e}")
            driver.save_screenshot("error_password_field.png")
            print(f"Password input field not found: {e}")
            driver.quit()
            return

        # Locate and click the login button
        try:
            sign_in_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", sign_in_button)
            sign_in_button.click()
            logging.info("Clicked the 'SIGN IN' button.")
            print("Clicked the 'SIGN IN' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.save_screenshot("error_click_sign_in.png")
            print(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.quit()
            return

        # Wait until the "Continue" button appears and click it
        try:
            continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
            )
            continue_button.click()
            logging.info("Clicked the 'Continue' button.")
            print("Clicked the 'Continue' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.save_screenshot("error_click_continue.png")
            print(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.quit()
            return

        # Navigate to the Player's Stats Page
        try:
            driver.get(stats_url)
            logging.info(f"Navigated to player's stats page: {stats_url}")
            print(f"Navigated to player's stats page: {stats_url}")

            # Allow the stats page to load
            time.sleep(5)  # Adjust based on your internet speed
        except Exception as e:
            logging.error(f"Error navigating to player's stats page: {e}")
            driver.save_screenshot("error_navigate_stats_page.png")
            print(f"Error navigating to player's stats page: {e}")
            driver.quit()
            return

        # Click the "Load More" button until all data is loaded
        try:
            click_load_more(driver, wait)
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")

        # Scrape all pages (now all data is loaded)
        all_data = scrape_current_page(driver, wait, base_url)

        # Save the data to CSV
        save_to_csv(all_data)

    except Exception as main_e:
        logging.error(f"An unexpected error occurred: {main_e}")
        driver.save_screenshot("unexpected_error.png")
        print(f"An unexpected error occurred: {main_e}")
    finally:
        # Close the WebDriver
        driver.quit()
        logging.info("WebDriver closed.")
        print("WebDriver closed.")

if __name__ == "__main__":
    main()


Navigated to the login page.
Entered email.
Entered password.
Clicked the 'SIGN IN' button.
An error occurred while clicking the 'Continue' button: Message: 
Stacktrace:
0   chromedriver                        0x000000010529bac4 cxxbridge1$str$ptr + 3651580
1   chromedriver                        0x0000000105294314 cxxbridge1$str$ptr + 3620940
2   chromedriver                        0x0000000104cfc4b4 cxxbridge1$string$len + 89224
3   chromedriver                        0x0000000104d40898 cxxbridge1$string$len + 368748
4   chromedriver                        0x0000000104d7a0fc cxxbridge1$string$len + 604368
5   chromedriver                        0x0000000104d350b0 cxxbridge1$string$len + 321668
6   chromedriver                        0x0000000104d35d00 cxxbridge1$string$len + 324820
7   chromedriver                        0x0000000105266e08 cxxbridge1$str$ptr + 3435328
8   chromedriver                        0x000000010526a120 cxxbridge1$str$ptr + 3448408
9   chromedriver             

apply filters

In [None]:
# --- Complete Revised Script with Filter Interaction ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")  # Start maximized for better visibility

# Uncomment the next line to run Chrome in headless mode after successful debugging
# chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def apply_filter(driver, wait, filter_name, option):
    """
    Applies a filter on the UTR Sports website.

    :param driver: Selenium WebDriver instance.
    :param wait: WebDriverWait instance.
    :param filter_name: The name of the filter to apply (e.g., 'Gender').
    :param option: The option to select within the filter (e.g., 'Male').
    """
    try:
        # Locate the filter button by its visible text
        filter_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[contains(text(), '{filter_name}')]")
            )
        )
        filter_button.click()
        logging.info(f"Clicked the '{filter_name}' filter button.")
        print(f"Clicked the '{filter_name}' filter button.")

        # Wait for the filter options to appear
        # Using a more precise XPath to locate the option button directly
        option_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[text()='{option}']")
            )
        )
        option_button.click()
        logging.info(f"Selected '{option}' from '{filter_name}' filter.")
        print(f"Selected '{option}' from '{filter_name}' filter.")

        # Optional: Wait for the page to refresh/update after applying the filter
        time.sleep(3)  # Adjust based on your internet speed and website response

    except Exception as e:
        logging.error(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        print(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        driver.save_screenshot(f"error_apply_filter_{filter_name}_{option}.png")


def click_load_more(driver, wait):
    """Click the 'Load More' button until it's no longer present."""
    while True:
        try:
            # Locate the 'Load More' button
            load_more_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[span[text()='Load More']]"))
            )
            # Scroll to the 'Load More' button to ensure it's in view
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)
            # Click the 'Load More' button
            load_more_button.click()
            logging.info("Clicked the 'Load More' button.")
            print("Clicked the 'Load More' button.")
            # Wait for new content to load
            time.sleep(3)  # Adjust based on your internet speed
        except Exception as e:
            logging.info("No more 'Load More' buttons to click or button not found.")
            print("No more 'Load More' buttons to click or button not found.")
            break

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Attempt to locate the "Load More" button
        try:
            click_load_more(driver, wait)
            page += 1
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def perform_search(driver, wait, search_term):
    """
    Performs a search by targeting the exact HTML structure of the search elements.
    """
    try:
        # Wait for the page to be fully loaded
        time.sleep(5)
        
        logging.info("Attempting to find search wrapper...")
        print("Attempting to find search wrapper...")
        
        # First find the desktop search wrapper
        try:
            # Target the nav-search-wrapper first
            search_wrapper = wait.until(
                EC.presence_of_element_located((
                    By.CSS_SELECTOR, 
                    "div.nav-search-wrapper div.d-none.d-lg-block div.globalSearch__globalSearchWrapper__NglK2"
                ))
            )
            
            logging.info("Found desktop search wrapper")
            print("Found desktop search wrapper")
            
            # Find the search container within the wrapper
            search_container = search_wrapper.find_element(
                By.CSS_SELECTOR,
                "div.globalSearch__globalSearchContainer__3_82H"
            )
            
            # Find the input container
            input_container = search_container.find_element(
                By.CSS_SELECTOR,
                "div.globalSearch__globalSearchInputContainer__35Wld"
            )
            
            # Find the input element directly using its data-testid
            search_input = input_container.find_element(
                By.CSS_SELECTOR,
                "input[data-testid='globalSearch-searchInputButton-eUX6nl19']"
            )
            
            # Make sure search input is visible
            if not search_input.is_displayed():
                # Try to find and click the expander
                expander = search_wrapper.find_element(
                    By.CSS_SELECTOR,
                    "div.globalSearch__searchExpander__2jpEM"
                )
                driver.execute_script("arguments[0].click();", expander)
                time.sleep(2)
            
            # Clear and enter search term
            search_input.clear()
            search_input.send_keys(search_term)
            logging.info(f"Entered search term: {search_term}")
            print(f"Entered search term: {search_term}")
            
            time.sleep(2)
            
            # Wait for and click the "SEE ALL" link
            see_all_link = wait.until(
                EC.element_to_be_clickable((
                    By.XPATH,
                    "//a[contains(., 'SEE ALL')]"
                ))
            )
            
            see_all_link.click()
            logging.info("Clicked 'SEE ALL' link")
            print("Clicked 'SEE ALL' link")
            
        except Exception as e:
            logging.error(f"Error during search: {str(e)}")
            print(f"Error during search: {str(e)}")
            # Log the current state
            logging.info("Current page state:")
            logging.info(driver.page_source)
            driver.save_screenshot("search_error.png")
            raise
            
    except Exception as e:
        logging.error(f"Error in perform_search: {str(e)}")
        print(f"Error in perform_search: {str(e)}")
        driver.save_screenshot("search_error_final.png")
        raise

def main():
    """Main function to execute the scraping workflow."""
    base_url = "https://app.utrsports.net"  # Define the base URL

    try:
        # Navigate to the login page
        driver.get(login_url)
        logging.info("Navigated to the login page.")
        print("Navigated to the login page.")

        # Allow the page to load completely
        time.sleep(3)  # Adjust based on your internet speed

        # Locate the email input field
        try:
            email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))  # Replace with actual ID
            email_field.clear()
            email_field.send_keys(email)
            logging.info("Entered email.")
            print("Entered email.")
        except Exception as e:
            logging.error(f"Email input field not found: {e}")
            driver.save_screenshot("error_email_field.png")
            print(f"Email input field not found: {e}")
            driver.quit()
            return

        # Locate the password input field
        try:
            password_field = driver.find_element(By.ID, "passwordInput")  # Replace with actual ID
            password_field.clear()
            password_field.send_keys(password)
            logging.info("Entered password.")
            print("Entered password.")
        except Exception as e:
            logging.error(f"Password input field not found: {e}")
            driver.save_screenshot("error_password_field.png")
            print(f"Password input field not found: {e}")
            driver.quit()
            return

        # Locate and click the login button
        try:
            sign_in_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", sign_in_button)
            sign_in_button.click()
            logging.info("Clicked the 'SIGN IN' button.")
            print("Clicked the 'SIGN IN' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.save_screenshot("error_click_sign_in.png")
            print(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.quit()
            return

        # Wait until the "Continue" button appears and click it
        try:
            continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
            )
            continue_button.click()
            logging.info("Clicked the 'Continue' button.")
            print("Clicked the 'Continue' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.save_screenshot("error_click_continue.png")
            print(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.quit()
            return

        try:
            driver.get(base_url)
            logging.info(f"Navigated to player's stats page: {base_url}")
            print(f"Navigated to player's stats page: {base_url}")

            # Allow the stats page to load
            time.sleep(2)
            
            # Perform search (add your search term here)
            search_term = "Jannik Sinner"  # Replace with desired search term
            perform_search(driver, wait, search_term)
            
            # Wait for search results to load
            #time.sleep(3)
            
            # Apply Filters (e.g., Gender: Male)
            #apply_filter(driver, wait, filter_name="Gender", option="Male")
        
        except Exception as e:
            logging.error(f"Error in main workflow: {e}")
            driver.save_screenshot("error_main_workflow.png")
            print(f"Error in main workflow: {e}")
            driver.quit()
            return

        # Navigate to the Player's Stats Page
        try:
            driver.get(stats_url)
            logging.info(f"Navigated to player's stats page: {stats_url}")
            print(f"Navigated to player's stats page: {stats_url}")

            # Allow the stats page to load
            time.sleep(5)  # Adjust based on your internet speed
        except Exception as e:
            logging.error(f"Error navigating to player's stats page: {e}")
            driver.save_screenshot("error_navigate_stats_page.png")
            print(f"Error navigating to player's stats page: {e}")
            driver.quit()
            return

        # Apply Filters (e.g., Gender: Male)
        try:
            apply_filter(driver, wait, filter_name="Gender", option="Male")
            # Add more filters as needed by calling apply_filter with different parameters
            # Example:
            # apply_filter(driver, wait, filter_name="Age", option="18-25")
        except Exception as e:
            logging.error(f"Error applying filters: {e}")
            driver.save_screenshot("error_apply_filters.png")
            print(f"Error applying filters: {e}")

        # Click the "Load More" button until all data is loaded
        try:
            click_load_more(driver, wait)
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")

        # Scrape all pages (now all data is loaded)
        all_data = scrape_current_page(driver, wait, base_url)

        # Save the data to CSV
        save_to_csv(all_data)

    except Exception as main_e:
        logging.error(f"An unexpected error occurred: {main_e}")
        driver.save_screenshot("unexpected_error.png")
        print(f"An unexpected error occurred: {main_e}")
    finally:
        # Close the WebDriver
        driver.quit()
        logging.info("WebDriver closed.")
        print("WebDriver closed.")

if __name__ == "__main__":
    main()


search bar

In [27]:
# --- Complete Revised Script with Filter Interaction ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--window-size=1920,1080")  # Add this
chrome_options.add_argument("--headless=new")  # Use the new headless mode
chrome_options.add_argument("--disable-blink-features=AutomationControlled")  # Add this
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])  # Add this
chrome_options.add_experimental_option("useAutomationExtension", False)  # Add this

# Uncomment the next line to run Chrome in headless mode after successful debugging
chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def apply_filter(driver, wait, filter_name, option):
    """
    Applies a filter on the UTR Sports website.

    :param driver: Selenium WebDriver instance.
    :param wait: WebDriverWait instance.
    :param filter_name: The name of the filter to apply (e.g., 'Gender').
    :param option: The option to select within the filter (e.g., 'Male').
    """
    try:
        # Locate the filter button by its visible text
        filter_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[contains(text(), '{filter_name}')]")
            )
        )
        filter_button.click()
        logging.info(f"Clicked the '{filter_name}' filter button.")
        print(f"Clicked the '{filter_name}' filter button.")

        # Wait for the filter options to appear
        # Using a more precise XPath to locate the option button directly
        option_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[text()='{option}']")
            )
        )
        option_button.click()
        logging.info(f"Selected '{option}' from '{filter_name}' filter.")
        print(f"Selected '{option}' from '{filter_name}' filter.")

        # Optional: Wait for the page to refresh/update after applying the filter
        time.sleep(3)  # Adjust based on your internet speed and website response

    except Exception as e:
        logging.error(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        print(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        driver.save_screenshot(f"error_apply_filter_{filter_name}_{option}.png")


def click_load_more(driver, wait):
    """Click the 'Load More' button until it's no longer present."""
    while True:
        try:
            # Locate the 'Load More' button
            load_more_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[span[text()='Load More']]"))
            )
            # Scroll to the 'Load More' button to ensure it's in view
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)
            # Click the 'Load More' button
            load_more_button.click()
            logging.info("Clicked the 'Load More' button.")
            print("Clicked the 'Load More' button.")
            # Wait for new content to load
            time.sleep(3)  # Adjust based on your internet speed
        except Exception as e:
            logging.info("No more 'Load More' buttons to click or button not found.")
            print("No more 'Load More' buttons to click or button not found.")
            break

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Attempt to locate the "Load More" button
        try:
            click_load_more(driver, wait)
            page += 1
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def perform_search(driver, wait, search_term):
    """
    Performs a search by targeting the exact HTML structure of the search elements.
    """
    try:
        # Wait for the page to be fully loaded
        time.sleep(5)
        
        logging.info("Attempting to find search wrapper...")
        print("Attempting to find search wrapper...")
        
        # First find the desktop search wrapper
        try:
            # Target the nav-search-wrapper first
            search_wrapper = wait.until(
                EC.presence_of_element_located((
                    By.CSS_SELECTOR, 
                    "div.nav-search-wrapper div.d-none.d-lg-block div.globalSearch__globalSearchWrapper__NglK2"
                ))
            )
            
            logging.info("Found desktop search wrapper")
            print("Found desktop search wrapper")
            
            # Find the search container within the wrapper
            search_container = search_wrapper.find_element(
                By.CSS_SELECTOR,
                "div.globalSearch__globalSearchContainer__3_82H"
            )
            
            # Find the input container
            input_container = search_container.find_element(
                By.CSS_SELECTOR,
                "div.globalSearch__globalSearchInputContainer__35Wld"
            )
            
            # Find the input element directly using its data-testid
            search_input = input_container.find_element(
                By.CSS_SELECTOR,
                "input[data-testid='globalSearch-searchInputButton-eUX6nl19']"
            )
            
            # Make sure search input is visible
            if not search_input.is_displayed():
                # Try to find and click the expander
                expander = search_wrapper.find_element(
                    By.CSS_SELECTOR,
                    "div.globalSearch__searchExpander__2jpEM"
                )
                driver.execute_script("arguments[0].click();", expander)
                time.sleep(2)
            
            # Clear and enter search term
            search_input.clear()
            search_input.send_keys(search_term)
            logging.info(f"Entered search term: {search_term}")
            print(f"Entered search term: {search_term}")
            
            time.sleep(2)
            
            # Wait for and click the "SEE ALL" link
            see_all_link = wait.until(
                EC.element_to_be_clickable((
                    By.XPATH,
                    "//a[contains(., 'SEE ALL')]"
                ))
            )
            
            see_all_link.click()
            logging.info("Clicked 'SEE ALL' link")
            print("Clicked 'SEE ALL' link")
        
            # After clicking "SEE ALL" and letting results load
            time.sleep(3)
            
            # Wait for and click the first result
            try:
                first_result = wait.until(
                    EC.element_to_be_clickable((
                        By.CSS_SELECTOR,
                        "div.search__cardContainer__1Z9Ee > a"
                    ))
                )
                
                logging.info("Found first search result, attempting to click...")
                print("Found first search result, attempting to click...")
                
                # Get the href for logging
                profile_link = first_result.get_attribute('href')
                logging.info(f"Clicking profile link: {profile_link}")
                print(f"Clicking profile link: {profile_link}")
                
                # Try JavaScript click first as it's more reliable
                driver.execute_script("arguments[0].click();", first_result)
                
                # Wait for the profile page to load
                time.sleep(3)
                
                logging.info("Clicked first search result successfully")
                print("Clicked first search result successfully")
                
                navigate_to_stats(driver, wait)
                
            except Exception as e:
                logging.error(f"Error clicking first search result: {str(e)}")
                print(f"Error clicking first search result: {str(e)}")
                driver.save_screenshot("error_clicking_result.png")
                raise
                
        except Exception as e:
            logging.error(f"Error during search: {str(e)}")
            print(f"Error during search: {str(e)}")
            # Log the current state
            logging.info("Current page state:")
            logging.info(driver.page_source)
            driver.save_screenshot("search_error.png")
            raise
            
    except Exception as e:
        logging.error(f"Error in perform_search: {str(e)}")
        print(f"Error in perform_search: {str(e)}")
        driver.save_screenshot("search_error_final.png")
        raise

def navigate_to_stats(driver, wait):
    """
    Clicks the Stats tab button on a player's profile page.
    
    Args:
        driver: Selenium WebDriver instance
        wait: WebDriverWait instance
    """
    try:
        # Wait for and find the Stats tab button
        stats_button = wait.until(
            EC.element_to_be_clickable((
                By.XPATH,
                "//button[contains(@class, 'btn-tab')][.//div[contains(text(), 'Stats')]]"
            ))
        )
        
        logging.info("Found Stats tab button, attempting to click...")
        print("Found Stats tab button, attempting to click...")
        
        # Try regular click first
        try:
            stats_button.click()
        except:
            # If regular click fails, try JavaScript click
            driver.execute_script("arguments[0].click();", stats_button)
        
        # Wait for stats content to load
        time.sleep(3)
        
        logging.info("Successfully navigated to Stats tab")
        print("Successfully navigated to Stats tab")
        
    except Exception as e:
        logging.error(f"Error navigating to Stats tab: {str(e)}")
        print(f"Error navigating to Stats tab: {str(e)}")
        driver.save_screenshot("error_stats_navigation.png")
        raise

def main():
    """Main function to execute the scraping workflow."""
    base_url = "https://app.utrsports.net"  # Define the base URL

    try:
        # Navigate to the login page
        driver.get(login_url)
        logging.info("Navigated to the login page.")
        print("Navigated to the login page.")

        # Allow the page to load completely
        time.sleep(3)

        # Locate and fill email input field
        try:
            email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))
            email_field.clear()
            email_field.send_keys(email)
            logging.info("Entered email.")
            print("Entered email.")
        except Exception as e:
            logging.error(f"Email input field not found: {e}")
            driver.save_screenshot("error_email_field.png")
            print(f"Email input field not found: {e}")
            raise

        # Locate and fill password input field
        try:
            password_field = driver.find_element(By.ID, "passwordInput")
            password_field.clear()
            password_field.send_keys(password)
            logging.info("Entered password.")
            print("Entered password.")
        except Exception as e:
            logging.error(f"Password input field not found: {e}")
            driver.save_screenshot("error_password_field.png")
            print(f"Password input field not found: {e}")
            raise

        # Click sign in button
        try:
            sign_in_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", sign_in_button)
            sign_in_button.click()
            logging.info("Clicked the 'SIGN IN' button.")
            print("Clicked the 'SIGN IN' button.")
        except Exception as e:
            logging.error(f"Error clicking 'SIGN IN' button: {e}")
            driver.save_screenshot("error_click_sign_in.png")
            print(f"Error clicking 'SIGN IN' button: {e}")
            raise

        # Click continue button
        try:
            continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
            )
            continue_button.click()
            logging.info("Clicked the 'Continue' button.")
            print("Clicked the 'Continue' button.")
        except Exception as e:
            logging.error(f"Error clicking 'Continue' button: {e}")
            driver.save_screenshot("error_click_continue.png")
            print(f"Error clicking 'Continue' button: {e}")
            raise

        # Navigate to stats page and perform search
        try:
            # Navigate to the correct stats URL
            driver.get(stats_url)
            logging.info(f"Navigated to player's stats page: {stats_url}")
            print(f"Navigated to player's stats page: {stats_url}")

            # Allow the stats page to load
            time.sleep(5)
            
            # Perform search
            search_term = "Jannik Sinner"  # Replace with desired search term
            perform_search(driver, wait, search_term)
            
            # Wait for search results to load
            time.sleep(3)
            
            # Apply filters if needed
            #try:
            #    apply_filter(driver, wait, filter_name="Gender", option="Male")
                # Add more filters as needed
            #except Exception as filter_e:
            #    logging.warning(f"Error applying filters: {filter_e}")
            #    print(f"Error applying filters: {filter_e}")
            
            # Scrape all pages
            #all_data = scrape_all_pages(driver, wait, base_url)
            
            # Save the data to CSV
            #save_to_csv(all_data, filename=f'player_statistics_{search_term.replace(" ", "_")}.csv')
            
            #logging.info("Scraping completed successfully.")
            #print("Scraping completed successfully.")
            
        except Exception as e:
            logging.error(f"Error in scraping workflow: {e}")
            driver.save_screenshot("error_scraping.png")
            print(f"Error in scraping workflow: {e}")
            raise
            
    except Exception as main_e:
        logging.error(f"Error in main workflow: {main_e}")
        driver.save_screenshot("error_main.png")
        print(f"Error in main workflow: {main_e}")
        raise
        
    finally:
        # Close the WebDriver
        driver.quit()
        logging.info("WebDriver closed.")
        print("WebDriver closed.")

if __name__ == "__main__":
    main()


SyntaxError: invalid syntax (3854048007.py, line 571)

adding radio button filters

In [120]:
# --- Complete Revised Script with "Gender" and "Segment" Filters ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")  # Start maximized for better visibility

# Uncomment the next line to run Chrome in headless mode after successful debugging
# chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def apply_filter(driver, wait, filter_name, option):
    """
    Applies a button-based filter on the UTR Sports website.

    :param driver: Selenium WebDriver instance.
    :param wait: WebDriverWait instance.
    :param filter_name: The name of the filter to apply (e.g., 'Gender').
    :param option: The option to select within the filter (e.g., 'Male').
    """
    try:
        # Locate the filter button by its visible text
        filter_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[contains(text(), '{filter_name}')]")
            )
        )
        filter_button.click()
        logging.info(f"Clicked the '{filter_name}' filter button.")
        print(f"Clicked the '{filter_name}' filter button.")

        # Wait for the filter options to appear
        # Assuming options are displayed as buttons within the filter dropdown/modal
        option_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[text()='{option}']")
            )
        )
        option_button.click()
        logging.info(f"Selected '{option}' from '{filter_name}' filter.")
        print(f"Selected '{option}' from '{filter_name}' filter.")

        # Optional: Wait for the page to refresh/update after applying the filter
        time.sleep(3)  # Adjust based on your internet speed and website response

    except Exception as e:
        logging.error(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        print(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        driver.save_screenshot(f"error_apply_filter_{filter_name}_{option}.png")

def apply_radio_filter(driver, wait, filter_name, option):
    """
    Applies a radio button filter on the UTR Sports website.

    :param driver: Selenium WebDriver instance.
    :param wait: WebDriverWait instance.
    :param filter_name: The name of the filter to apply (e.g., 'Segment').
    :param option: The radio button option to select within the filter (e.g., 'Pro').
    """
    try:
        # Locate the filter button by its visible text
        filter_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[contains(text(), '{filter_name}')]")
            )
        )
        filter_button.click()
        logging.info(f"Clicked the '{filter_name}' filter button.")
        print(f"Clicked the '{filter_name}' filter button.")

        # Wait for the filter modal or dropdown to appear
        wait.until(
            EC.visibility_of_element_located(
                (By.CLASS_NAME, "search__searchFilterModalBody__1EtSh")
            )
        )

        # Locate the radio button option by its label text
        option_label = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//label[contains(text(), '{option}')]")
            )
        )
        option_label.click()
        logging.info(f"Selected '{option}' from '{filter_name}' filter.")
        print(f"Selected '{option}' from '{filter_name}' filter.")

        # Optional: Wait for the page to refresh/update after applying the filter
        time.sleep(3)  # Adjust based on your internet speed and website response

    except Exception as e:
        logging.error(f"Failed to apply radio filter '{filter_name}' with option '{option}': {e}")
        print(f"Failed to apply radio filter '{filter_name}' with option '{option}': {e}")
        driver.save_screenshot(f"error_apply_radio_filter_{filter_name}_{option}.png")

def click_load_more(driver, wait):
    """Click the 'Load More' button until it's no longer present."""
    while True:
        try:
            # Locate the 'Load More' button
            load_more_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[span[text()='Load More']]"))
            )
            # Scroll to the 'Load More' button to ensure it's in view
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)
            # Click the 'Load More' button
            load_more_button.click()
            logging.info("Clicked the 'Load More' button.")
            print("Clicked the 'Load More' button.")
            # Wait for new content to load
            time.sleep(3)  # Adjust based on your internet speed
        except Exception as e:
            logging.info("No more 'Load More' buttons to click or button not found.")
            print("No more 'Load More' buttons to click or button not found.")
            break

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Attempt to locate the "Load More" button
        try:
            click_load_more(driver, wait)
            page += 1
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def main():
    """Main function to execute the scraping workflow."""
    base_url = "https://app.utrsports.net"  # Define the base URL

    try:
        # Navigate to the login page
        driver.get(login_url)
        logging.info("Navigated to the login page.")
        print("Navigated to the login page.")

        # Allow the page to load completely
        time.sleep(3)  # Adjust based on your internet speed

        # Locate the email input field
        try:
            email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))  # Replace with actual ID
            email_field.clear()
            email_field.send_keys(email)
            logging.info("Entered email.")
            print("Entered email.")
        except Exception as e:
            logging.error(f"Email input field not found: {e}")
            driver.save_screenshot("error_email_field.png")
            print(f"Email input field not found: {e}")
            driver.quit()
            return

        # Locate the password input field
        try:
            password_field = driver.find_element(By.ID, "passwordInput")  # Replace with actual ID
            password_field.clear()
            password_field.send_keys(password)
            logging.info("Entered password.")
            print("Entered password.")
        except Exception as e:
            logging.error(f"Password input field not found: {e}")
            driver.save_screenshot("error_password_field.png")
            print(f"Password input field not found: {e}")
            driver.quit()
            return

        # Locate and click the login button
        try:
            sign_in_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='SIGN IN']"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", sign_in_button)
            sign_in_button.click()
            logging.info("Clicked the 'SIGN IN' button.")
            print("Clicked the 'SIGN IN' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.save_screenshot("error_click_sign_in.png")
            print(f"An error occurred while clicking the 'SIGN IN' button: {e}")
            driver.quit()
            return

        # Wait until the "Continue" button appears and click it
        try:
            continue_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']"))
            )
            continue_button.click()
            logging.info("Clicked the 'Continue' button.")
            print("Clicked the 'Continue' button.")
        except Exception as e:
            logging.error(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.save_screenshot("error_click_continue.png")
            print(f"An error occurred while clicking the 'Continue' button: {e}")
            driver.quit()
            return

        # Navigate to the Player's Stats Page
        try:
            driver.get(stats_url)
            logging.info(f"Navigated to player's stats page: {stats_url}")
            print(f"Navigated to player's stats page: {stats_url}")

            # Allow the stats page to load
            time.sleep(5)  # Adjust based on your internet speed
        except Exception as e:
            logging.error(f"Error navigating to player's stats page: {e}")
            driver.save_screenshot("error_navigate_stats_page.png")
            print(f"Error navigating to player's stats page: {e}")
            driver.quit()
            return

        # Apply Filters

        # 1. Apply Gender Filter (Existing Functionality)
        try:
            apply_filter(driver, wait, filter_name="Gender", option="Male")
            # Add more button-based filters as needed
        except Exception as e:
            logging.error(f"Error applying 'Gender' filter: {e}")
            driver.save_screenshot("error_apply_gender_filter.png")
            print(f"Error applying 'Gender' filter: {e}")

        # 2. Apply Segment Filter (New Functionality)
        try:
            apply_radio_filter(driver, wait, filter_name="Segment", option="Pro")
            # You can change 'Pro' to any other segment like 'College', 'High School', etc.
        except Exception as e:
            logging.error(f"Error applying 'Segment' filter: {e}")
            driver.save_screenshot("error_apply_segment_filter.png")
            print(f"Error applying 'Segment' filter: {e}")

        # Click the "Load More" button until all data is loaded
        try:
            click_load_more(driver, wait)
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")

        # Scrape all pages (now all data is loaded)
        all_data = scrape_current_page(driver, wait, base_url)

        # Save the data to CSV
        save_to_csv(all_data)

    except Exception as main_e:
        logging.error(f"An unexpected error occurred: {main_e}")
        driver.save_screenshot("unexpected_error.png")
        print(f"An unexpected error occurred: {main_e}")
    finally:
        # Close the WebDriver
        driver.quit()
        logging.info("WebDriver closed.")
        print("WebDriver closed.")

if __name__ == "__main__":
    main()


Navigated to the login page.
Entered email.
Entered password.
Clicked the 'SIGN IN' button.
Clicked the 'Continue' button.
Navigated to player's stats page: https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751
Clicked the 'Gender' filter button.
Selected 'Male' from 'Gender' filter.
Clicked the 'Segment' filter button.
Selected 'Pro' from 'Segment' filter.
Clicked the 'Load More' button.
Clicked the 'Load More' button.
Clicked the 'Load More' button.
Clicked the 'Load More' button.
Clicked the 'Load More' button.
Clicked the 'Load More' button.
No more 'Load More' buttons to click or button not found.
Found 100 player cards on the current page.
Scraped player 1: {'Player Name': 'Jannik Sinner', 'Location': 'M • Italy', 'Singles UTR': '16.30', 'Doubles UTR': '15.28', 'Profile Link': 'https://app.utrsports.net/profiles/247320'}
Scraped player 2: {'Playe

correcting headless mode

In [34]:
# --- Complete Revised Script with Filter Interaction ---

# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
import logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

# Verify credentials are loaded
if not email or not password:
    raise ValueError("Email or password not found in environment variables.")

# Define the is_float function
def is_float(value):
    """Check if the provided value can be converted to a float."""
    try:
        float(value)
        return True
    except ValueError:
        return False

# Configure logging
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--window-size=1920,1080")  # Add this
chrome_options.add_argument("--headless=new")  # Use the new headless mode
chrome_options.add_argument("--disable-blink-features=AutomationControlled")  # Add this
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])  # Add this
chrome_options.add_experimental_option("useAutomationExtension", False)  # Add this

# Uncomment the next line to run Chrome in headless mode after successful debugging
#chrome_options.add_argument("--headless")  

# Initialize the WebDriver
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)

# Define WebDriverWait
wait = WebDriverWait(driver, 20)  # 20 seconds timeout

# Define the login URL and the player's stats URL
login_url = "https://app.utrsports.net/login"
stats_url = "https://app.utrsports.net/search?sportTypes=tennis&startDate=11/21/2024&utrMin=1&utrMax=16&utrType=verified&utrTeamType=singles&utrFitPosition=6&type=players&lat=37.2358078&lng=-121.9623751"

def apply_filter(driver, wait, filter_name, option):
    """
    Applies a filter on the UTR Sports website.

    :param driver: Selenium WebDriver instance.
    :param wait: WebDriverWait instance.
    :param filter_name: The name of the filter to apply (e.g., 'Gender').
    :param option: The option to select within the filter (e.g., 'Male').
    """
    try:
        # Locate the filter button by its visible text
        filter_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[contains(text(), '{filter_name}')]")
            )
        )
        filter_button.click()
        logging.info(f"Clicked the '{filter_name}' filter button.")
        print(f"Clicked the '{filter_name}' filter button.")

        # Wait for the filter options to appear
        # Using a more precise XPath to locate the option button directly
        option_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, f"//button[text()='{option}']")
            )
        )
        option_button.click()
        logging.info(f"Selected '{option}' from '{filter_name}' filter.")
        print(f"Selected '{option}' from '{filter_name}' filter.")

        # Optional: Wait for the page to refresh/update after applying the filter
        time.sleep(3)  # Adjust based on your internet speed and website response

    except Exception as e:
        logging.error(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        print(f"Failed to apply filter '{filter_name}' with option '{option}': {e}")
        driver.save_screenshot(f"error_apply_filter_{filter_name}_{option}.png")


def click_load_more(driver, wait):
    """Click the 'Load More' button until it's no longer present."""
    while True:
        try:
            # Locate the 'Load More' button
            load_more_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[span[text()='Load More']]"))
            )
            # Scroll to the 'Load More' button to ensure it's in view
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)
            # Click the 'Load More' button
            load_more_button.click()
            logging.info("Clicked the 'Load More' button.")
            print("Clicked the 'Load More' button.")
            # Wait for new content to load
            time.sleep(3)  # Adjust based on your internet speed
        except Exception as e:
            logging.info("No more 'Load More' buttons to click or button not found.")
            print("No more 'Load More' buttons to click or button not found.")
            break

def scrape_current_page(driver, wait, base_url):
    """Scrape player data from the current page."""
    data = []
    try:
        # Locate all player cards
        player_cards = driver.find_elements(By.CLASS_NAME, "search__cardContainer__1Z9Ee")  # Ensure this is the correct class
        logging.info(f"Found {len(player_cards)} player cards on the current page.")
        print(f"Found {len(player_cards)} player cards on the current page.")

        for idx, card in enumerate(player_cards, start=1):
            try:
                # Extract player name within the current card
                try:
                    name_element = card.find_element(By.CSS_SELECTOR, "div.name.show-ellipsis")
                    player_name = name_element.text.strip()
                except Exception as e:
                    player_name = "N/A"
                    logging.warning(f"Player name not found for player {idx}: {e}")
                    print(f"Player name not found for player {idx}: {e}")

                # Extract player location within the current card
                try:
                    location_element = card.find_element(By.CSS_SELECTOR, "div.place.show-ellipsis")
                    location = location_element.text.strip()
                except Exception as e:
                    location = "N/A"
                    logging.warning(f"Location not found for player {idx}: {e}")
                    print(f"Location not found for player {idx}: {e}")

                # Extract UTR values within the current card
                try:
                    utr_elements = card.find_elements(By.XPATH, ".//div[@title='Rated']")
                    singles_utr = "N/A"
                    doubles_utr = "N/A"

                    if len(utr_elements) >= 2:
                        # Extract Singles UTR
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]  # Assuming format "16.25 Verified"
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")

                        # Extract Doubles UTR
                        doubles_utr_text = utr_elements[1].text.strip()
                        doubles_utr = doubles_utr_text.split()[0]  # Assuming format "15.26 Verified"
                        if not is_float(doubles_utr):
                            doubles_utr = "N/A"
                            logging.warning(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                            print(f"Invalid Doubles UTR value for player {idx}: {doubles_utr_text}")
                    elif len(utr_elements) == 1:
                        # Only Singles UTR found
                        singles_utr_text = utr_elements[0].text.strip()
                        singles_utr = singles_utr_text.split()[0]
                        if not is_float(singles_utr):
                            singles_utr = "N/A"
                            logging.warning(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                            print(f"Invalid Singles UTR value for player {idx}: {singles_utr_text}")
                    else:
                        logging.warning(f"No UTR values found for player {idx}.")
                        print(f"No UTR values found for player {idx}.")

                except Exception as e:
                    singles_utr = "N/A"
                    doubles_utr = "N/A"
                    logging.warning(f"UTR not found for player {idx}: {e}")
                    print(f"UTR not found for player {idx}: {e}")

                # Extract Profile Link within the current card
                try:
                    profile_link_element = card.find_element(By.TAG_NAME, "a")
                    relative_profile_link = profile_link_element.get_attribute("href")
                    # Ensure the link is absolute
                    if relative_profile_link.startswith("/"):
                        profile_link = base_url + relative_profile_link
                    else:
                        profile_link = relative_profile_link
                except Exception as e:
                    profile_link = "N/A"
                    logging.warning(f"Profile link not found for player {idx}: {e}")
                    print(f"Profile link not found for player {idx}: {e}")

                # Compile the data
                player_data = {
                    'Player Name': player_name,
                    'Location': location,
                    'Singles UTR': singles_utr,
                    'Doubles UTR': doubles_utr,
                    'Profile Link': profile_link
                }

                data.append(player_data)
                logging.info(f"Scraped player {idx}: {player_data}")
                print(f"Scraped player {idx}: {player_data}")

            except Exception as card_e:
                logging.error(f"Error scraping player card {idx}: {card_e}")
                driver.save_screenshot(f"error_scraping_player_{idx}.png")
                print(f"Error scraping player card {idx}: {card_e}")
                continue  # Proceed to next card

    except Exception as e:
        logging.error(f"Error locating player cards: {e}")
        driver.save_screenshot("error_locating_player_cards.png")
        print(f"Error locating player cards: {e}")

    return data

def scrape_all_pages(driver, wait, base_url):
    """Scrape data from all paginated pages."""
    all_data = []
    page = 1

    while True:
        logging.info(f"Scraping page {page}.")
        print(f"Scraping page {page}.")

        # Scrape data from the current page
        page_data = scrape_current_page(driver, wait, base_url)
        all_data.extend(page_data)

        # Attempt to locate the "Load More" button
        try:
            click_load_more(driver, wait)
            page += 1
        except Exception as e:
            logging.error(f"Error while clicking 'Load More': {e}")
            driver.save_screenshot("error_load_more.png")
            print(f"Error while clicking 'Load More': {e}")
            break

    return all_data

def save_to_csv(data, filename='player_statistics.csv'):
    """Save the scraped data to a CSV file."""
    try:
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}.")
        print(f"Data saved to {filename}.")
    except Exception as e:
        logging.error(f"Error saving data to CSV: {e}")
        print(f"Error saving data to CSV: {e}")

def perform_search(driver, wait, search_term):
    """
    Performs a search by targeting the exact HTML structure of the search elements.
    """
    try:
        # Wait for the page to be fully loaded
        time.sleep(5)
        
        logging.info("Attempting to find search wrapper...")
        print("Attempting to find search wrapper...")
        
        # First find the desktop search wrapper
        try:
            # Target the nav-search-wrapper first
            search_wrapper = wait.until(
                EC.presence_of_element_located((
                    By.CSS_SELECTOR, 
                    "div.nav-search-wrapper div.d-none.d-lg-block div.globalSearch__globalSearchWrapper__NglK2"
                ))
            )
            
            logging.info("Found desktop search wrapper")
            print("Found desktop search wrapper")
            
            # Find the search container within the wrapper
            search_container = search_wrapper.find_element(
                By.CSS_SELECTOR,
                "div.globalSearch__globalSearchContainer__3_82H"
            )
            
            # Find the input container
            input_container = search_container.find_element(
                By.CSS_SELECTOR,
                "div.globalSearch__globalSearchInputContainer__35Wld"
            )
            
            # Find the input element directly using its data-testid
            search_input = input_container.find_element(
                By.CSS_SELECTOR,
                "input[data-testid='globalSearch-searchInputButton-eUX6nl19']"
            )
            
            # Make sure search input is visible
            if not search_input.is_displayed():
                # Try to find and click the expander
                expander = search_wrapper.find_element(
                    By.CSS_SELECTOR,
                    "div.globalSearch__searchExpander__2jpEM"
                )
                driver.execute_script("arguments[0].click();", expander)
                time.sleep(2)
            
            # Clear and enter search term
            search_input.clear()
            search_input.send_keys(search_term)
            logging.info(f"Entered search term: {search_term}")
            print(f"Entered search term: {search_term}")
            
            time.sleep(2)
            
            # Wait for and click the "SEE ALL" link
            see_all_link = wait.until(
                EC.element_to_be_clickable((
                    By.XPATH,
                    "//a[contains(., 'SEE ALL')]"
                ))
            )
            
            see_all_link.click()
            logging.info("Clicked 'SEE ALL' link")
            print("Clicked 'SEE ALL' link")
        
            # After clicking "SEE ALL" and letting results load
            time.sleep(3)
            
            # Wait for and click the first result
            try:
                first_result = wait.until(
                    EC.element_to_be_clickable((
                        By.CSS_SELECTOR,
                        "div.search__cardContainer__1Z9Ee > a"
                    ))
                )
                
                logging.info("Found first search result, attempting to click...")
                print("Found first search result, attempting to click...")
                
                # Get the href for logging
                profile_link = first_result.get_attribute('href')
                logging.info(f"Clicking profile link: {profile_link}")
                print(f"Clicking profile link: {profile_link}")
                
                # Try JavaScript click first as it's more reliable
                driver.execute_script("arguments[0].click();", first_result)
                
                # Wait for the profile page to load
                time.sleep(3)
                
                logging.info("Clicked first search result successfully")
                print("Clicked first search result successfully")
                
                navigate_to_stats(driver, wait)
                
            except Exception as e:
                logging.error(f"Error clicking first search result: {str(e)}")
                print(f"Error clicking first search result: {str(e)}")
                driver.save_screenshot("error_clicking_result.png")
                raise
                
        except Exception as e:
            logging.error(f"Error during search: {str(e)}")
            print(f"Error during search: {str(e)}")
            # Log the current state
            logging.info("Current page state:")
            logging.info(driver.page_source)
            driver.save_screenshot("search_error.png")
            raise
            
    except Exception as e:
        logging.error(f"Error in perform_search: {str(e)}")
        print(f"Error in perform_search: {str(e)}")
        driver.save_screenshot("search_error_final.png")
        raise

def navigate_to_stats(driver, wait):
    """
    Clicks the Stats tab button on a player's profile page.
    
    Args:
        driver: Selenium WebDriver instance
        wait: WebDriverWait instance
    """
    try:
        # Wait for and find the Stats tab button
        stats_button = wait.until(
            EC.element_to_be_clickable((
                By.XPATH,
                "//button[contains(@class, 'btn-tab')][.//div[contains(text(), 'Stats')]]"
            ))
        )
        
        logging.info("Found Stats tab button, attempting to click...")
        print("Found Stats tab button, attempting to click...")
        
        # Try regular click first
        try:
            stats_button.click()
        except:
            # If regular click fails, try JavaScript click
            driver.execute_script("arguments[0].click();", stats_button)
        
        # Wait for stats content to load
        time.sleep(3)
        
        logging.info("Successfully navigated to Stats tab")
        print("Successfully navigated to Stats tab")
        
    except Exception as e:
        logging.error(f"Error navigating to Stats tab: {str(e)}")
        print(f"Error navigating to Stats tab: {str(e)}")
        driver.save_screenshot("error_stats_navigation.png")
        raise


def main():
   """Main function to execute the scraping workflow."""
   base_url = "https://app.utrsports.net"

   try:
       # Navigate and log
       driver.get(login_url)
       logging.info("Navigated to login page")
       print("Navigated to login page")
       time.sleep(3)

       # Fill form fields with logging
       try:
           email_field = wait.until(EC.presence_of_element_located((By.ID, "emailInput")))
           email_field.clear()
           email_field.send_keys(email)
           logging.info("Entered email")
           print("Entered email")

           password_field = driver.find_element(By.ID, "passwordInput")
           password_field.clear()
           password_field.send_keys(password)
           logging.info("Entered password") 
           print("Entered password")
       except Exception as e:
           logging.error(f"Error filling form fields: {e}")
           driver.save_screenshot("form_field_error.png")
           raise

       # Submit form with logging
       try:
           logging.info("Attempting form submission...")
           print("Attempting form submission...")
           
           driver.execute_script("""
               var form = document.querySelector('form');
               var event = new Event('submit', { bubbles: true, cancelable: true });
               form.dispatchEvent(event);
               if (!event.defaultPrevented) {
                   form.submit();
               }
           """)
           time.sleep(3)
           
           logging.info(f"Current URL after submission: {driver.current_url}")
           print(f"Current URL after submission: {driver.current_url}")
       except Exception as e:
           logging.error(f"Error submitting form: {e}")
           driver.save_screenshot("form_submit_error.png")
           raise

       # Handle continue button with logging
       try:
           continue_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Continue']")))
           continue_button.click()
           logging.info("Clicked continue button")
           print("Clicked continue button")
           time.sleep(3)
       except Exception as e:
           logging.error(f"Error clicking continue: {e}")
           driver.save_screenshot("continue_error.png")
           raise

       # Navigate to stats and search with logging
       try:
           driver.get(stats_url)
           logging.info(f"Navigated to stats page: {stats_url}")
           print(f"Navigated to stats page: {stats_url}")
           time.sleep(5)

           search_term = "Jannik Sinner"
           perform_search(driver, wait, search_term)
           time.sleep(3)
       except Exception as e:
           logging.error(f"Error in search workflow: {e}")
           driver.save_screenshot("search_error.png")
           raise

   except Exception as e:
       logging.error(f"Main error: {e}")
       driver.save_screenshot("main_error.png")
       raise
   finally:
       driver.quit()
       logging.info("Driver closed")
       print("Driver closed")

if __name__ == "__main__":
   main()



Navigated to login page
Entered email
Entered password
Attempting form submission...
Current URL after submission: https://app.utrsports.net/login
Driver closed


TimeoutException: Message: 
Stacktrace:
0   chromedriver                        0x0000000104607ac4 cxxbridge1$str$ptr + 3651580
1   chromedriver                        0x0000000104600314 cxxbridge1$str$ptr + 3620940
2   chromedriver                        0x00000001040684b4 cxxbridge1$string$len + 89224
3   chromedriver                        0x00000001040ac898 cxxbridge1$string$len + 368748
4   chromedriver                        0x00000001040e60fc cxxbridge1$string$len + 604368
5   chromedriver                        0x00000001040a10b0 cxxbridge1$string$len + 321668
6   chromedriver                        0x00000001040a1d00 cxxbridge1$string$len + 324820
7   chromedriver                        0x00000001045d2e08 cxxbridge1$str$ptr + 3435328
8   chromedriver                        0x00000001045d6120 cxxbridge1$str$ptr + 3448408
9   chromedriver                        0x00000001045ba17c cxxbridge1$str$ptr + 3333812
10  chromedriver                        0x00000001045d69e0 cxxbridge1$str$ptr + 3450648
11  chromedriver                        0x00000001045ab988 cxxbridge1$str$ptr + 3274432
12  chromedriver                        0x00000001045f10f4 cxxbridge1$str$ptr + 3558956
13  chromedriver                        0x00000001045f1270 cxxbridge1$str$ptr + 3559336
14  chromedriver                        0x00000001045fff88 cxxbridge1$str$ptr + 3620032
15  libsystem_pthread.dylib             0x000000019fa6df94 _pthread_start + 136
16  libsystem_pthread.dylib             0x000000019fa68d34 thread_start + 8


test api

In [38]:
import requests
import logging

logging.basicConfig(level=logging.DEBUG)
# Load environment variables
load_dotenv()

# Retrieve credentials
email = os.getenv('UTR_EMAIL')
password = os.getenv('UTR_PASSWORD')

def test_utr_api():
    url = "https://app.universaltennis.com/api/v1/auth/login"
    headers = {"Content-Type": "application/json"}
    data = {
        "email": os.getenv('UTR_EMAIL'),
        "password": os.getenv('UTR_PASSWORD')
    }
    
    response = requests.post(url, json=data, headers=headers)
    print(f"Status Code: {response.status_code}")
    print(f"Response Headers: {dict(response.headers)}")
    print(f"Response Body: {response.text}")
    
    return response

test_utr_api()

Status Code: 404
Response Headers: {'Date': 'Thu, 28 Nov 2024 15:23:41 GMT', 'Content-Type': 'application/octet-stream', 'Content-Length': '0', 'Connection': 'keep-alive', 'Access-Control-Allow-Headers': 'newrelic', 'ETag': 'W/"0-2jmj7l5rSw0yVb/vlWAYkK/YBwk"', 'X-Powered-By': 'ASP.NET', 'x-azure-ref': '20241128T152341Z-166fdcf7d95crzfmhC1LAXn9q4000000037000000000cazx', 'X-Cache': 'CONFIG_NOCACHE'}
Response Body: 


<Response [404]>

In [41]:
import requests
import json
from datetime import datetime

def test_utr_api():
    player_id = 247320
    url = f"https://api.utrsports.net/v4/player/{player_id}/all-stats"
    params = {
        'type': 'singles',
        'resultType': 'verified',
        'months': 12,
        'fetchAllResults': 'false'
    }
    
    try:
        response = requests.get(url, params=params)
        print(f"Status Code: {response.status_code}")
        print("\nResponse Headers:")
        print(json.dumps(dict(response.headers), indent=2))
        
        if response.ok:
            print("\nResponse Data:")
            print(json.dumps(response.json(), indent=2))
        else:
            print(f"\nError Response: {response.text}")
            
    except Exception as e:
        print(f"Error: {str(e)}")

if __name__ == "__main__":
    test_utr_api()

Status Code: 400

Response Headers:
{
  "Date": "Thu, 28 Nov 2024 15:34:12 GMT",
  "Transfer-Encoding": "chunked",
  "Connection": "keep-alive",
  "Access-Control-Allow-Headers": "newrelic",
  "X-Powered-By": "ASP.NET",
  "x-azure-ref": "20241128T153412Z-166fdcf7d9582bqrhC1LAX06r0000000030g000000007tpx",
  "X-Cache": "CONFIG_NOCACHE"
}

Error Response: Token is missing


In [121]:
import requests

def get_utr_stats(player_id=247320):
    url = f"https://api.utrsports.net/v4/player/{player_id}/all-stats"
    params = {
        'type': 'singles',
        'resultType': 'verified',
        'months': 12,
        'fetchAllResults': 'false'
    }
    headers = {
        'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJNZW1iZXJJZCI6IjE1Mjg2ODIiLCJlbWFpbCI6Im5laWwudHJlYXRAZ21haWwuY29tIiwiVmVyc2lvbiI6IjEiLCJEZXZpY2VMb2dpbklkIjoiMjE4ODI1NDQiLCJuYmYiOjE3MzE4NjY4MTIsImV4cCI6MTczNDQ1ODgxMiwiaWF0IjoxNzMxODY2ODEyfQ.zUh9Lvwzu6PUgTGe9Yr3sX8LQFGWKBsqxlPH7BavO-U',
        'newrelic': 'eyJ2IjpbMCwxXSwiZCI6eyJ0eSI6IkJyb3dzZXIiLCJhYyI6IjMwMjgxMjMiLCJhcCI6IjUzOTYzMzgzOCIsImlkIjoiODM5Nzg0NTY2NTcxMWFkOSIsInRyIjoiYzc2ZmI0YWMzNWE3ZjE4ZmE5MWMyNTAwMGFlNjZmNzAiLCJ0aSI6MTczMjgwODIxMTkyM319',
        'origin': 'https://app.utrsports.net',
        'referer': 'https://app.utrsports.net/',
        'x-client-name': 'buildId - 96461'
    }
    
    response = requests.get(url, params=params, headers=headers)
    return response

response = get_utr_stats()
print(f"Status: {response.status_code}")
print(f"Response: {response.text}")

Status: 200
Response: {
  "title": "verified singles stats - Last 12 Months",
  "subtitle": "December 2023 - November 2024",
  "monthsCount": 12,
  "currentRating": 16.31,
  "currentRatingDisplay": "16.31",
  "ratingCircle": {
    "singlesUtr": 16.31,
    "ratingStatusSingles": "Rated",
    "ratingProgressSingles": 100.0,
    "singlesUtrDisplay": "16.31"
  },
  "winsCount": 29,
  "winStreak": 29,
  "lossesCount": 1,
  "recordWinPercentage": "96.7%",
  "notableMatch": {
    "winner": {
      "isWinner": true,
      "set1": 7,
      "set2": 5,
      "set3": 7,
      "set4": 0,
      "set5": 0,
      "set6": null,
      "tiebreakerSet1": 11,
      "tiebreakerSet2": null,
      "tiebreakerSet3": 7,
      "tiebreakerSet4": null,
      "tiebreakerSet5": null,
      "tiebreakerSet6": null,
      "teamId": null,
      "teamName": null
    },
    "loser": {
      "isWinner": false,
      "set1": 6,
      "set2": 7,
      "set3": 6,
      "set4": 0,
      "set5": 0,
      "set6": null,
      "ti

In [133]:
import requests

def search_utr_players(query, top=10, skip=0):
    url = "https://api.utrsports.net/v2/search"
    params = {
        'schoolClubSearch': 'true',
        'query': query,
        'top': top,
        'skip': skip
    }
    headers = {
        'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJNZW1iZXJJZCI6IjE1Mjg2ODIiLCJlbWFpbCI6Im5laWwudHJlYXRAZ21haWwuY29tIiwiVmVyc2lvbiI6IjEiLCJEZXZpY2VMb2dpbklkIjoiMjE4ODI1NDQiLCJuYmYiOjE3MzE4NjY4MTIsImV4cCI6MTczNDQ1ODgxMiwiaWF0IjoxNzMxODY2ODEyfQ.zUh9Lvwzu6PUgTGe9Yr3sX8LQFGWKBsqxlPH7BavO-U',
        'newrelic': 'eyJ2IjpbMCwxXSwiZCI6eyJ0eSI6IkJyb3dzZXIiLCJhYyI6IjMwMjgxMjMiLCJhcCI6IjUzOTYzMzgzOCIsImlkIjoiODM5Nzg0NTY2NTcxMWFkOSIsInRyIjoiYzc2ZmI0YWMzNWE3ZjE4ZmE5MWMyNTAwMGFlNjZmNzAiLCJ0aSI6MTczMjgwODIxMTkyM319',
        'origin': 'https://app.utrsports.net',
        'referer': 'https://app.utrsports.net/',
        'x-client-name': 'buildId - 96461'
    }
    
    response = requests.get(url, params=params, headers=headers)
    return response

# Example usage
response = search_utr_players("Carlos Alcaraz")
print(f"Status: {response.status_code}")
print(f"Response: {response.text}")

Status: 200
Response: {"players":{"hits":[{"fields":null,"source":{"showDecimals":null,"id":3569175,"profileId":778697,"firstName":"Carlos","lastName":"Alcaraz","playerFirstName":"Carlos","playerLastName":"Alcaraz","displayName":"Carlos Alcaraz","gender":"Male","age":null,"threeMonthRating":16.14,"singlesUtr":16.23,"ratingStatusSingles":"Rated","ratingProgressSingles":100.0,"doublesUtr":14.91,"myUtrSingles":16.23,"myUtrStatusSingles":"Rated","myUtrProgressSingles":100.0,"myUtrDoubles":14.47,"finalPbr":null,"singlesPkbRating":null,"singlesPkbReliability":null,"doublesPkbRating":null,"doublesPkbReliability":null,"unverifiedSinglesPkbRating":null,"unverifiedSinglesPkbReliability":null,"unverifiedDoublesPkbRating":null,"unverifiedDoublesPkbReliability":null,"historicPkbSinglesRating":null,"historicPkbSinglesRatingDate":null,"historicPkbDoublesRating":null,"historicPkbDoublesRatingDate":null,"birthDate":null,"singlesUtrDisplay":"16.23","doublesUtrDisplay":"14.91","myUtrSinglesDisplay":"16.2

In [126]:
response.status_code

200

In [92]:
import requests
import jwt
import time
from datetime import datetime
import os
from dotenv import load_dotenv

class UTRAuthManager:
    def __init__(self):
        load_dotenv()
        self.email = os.getenv('UTR_EMAIL')
        self.password = os.getenv('UTR_PASSWORD')
        self.token = None
        self.newrelic = None
        self.session = requests.Session()

    def login(self):
        url = "https://app.utrsports.net/api/v1/auth/login"
        response = self.session.post(url, json={
            'email': self.email,
            'password': self.password
        })
        response.raise_for_status()
        self.token = response.headers.get('Authorization')
        self.newrelic = response.headers.get('newrelic')

    def get_headers(self):
        if not self.token or self._is_token_expired():
            self.login()
            
        return {
            'Authorization': self.token,
            'newrelic': self.newrelic,
            'origin': 'https://app.utrsports.net',
            'referer': 'https://app.utrsports.net/',
            'x-client-name': 'buildId - 96461'
        }

    def _is_token_expired(self):
        if not self.token:
            return True
        try:
            token = self.token.split(' ')[1]
            payload = jwt.decode(token, options={"verify_signature": False})
            return datetime.fromtimestamp(payload['exp']) <= datetime.now()
        except:
            return True

    def get_player_stats(self, player_id):
        headers = self.get_headers()
        url = f"https://api.utrsports.net/v4/player/{player_id}/all-stats"
        params = {
            'type': 'singles',
            'resultType': 'verified',
            'months': 12,
            'fetchAllResults': 'true'
        }
        response = self.session.get(url, headers=headers, params=params)
        response.raise_for_status()
        return response.json()

In [116]:
auth_manager = UTRAuthManager()
stats = auth_manager.get_player_stats(247320)

In [None]:
stats

In [61]:
import pandas as pd

# Convert the dictionary into a pandas DataFrame
df = pd.DataFrame([stats])

In [119]:
stats['extendedRatingProfile']['history']

[{'id': 483184296,
  'ratingStatus': 3,
  'rating': 16.26,
  'ratingDisplay': '16.26',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-11-25T00:00:00'},
 {'id': 482274595,
  'ratingStatus': 3,
  'rating': 16.24,
  'ratingDisplay': '16.23',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-11-18T00:00:00'},
 {'id': 481328517,
  'ratingStatus': 3,
  'rating': 16.21,
  'ratingDisplay': '16.21',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-11-11T00:00:00'},
 {'id': 480422788,
  'ratingStatus': 3,
  'rating': 16.19,
  'ratingDisplay': '16.19',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-11-04T00:00:00'},
 {'id': 479639991,
  'ratingStatus': 3,
  'rating': 16.12,
  'ratingDisplay': '16.12',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-10-28T00:00:00'},
 {'id': 478986416,
  'ratingStatus': 3,
  'rating': 16.08,
  'ratingDisplay': '16.07',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-10-21T00:00:00'},
 {'id': 477817403,
  'ratingStatus': 3,
  'rating': 16.05,
  'ra

In [114]:
import pandas as pd

# Assuming the provided data is stored in a variable called 'data'

# Create an empty list to store processed data
processed_data = []

# Iterate through each entry in the data
for entry in stats['victoryMarginChart']['results']:
    # Extract the 'descriptions' list
    descriptions = entry.pop('descriptions')

    # Iterate through each description and create a new row
    for description in descriptions:
        # Create a copy of the original entry
        new_row = entry.copy()

        # Update the new row with the description details
        new_row.update(description)

        # Append the new row to the processed data list
        processed_data.append(new_row)

# Create a DataFrame from the processed data
df = pd.DataFrame(processed_data)

In [115]:
df

Unnamed: 0,index,rating,playerPostedScore,aggregate,details,resultId,winnerSets,loserSets,tiebreakSets,resultDate,winner1,winner2,loser1,loser2,eventName,eventLocation,isWinner
0,75,13.85,False,"{'count': 1, 'winsCount': 1, 'lossesCount': 0,...","Wed Jan 10 Win vs. M. Polmans (13.86) 6-4, 6-0",43198569,"[6, 6, 0, 0, 0]","[4, 0, 0, 0, 0]","[-1, -1, -1, -1, -1]",2024-01-10T00:00:00,J.Sinner,,M.Polmans,,Exhibition Kooyong Classic,,True
1,60,14.95,False,"{'count': 1, 'winsCount': 1, 'lossesCount': 0,...",Sun Jan 14 Win vs. B. Van De Zandschulp (14.95...,43236849,"[6, 7, 6, 0, 0]","[4, 5, 3, 0, 0]","[-1, -1, -1, -1, -1]",2024-01-14T01:00:00,J.Sinner,,B.VanDeZandschulp,,Australian Open,,True
2,75,14.75,False,"{'count': 1, 'winsCount': 1, 'lossesCount': 0,...","Wed Jan 17 Win vs. J. De Jong (14.75) 6-2, 6-2...",43266492,"[6, 6, 6, 0, 0]","[2, 2, 2, 0, 0]","[-1, -1, -1, -1, -1]",2024-01-17T01:00:00,J.Sinner,,J.DeJong,,Australian Open,,True
3,81,15.20,False,"{'count': 2, 'winsCount': 2, 'lossesCount': 0,...","Fri Jan 19 Win vs. S. Baez (15.22) 6-0, 6-1, 6-3",43299847,"[6, 6, 6, 0, 0]","[0, 1, 3, 0, 0]","[-1, -1, -1, -1, -1]",2024-01-19T01:00:00,J.Sinner,,S.Baez,,Australian Open,,True
4,81,15.20,False,"{'count': 2, 'winsCount': 2, 'lossesCount': 0,...","Thu Nov 21 Win vs. S. Baez (15.22) 6-2, 6-1",49920112,"[6, 6, 0, 0, 0]","[2, 1, 0, 0, 0]","[-1, -1, 0, 0, 0]",2024-11-21T00:00:00,J.Sinner,,S.Baez,,Finals Quarter-final - Italy vs. Argentina,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,57,15.75,False,"{'count': 1, 'winsCount': 1, 'lossesCount': 0,...","Wed Oct 9 Win vs. B. Shelton (15.74) 6-4, 7-6(1)",48470935,"[6, 7, 0, 0, 0]","[4, 6, 0, 0, 0]","[-1, 1, -1, -1, -1]",2024-10-09T04:30:00,J.Sinner,,B.Shelton,,"ATP Shanghai, China",,True
76,72,15.85,False,"{'count': 1, 'winsCount': 1, 'lossesCount': 0,...","Thu Oct 10 Win vs. D. Medvedev (15.83) 6-1, 6-4",48482587,"[6, 6, 0, 0, 0]","[1, 4, 0, 0, 0]","[-1, -1, -1, -1, -1]",2024-10-10T07:00:00,J.Sinner,,D.Medvedev,,"ATP Shanghai, China",,True
77,63,15.90,False,"{'count': 2, 'winsCount': 2, 'lossesCount': 0,...","Sun Nov 10 Win vs. A. De Minaur (15.88) 6-3, 6-4",49338030,"[6, 6, 0, 0, 0]","[3, 4, 0, 0, 0]","[-1, -1, -1, -1, -1]",2024-11-10T19:30:00,J.Sinner,,A.DeMinaur,,ATP World Tour Finals,,True
78,63,15.90,False,"{'count': 2, 'winsCount': 2, 'lossesCount': 0,...","Sat Nov 23 Win vs. A. De Minaur (15.88) 6-3, 6-4",49920107,"[6, 6, 0, 0, 0]","[3, 4, 0, 0, 0]","[-1, -1, 0, 0, 0]",2024-11-23T00:00:00,J.Sinner,,A.DeMinaur,,Finals Semi-final - Italy vs. Australia,,True


In [113]:
stats['victoryMarginChart']['results']

[{'index': 75,
  'rating': 13.85,
  'playerPostedScore': False,
  'aggregate': {'count': 1, 'winsCount': 1, 'lossesCount': 0, 'tiesCount': 0},
  'descriptions': [{'details': 'Wed Jan 10 Win vs. M. Polmans (13.86) 6-4, 6-0',
    'resultId': 43198569,
    'winnerSets': [6, 6, 0, 0, 0],
    'loserSets': [4, 0, 0, 0, 0],
    'tiebreakSets': [-1, -1, -1, -1, -1],
    'resultDate': '2024-01-10T00:00:00',
    'winner1': 'J.Sinner',
    'winner2': None,
    'loser1': 'M.Polmans',
    'loser2': None,
    'eventName': 'Exhibition Kooyong Classic',
    'eventLocation': None,
    'isWinner': True}]},
 {'index': 60,
  'rating': 14.95,
  'playerPostedScore': False,
  'aggregate': {'count': 1, 'winsCount': 1, 'lossesCount': 0, 'tiesCount': 0},
  'descriptions': [{'details': 'Sun Jan 14 Win vs. B. Van De Zandschulp (14.95) 6-4, 7-5, 6-3',
    'resultId': 43236849,
    'winnerSets': [6, 7, 6, 0, 0],
    'loserSets': [4, 5, 3, 0, 0],
    'tiebreakSets': [-1, -1, -1, -1, -1],
    'resultDate': '2024-01-1

In [None]:
stats.get('', {})

{}

In [99]:
stats.get('extendedRatingProfile', {}).get('history', [])

[{'id': 483184296,
  'ratingStatus': 3,
  'rating': 16.26,
  'ratingDisplay': '16.26',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-11-25T00:00:00'},
 {'id': 482274595,
  'ratingStatus': 3,
  'rating': 16.24,
  'ratingDisplay': '16.23',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-11-18T00:00:00'},
 {'id': 481328517,
  'ratingStatus': 3,
  'rating': 16.21,
  'ratingDisplay': '16.21',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-11-11T00:00:00'},
 {'id': 480422788,
  'ratingStatus': 3,
  'rating': 16.19,
  'ratingDisplay': '16.19',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-11-04T00:00:00'},
 {'id': 479639991,
  'ratingStatus': 3,
  'rating': 16.12,
  'ratingDisplay': '16.12',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-10-28T00:00:00'},
 {'id': 478986416,
  'ratingStatus': 3,
  'rating': 16.08,
  'ratingDisplay': '16.07',
  'type': 'WeeklyAverage_Singles',
  'date': '2024-10-21T00:00:00'},
 {'id': 477817403,
  'ratingStatus': 3,
  'rating': 16.05,
  'ra

In [97]:
# Extract the 'history' list
history_data = stats['extendedRatingProfile']['history']

# Create a DataFrame from the 'history' list
history_df = pd.DataFrame(history_data)

In [98]:
history_df

Unnamed: 0,id,ratingStatus,rating,ratingDisplay,type,date
0,483184296,3,16.26,16.26,WeeklyAverage_Singles,2024-11-25T00:00:00
1,482274595,3,16.24,16.23,WeeklyAverage_Singles,2024-11-18T00:00:00
2,481328517,3,16.21,16.21,WeeklyAverage_Singles,2024-11-11T00:00:00
3,480422788,3,16.19,16.19,WeeklyAverage_Singles,2024-11-04T00:00:00
4,479639991,3,16.12,16.12,WeeklyAverage_Singles,2024-10-28T00:00:00
...,...,...,...,...,...,...
432,199985,3,11.12,11.12,WeeklyAverage_Singles,2016-07-25T00:00:00
433,173049,3,11.06,11.06,WeeklyAverage_Singles,2016-07-18T00:00:00
434,128092,3,11.08,11.08,WeeklyAverage_Singles,2016-07-11T00:00:00
435,83105,3,11.10,11.10,WeeklyAverage_Singles,2016-07-04T00:00:00


In [64]:
# Extract the 'ratingTrendChart' dictionary
rating_trend_chart_data = stats['ratingTrendChart']

# Create a DataFrame for the 'months' data
months_df = pd.DataFrame(rating_trend_chart_data['months'])

# Further separate the nested 'ratings' data in each month
ratings_df = months_df.explode('ratings')['ratings'].apply(pd.Series)

# Concatenate the exploded ratings data with the corresponding month data
rating_trend_df = pd.concat([months_df, ratings_df], axis=1)

# Extract the 'victoryMarginChart' dictionary
victory_margin_chart_data = stats['victoryMarginChart']

# Create a DataFrame for the 'results' data
victory_margin_df = pd.DataFrame(victory_margin_chart_data['results'])

In [91]:
victory_margin_df['descriptions'][3]

[{'details': 'Sat Aug 17 Win vs. A. Rublev (15.72) 4-6, 7-5, 6-4',
  'resultId': 47481773,
  'winnerSets': [4, 7, 6, 0, 0],
  'loserSets': [6, 5, 4, 0, 0],
  'tiebreakSets': [-1, -1, -1, -1, -1],
  'resultDate': '2024-08-17T17:55:00',
  'winner1': 'J.Sinner',
  'winner2': None,
  'loser1': 'A.Rublev',
  'loser2': None,
  'eventName': 'Cincinnati, USA',
  'eventLocation': None,
  'isWinner': True}]