In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException, NoSuchElementException, WebDriverException
import time

In [None]:
# Chrome command-line switches for the scraping session, registered in this order.
chrome_flags = (
    '--disable-gpu',
    '--no-sandbox',
    '--disable-infobars',
    '--disable-extensions',
    '--disable-notifications',
)

options = Options()
for chrome_flag in chrome_flags:
    options.add_argument(chrome_flag)

# Resolve the chromedriver executable through webdriver_manager, then start
# Chrome with the options assembled above.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

In [None]:
def dismiss_cookies(driver, timeout=8):
    """Best-effort dismissal of a OneTrust-style cookie consent banner.

    Strategies are tried in order, stopping at the first success:

    1. Look up well-known accept/reject buttons in the top-level document.
    2. Look for an accept button inside the visible ``#onetrust-banner-sdk``
       container and click it via JavaScript.
    3. Repeat the button lookup inside iframes whose ``src``/``name`` mentions
       consent, onetrust, privacy or cookie.
    4. Call ``OneTrust.AcceptAll()`` if the page exposes it, otherwise remove
       the banner element from the DOM.

    Args:
        driver: Selenium WebDriver positioned on the page to clean up.
        timeout: Seconds to wait for the ``<body>`` element before probing.
            It is not used as a wait for the banner itself.

    Returns:
        True if a button was clicked, ``OneTrust.AcceptAll()`` was called, or
        the banner element was removed. False otherwise, in which case the
        list of attempted locators is printed.
    """
    wait = WebDriverWait(driver, timeout)
    tried = []

    def _visible(el):
        """Return True only when ``el`` is displayed and enabled; False on any error."""
        try:
            return el.is_displayed() and el.is_enabled()
        except Exception:
            return False

    def _click(el):
        """Click ``el`` natively, falling back to a JavaScript click.

        Returns True if either click went through, False if both failed.
        """
        try:
            el.click()
            return True
        except WebDriverException:
            pass  # e.g. click intercepted by an overlay; try the JS route
        try:
            driver.execute_script("arguments[0].click();", el)
            return True
        except WebDriverException:
            return False

    # 0) Touch the page with a trivial script, then make sure <body> exists.
    #    Note this does not wait for the banner itself to mount.
    driver.execute_script("window._probe = Date.now();")
    try:
        wait.until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "body")))
    except TimeoutException:
        pass

    # 1) Direct hit: common OneTrust IDs/classes
    candidates = [
        (By.ID, "onetrust-accept-btn-handler"),
        (By.CSS_SELECTOR, "button#onetrust-accept-btn-handler"),
        (By.CSS_SELECTOR,
         "#onetrust-banner-sdk button#onetrust-accept-btn-handler"),
        (By.CSS_SELECTOR, "button#onetrust-reject-all-handler"
         ),  # sometimes only Reject is visible first
        (By.CSS_SELECTOR, "[data-testid='onetrust-accept-btn-handler']"),
        (By.XPATH,
         "//button[contains(@id,'accept') and contains(translate(., 'ACEPT','acept'),'accept')]"
         ),
        (By.XPATH,
         "//button[contains(@aria-label,'Accept') or contains(normalize-space(.),'Accept')]"
         ),
    ]
    for how, what in candidates:
        tried.append(f"{how}={what}")
        try:
            btn = driver.find_element(how, what)
        except (NoSuchElementException, WebDriverException):
            # The lookup itself failed, so there is no element to click.
            # (Previously a JS click was attempted here on an unbound or
            # stale ``btn`` left over from an earlier candidate.)
            continue
        if _visible(btn) and _click(btn):
            return True

    # 2) Banner container present and visible: click its accept button via JS
    try:
        banner = driver.find_element(By.ID, "onetrust-banner-sdk")
        if _visible(banner):
            try:
                btn = banner.find_element(By.CSS_SELECTOR,
                                          "button[id*='accept']")
                driver.execute_script("arguments[0].click();", btn)
                return True
            except WebDriverException:
                pass
    except NoSuchElementException:
        pass

    # 3) Scan iframes and try inside.
    for i, frame in enumerate(driver.find_elements(By.TAG_NAME, "iframe")):
        # quick filter to avoid costly switches
        try:
            src = (frame.get_attribute("src") or "").lower()
            name = (frame.get_attribute("name") or "").lower()
        except WebDriverException:
            continue  # frame vanished while we were inspecting it
        if not any(k in src + name
                   for k in ("consent", "onetrust", "privacy", "cookie")):
            continue
        tried.append(f"iframe[{i}] src={src or name}")
        try:
            driver.switch_to.frame(frame)
            # try common selectors again inside this frame
            for how, what in candidates:
                try:
                    btn = WebDriverWait(driver, 2).until(
                        EC.presence_of_element_located((how, what)))
                    if not _visible(btn):
                        continue
                    driver.execute_script(
                        "arguments[0].scrollIntoView({block:'center'});", btn)
                except (TimeoutException, WebDriverException):
                    continue
                if _click(btn):
                    return True
        except WebDriverException:
            pass  # could not enter this frame; move on to the next one
        finally:
            # Always return to the top-level document, including on `return`.
            try:
                driver.switch_to.default_content()
            except WebDriverException:
                pass

    # 4) Last resort: call OneTrust API if it exists, or remove the banner to unblock clicks
    try:
        ok = driver.execute_script("""
            if (window.OneTrust && OneTrust.AcceptAll) { OneTrust.AcceptAll(); return true; }
            const b = document.getElementById('onetrust-banner-sdk');
            if (b) { b.remove(); return 'removed'; }
            return false;
        """)
        if ok:
            return True
    except WebDriverException:
        pass

    print("[cookies] Could not find/close cookie banner. Tried:",
          *tried,
          sep="\n - ")
    return False


In [None]:
url = 'https://www.mlssoccer.com/schedule/scores#competition=MLS-COM-000001&club=all'

rounds = 1000000  # upper bound on pages walked back; the loop normally ends earlier
# The scrape stops at the first date heading whose text equals this string exactly.
# NOTE(review): 26 Mar 2011 was a Saturday, so "Tuesday Mar 26, 2011" may never
# appear on the page — confirm the intended stop date/format.
stop_date = "Tuesday Mar 26, 2011"
matches = []
seen_matches = set()  # record tuples already stored, so re-read pages add nothing


def _parse_match(match, date_text):
    """Build one result record from a schedule ``<a>`` element.

    Args:
        match: WebElement for an anchor inside a date group.
        date_text: Heading text of the date group the anchor belongs to.

    Returns:
        A dict with keys ``date``, ``home_team``, ``away_team``,
        ``home_score`` and ``away_score``, or None when the anchor lacks the
        home/away club abbreviations or has fewer than two score elements.
    """
    try:
        home_abbr_el = match.find_element(
            By.XPATH,
            ".//div[contains(@class,'mls-c-club') and contains(@class,'--home')]"
            "//span[contains(@class,'mls-c-club__abbreviation')]"
        )
        away_abbr_el = match.find_element(
            By.XPATH,
            ".//div[contains(@class,'mls-c-club') and contains(@class,'--away')]"
            "//span[contains(@class,'mls-c-club__abbreviation')]"
        )
    except NoSuchElementException:
        return None

    score_els = match.find_elements(By.CSS_SELECTOR, '.mls-c-scorebug__score')
    if len(score_els) < 2:
        return None

    return {
        # textContent is read instead of .text so the abbreviation is returned
        # even if the span is not rendered visibly.
        "date": date_text,
        "home_team": (home_abbr_el.get_attribute("textContent") or "").strip(),
        "away_team": (away_abbr_el.get_attribute("textContent") or "").strip(),
        "home_score": score_els[0].text.strip(),
        "away_score": score_els[1].text.strip()
    }


driver.get(url)
wait = WebDriverWait(driver, 10)

# dismiss_cookies reports its own failures and returns False; only unexpected
# driver errors land here, and they must not abort the scrape.
try:
    dismiss_cookies(driver, timeout=8)
except Exception as e:
    print(f"Cookie dismissal failed, continuing anyway: {e}")

try:
    for _ in range(rounds):
        reached_stop = False

        try:
            time.sleep(5)  # fixed pause to let the schedule render

            matches_table = wait.until(EC.presence_of_element_located((By.XPATH, '//div[@class="mls-c-schedule__matches"]')))

            for date in matches_table.find_elements(By.CSS_SELECTOR, '.sc-GKYbw.bMOKWx'):
                date_element = date.find_element(By.CSS_SELECTOR, '.sc-hLBbgP.gIKMo').text.strip()

                if date_element == stop_date:
                    # Must end the whole scrape, not just this page's date loop.
                    reached_stop = True
                    break

                for match in date.find_elements(By.TAG_NAME, 'a'):
                    record = _parse_match(match, date_element)
                    if record is None:
                        continue  # anchor is not a complete match entry

                    key = tuple(record.values())
                    if key in seen_matches:
                        continue
                    seen_matches.add(key)

                    print(f"Match found: {record['date']} - {record['home_team']} vs {record['away_team']} ({record['home_score']}:{record['away_score']})")
                    matches.append(record)

        except (TimeoutException, WebDriverException) as e:
            # A page that fails to load or parse is skipped; keep walking back.
            print(f"Error occurred: {e}")

        if reached_stop:
            break

        # Locate the button right before clicking so a re-rendered page cannot
        # leave us holding a stale element.
        try:
            previous_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Previous results']")))
            previous_button.click()
        except TimeoutException:
            print("No clickable 'Previous results' button found; stopping.")
            break
        except WebDriverException as e:
            print(f"Could not move to previous results: {e}")
            break
finally:
    # Report and release the browser even if the loop is interrupted or fails.
    print(f"Total unique matches collected: {len(matches)}")
    driver.quit()

In [None]:
# Turn the scraped match records (one dict per match) into a table and persist
# it as CSV without the positional index column.
matches_df = pd.DataFrame(data=matches)
matches_df.to_csv(path_or_buf='matches_past.csv', index=False)