In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import csv
import time

# Cagematch match-search URL: event types "Pay Per View" / "Premium Live Event",
# promotion id 1, dates 01.01.2025 through 31.12.2025. Adjacent literals are
# concatenated by the parser, so the resulting string is one unbroken URL.
MATCH_URL = (
    "https://www.cagematch.net/?id=112&view=search"
    "&sParticipant1=&sParticipant2=&sParticipant3=&sParticipant4="
    "&sEventName=&sEventType=Pay+Per+View%7CPremium+Live+Event"
    "&sDateFromDay=01&sDateFromMonth=01&sDateFromYear=2025"
    "&sDateTillDay=31&sDateTillMonth=12&sDateTillYear=2025"
    "&sPromotion=1&sLocation=&sArena=&sRegion=&sMatchType="
    "&sConstellation=&sWorkerRelationship=Any&sFulltextSearch="
)

# Chrome is launched with a fixed 1920x1080 window size.
# To run without a visible window, also add: options.add_argument("--headless=new")
options = Options()
options.add_argument("--window-size=1920,1080")

# Shared browser session and explicit-wait helper (20 second timeout) used by
# the scraping code below.
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, timeout=20)

# Short pause after launching the browser, before any page is requested.
time.sleep(2)

def _parse_match_row(row):
    """Turn one result-table row into a match record.

    Args:
        row: A BeautifulSoup tag for a single row of the results table.

    Returns:
        A dict with the keys "Match #", "Date", "Promotion", "Match",
        "MatchType" and "Event/Location", or None when the row has fewer
        than four ``<td>`` cells and therefore is not a match row.
    """
    cols = row.find_all("td")
    if len(cols) < 4:
        return None

    # The promotion is taken from the title attribute of the image in the
    # third cell; it is left empty when there is no image or no title.
    promotion_img = cols[2].find("img")
    promotion = promotion_img.get("title", "") if promotion_img is not None else ""

    match_cell = cols[3]
    match_desc = match_cell.find("span", class_="MatchCard")
    match_type_span = match_cell.find("span", class_="MatchType")
    event_line = match_cell.find("div", class_="MatchEventLine")

    # Without a MatchCard span, fall back to the text of the whole cell.
    match_source = match_desc if match_desc is not None else match_cell

    return {
        "Match #": cols[0].get_text(strip=True),
        "Date": cols[1].get_text(strip=True),
        "Promotion": promotion,
        "Match": match_source.get_text(" ", strip=True),
        "MatchType": (
            match_type_span.get_text(" ", strip=True)
            if match_type_span is not None
            else ""
        ),
        "Event/Location": (
            event_line.get_text(" ", strip=True) if event_line is not None else ""
        ),
    }


def get_match_list():
    """Scrape every page of the MATCH_URL search results.

    Loads MATCH_URL in the module-level ``driver``, parses the
    ``TableContents`` table of each result page and follows the numbered
    pagination links ("2", "3", ...) until no link for the next page exists.

    Returns:
        A list of dicts, one per match row, with the keys "Match #", "Date",
        "Promotion", "Match", "MatchType" and "Event/Location".

    Raises:
        selenium.common.exceptions.TimeoutException: If the results table
            does not appear within the ``wait`` timeout after a page load.
    """
    # Imported here so the block carries its own dependency.
    from selenium.common.exceptions import WebDriverException

    matches = []
    page = 1
    driver.get(MATCH_URL)
    while True:
        print(f"Scraping match results page {page}...")
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, "TableContents")))
        soup = BeautifulSoup(driver.page_source, "html.parser")
        table_div = soup.find("div", class_="TableContents")
        if table_div is None:
            print(f"No match table found on page {page}.")
            break

        rows = table_div.find_all("tr")
        if not rows:
            # Fallback layout: rows given as direct child <div> elements.
            rows = table_div.find_all("div", recursive=False)
        if not rows:
            print(f"No match rows found on page {page}.")
            break

        for row in rows[1:]:  # Skip header row
            record = _parse_match_row(row)
            if record is not None:
                matches.append(record)

        # Pagination: the link whose text is the next page number.
        # find_elements returns an empty list instead of raising, so the
        # normal end of the results is not handled as an exception.
        next_links = driver.find_elements(By.LINK_TEXT, str(page + 1))
        if not next_links:
            print("No more pages or could not find the next page number button.")
            break
        next_button = next_links[0]

        try:
            # Keep a handle on the current table: once the browser has
            # navigated, this element goes stale. Waiting only for
            # "presence" would be satisfied by the old page and could make
            # the loop parse the same page twice.
            current_table = driver.find_element(By.CLASS_NAME, "TableContents")
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            time.sleep(1)
            next_button.click()
            wait.until(EC.staleness_of(current_table))
        except WebDriverException as exc:
            # Best effort: keep what was scraped so far, but report why
            # pagination stopped instead of claiming there are no more pages.
            print(f"Could not navigate to page {page + 1}: {exc}")
            break
        page += 1
    return matches

# --- MAIN SCRIPT ---

# Run the scrape, dump the collected rows to CSV, and always shut the browser
# down afterwards, whether or not an error occurred.
try:
    matches = get_match_list()
    print(f"Total matches scraped: {len(matches)}")

    # Column order of the CSV; the names match the keys of each match dict.
    csv_columns = ["Match #", "Date", "Promotion", "Match", "MatchType", "Event/Location"]
    with open("wwe_matches_ple_2025.csv", "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(matches)

    print("Done! Data saved to wwe_matches_ple_2025.csv.")
except Exception as exc:
    # Top-level boundary: report the failure rather than raising.
    print(f"Fatal error: {exc}")
finally:
    driver.quit()


Scraping match results page 1...
No more pages or could not find the next page number button.
Total matches scraped: 50
Done! Data saved to wwe_matches_ple_2025.csv.
