In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import csv
import time

# Site root; event links found in the results are joined onto this.
BASE_URL = "https://www.cagematch.net"

# Event search restricted to promotion id 1 and the date range
# 01.01.2023 - 31.12.2025; every other filter is left empty.
# (Promotion 1 is presumably WWE, judging by the output file name -- confirm.)
SEARCH_URL = (
    f"{BASE_URL}/?id=1&view=search"
    "&sEventName=&sPromotion=1"
    "&sDateFromDay=01&sDateFromMonth=01&sDateFromYear=2023"
    "&sDateTillDay=31&sDateTillMonth=12&sDateTillYear=2025"
    "&sRegion=&sEventType=&sLocation=&sArena=&sAny="
)

# Browser configuration. Headless mode ("--headless=new") is deliberately
# left off: the script is run with a visible window for reliability.
options = Options()
options.add_argument("--window-size=1920,1080")

# One shared browser session and one shared 20-second explicit wait are used
# by every function below.
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 20)

# Give the freshly started browser a moment before the first request.
time.sleep(2)

def get_event_list():
    """Collect the event rows from every page of the search results.

    Loads ``SEARCH_URL`` in the module-level ``driver`` and parses the first
    ``<table>`` of each result page with BeautifulSoup. Paging is done by
    clicking the link whose text is the next page number, and stops when no
    such link exists, when a page has no table or no data rows, or when the
    click/navigation fails.

    Rows with fewer than six cells, or without an event link (an ``href``
    containing ``?id=1&nr=``) in the third cell, are skipped.

    Returns:
        list[dict]: one dict per event with the keys ``"Date"``,
        ``"Event Name"``, ``"Location"``, ``"Rating"``, ``"Votes"`` and
        ``"Event URL"`` (an absolute URL).

    Raises:
        selenium.common.exceptions.TimeoutException: if no table appears on
            a result page within the shared wait's timeout.
    """
    # Function-scope imports so the block does not depend on edits elsewhere.
    from urllib.parse import urljoin

    from selenium.common.exceptions import (
        NoSuchElementException,
        WebDriverException,
    )

    # The hrefs on the site are query-only ("?id=1&nr=..."); joining against
    # a base that ends in "/" yields "https://host/?id=..." instead of the
    # slash-less "https://host?id=..." that plain concatenation produced.
    site_root = BASE_URL.rstrip("/") + "/"

    events = []
    page = 1
    driver.get(SEARCH_URL)
    while True:
        print(f"Scraping search results page {page}...")
        wait.until(EC.presence_of_element_located((By.TAG_NAME, "table")))
        soup = BeautifulSoup(driver.page_source, "html.parser")
        table = soup.find("table")
        if not table:
            print(f"No event table found on page {page}.")
            break
        rows = table.find_all("tr")[1:]  # Skip header
        if not rows:
            print(f"No event rows found on page {page}.")
            break
        for row in rows:
            cols = row.find_all("td")
            if len(cols) < 6:
                continue
            event_name_cell = cols[2]
            # The name cell may hold several links; only the first one that
            # points at an event page is used.
            event_url = ""
            for link in event_name_cell.find_all("a"):
                href = link.get("href", "")
                if "?id=1&nr=" in href:
                    event_url = urljoin(site_root, href)
                    break
            if not event_url:
                continue
            events.append({
                "Date": cols[1].get_text(strip=True),
                "Event Name": event_name_cell.get_text(strip=True),
                "Location": cols[3].get_text(strip=True),
                "Rating": cols[4].get_text(strip=True),
                "Votes": cols[5].get_text(strip=True),
                "Event URL": event_url,
            })

        # Pagination: look for the link labelled with the next page number.
        try:
            next_button = driver.find_element(By.LINK_TEXT, str(page + 1))
        except NoSuchElementException:
            print("No more pages or could not find the next page number button.")
            break

        try:
            # Keep a handle on the current table: once it goes stale the old
            # page has really been replaced. Waiting only for "a table is
            # present" would succeed immediately on the page we just parsed.
            current_table = driver.find_element(By.TAG_NAME, "table")
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            time.sleep(1)  # short pause before clicking, kept from the original flow
            next_button.click()
            wait.until(EC.staleness_of(current_table))
        except WebDriverException as exc:
            # Covers click failures and the staleness timeout; keep what was
            # collected so far instead of aborting the whole run.
            print(f"Stopping pagination after page {page}: {exc}")
            break
        page += 1
    return events

def get_event_details(event_url):
    """Read the information box of a single event page.

    Opens ``event_url`` in the module-level ``driver``, waits for an element
    with class ``InformationBoxTable`` and scans its ``InformationBoxRow``
    entries for a fixed set of labels (compared lower-cased, colons removed).

    Args:
        event_url: URL of the event page to load.

    Returns:
        tuple[str, str, str, str, str]: ``(attendance, arena, event_type,
        broadcast_type, tv_station)``. A field whose label is not found is
        ``""``. If anything raises while loading or parsing, the error is
        printed and five empty strings are returned.
    """
    # Position of each field in the returned tuple.
    slot_for_label = {
        "attendance": 0,
        "arena": 1,
        "type": 2,
        "broadcast type": 3,
        "tv station/network": 4,
        "tv station / network": 4,
    }
    try:
        driver.get(event_url)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, "InformationBoxTable")))
        time.sleep(2)
        page = BeautifulSoup(driver.page_source, "html.parser")
        box = page.find("div", class_="InformationBoxTable")

        fields = ["", "", "", "", ""]
        entries = box.find_all("div", class_="InformationBoxRow") if box else []
        for entry in entries:
            title = entry.find("div", class_="InformationBoxTitle")
            contents = entry.find("div", class_="InformationBoxContents")
            if title and contents:
                key = title.get_text(strip=True).lower().replace(":", "")
                text = contents.get_text(strip=True)
                slot = slot_for_label.get(key)
                if slot is not None:
                    # A later row with the same label overrides an earlier one.
                    fields[slot] = text
        return tuple(fields)
    except Exception as e:
        # Best effort: one broken event page must not stop the whole run.
        print(f"Error scraping {event_url}: {e}")
        return "", "", "", "", ""

# --- MAIN SCRIPT ---
# Flow: collect the event list, enrich every event with its detail-page
# fields, write everything to a CSV file, and always close the browser.

try:
    events = get_event_list()
    total = len(events)
    print(f"Found {total} events.")

    # Keys added to each event, in the order get_event_details() returns them.
    detail_keys = ("Attendance", "Arena", "Type", "Broadcast type", "TV station/network")

    for position, event in enumerate(events, start=1):
        url = event["Event URL"]
        if not url:
            # No detail page to visit: fill the detail fields with blanks.
            event.update(dict.fromkeys(detail_keys, ""))
            continue
        print(f"[{position}/{total}] {event['Event Name']} - {url}")
        event.update(zip(detail_keys, get_event_details(url)))
        time.sleep(1)  # pause between detail-page requests

    # Column order of the CSV; "Event URL" is intentionally not exported.
    csv_columns = [
        "Date", "Event Name", "Location", "Rating", "Votes",
        "Arena", "Attendance", "Type", "Broadcast type", "TV station/network",
    ]
    with open("wwe_events_2023_2025.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(
            {column: event[column] for column in csv_columns}
            for event in events
        )

    print(f"Done! Scraped {len(events)} events with full event details.")

except Exception as e:
    print(f"Fatal error: {e}")

finally:
    # Close the browser whether or not the run succeeded.
    driver.quit()


Scraping search results page 1...
Scraping search results page 2...
Scraping search results page 3...
Scraping search results page 4...
Scraping search results page 5...
Scraping search results page 6...
Scraping search results page 7...
Scraping search results page 8...
Scraping search results page 9...
Scraping search results page 10...
Scraping search results page 11...
No more pages or could not find the next page number button.
Found 1099 events.
[1/1099] WWE Survivor Series 2025 - https://www.cagematch.net?id=1&nr=423960
[2/1099] WWE Monday Night RAW #1690 - https://www.cagematch.net?id=1&nr=424646
[3/1099] WWE Crown Jewel 2025 - https://www.cagematch.net?id=1&nr=424644
[4/1099] WWE Friday Night SmackDown #1364 - https://www.cagematch.net?id=1&nr=424645
[5/1099] WWE Monday Night RAW #1684 - https://www.cagematch.net?id=1&nr=418130
[6/1099] WWE Clash In Paris - https://www.cagematch.net?id=1&nr=418129
[7/1099] WWE Friday Night SmackDown #1358 - https://www.cagematch.net?id=1&nr=42