In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from math import nan
from datetime import datetime
import re
import time
import json
import os


In [2]:
# Define a test user-agent string to simulate a browser request
test_ua = 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36'

# Initialize Chrome options for the WebDriver
options = Options()

# Uncomment the following line if you want to run the browser in headless mode (no GUI)
# options.add_argument("--headless")

# Set the window size for the browser
options.add_argument("--window-size=1920,1080")

# Set the user-agent to the test user-agent defined above
options.add_argument(f'--user-agent={test_ua}')

# Add additional options to improve stability and compatibility
options.add_argument('--no-sandbox')  # Required for running in some environments
options.add_argument("--disable-extensions")  # Disable extensions to avoid potential conflicts
options.add_argument("user-data-dir=C:\\Users\\eric9\\AppData\\Local\\Google\\Chrome\\User Data\\Default")

# Initialize the Chrome WebDriver with the specified options
driver = webdriver.Chrome(options=options)


In [3]:
def close_prompts():
    """
    Closes any modal that appears and applies ticket filters.
    """
    # Close any modal that appears
    driver.find_element(By.XPATH, '//*[@id="modal-root"]/div/div/div/div[2]/div[3]/button').click()

def apply_ticket_filters():   
    driver.find_element(By.CSS_SELECTOR, "div.sc-1urpwzu-1").click()
    reccomended_filter = driver.find_element(By.XPATH, "//*[@id='stubhub-event-detail-popular-filters']/div/div/div/div[2]/div/div/div/div[2]/div/input")
    if reccomended_filter.get_attribute("value") == "true":
        time.sleep(3)
        reccomended_filter.click()

def click_zones():
    zones_div = driver.find_elements(By.CSS_SELECTOR, 'div.sc-1s9c4ms-2.jFxikH')
    for zone in zones_div:
        if "zones" in zone.text.lower():
            zone.click()
            break
    
def click_checkboxes():
    """
    Clicks each checkbox, unchecks all others, and returns the link each time.
    """
    # Find and click the "Zones" button to expand the filter options

    checkboxes = driver.find_elements(By.CSS_SELECTOR, 'input[type="checkbox"].sc-mhai9k-2.fOnHmE')
    links = []

    for checkbox in checkboxes:
        # Uncheck all checkboxes first
        for cb in checkboxes:
            if cb.is_selected():
                cb.click()
        
        # Click the current checkbox
        checkbox.click()
        
        # Wait for the page to update
        
        # Get the current URL
        links.append(driver.current_url)
    
    return links

# Call the function and store the links



In [4]:
driver.get("https://www.stubhub.ca/coldplay-toronto-tickets-7-7-2025/event/155741566/")

close_prompts()
apply_ticket_filters()
click_zones()
checkbox_links = click_checkboxes()
print(checkbox_links)


['https://www.stubhub.ca/coldplay-toronto-tickets-7-7-2025/event/155741566/?quantity=2&sections=2018738%2C2018741%2C2018739%2C2018745%2C2018740%2C2018747%2C2018742%2C2018746%2C2018743%2C2018744%2C2018737&ticketClasses=595&rows=&seats=&seatTypes=&listingQty=', 'https://www.stubhub.ca/coldplay-toronto-tickets-7-7-2025/event/155741566/?quantity=2&sections=2018752%2C2018753%2C2018748%2C2018750%2C2018749%2C2018751%2C2018754%2C2018763&ticketClasses=596&rows=&seats=&seatTypes=&listingQty=', 'https://www.stubhub.ca/coldplay-toronto-tickets-7-7-2025/event/155741566/?quantity=2&sections=2018733%2C2018735%2C2018736%2C2018730%2C2018731%2C2018734%2C2018732%2C2018762&ticketClasses=597&rows=&seats=&seatTypes=&listingQty=', 'https://www.stubhub.ca/coldplay-toronto-tickets-7-7-2025/event/155741566/?quantity=2&sections=2018765&ticketClasses=682&rows=&seats=&seatTypes=&listingQty=', 'https://www.stubhub.ca/coldplay-toronto-tickets-7-7-2025/event/155741566/?quantity=2&sections=2018758%2C2018757%2C2018756%

In [11]:
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_data(url):
    # Get the page source using requests
    response = requests.get(url)
    page_source = response.text
    
    # Parse the page source with BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')
    
    # Find the script tag with id 'index-data'
    script_tag = soup.find('script', id='index-data', type='application/json')
    
    # Extract the JSON string from the script tag
    if script_tag is None:
        return None
    json_string = script_tag.string
    
    # Parse the JSON string
    index_data = json.loads(json_string)
    
    # Extract grid items
    grid_items = index_data['grid']['items']
    
    # Create a DataFrame from the grid items
    df = pd.DataFrame(grid_items)
    return df

dfs = []

# Use ThreadPoolExecutor to fetch data in parallel
with ThreadPoolExecutor(max_workers=100) as executor:
    future_to_url = {executor.submit(fetch_data, url): url for url in checkbox_links}
    for future in as_completed(future_to_url):
        df = future.result()
        if df is not None:
            dfs.append(df)

# Combine all dataframes into a single dataframe
combined_df = pd.concat(dfs, ignore_index=True)
print(combined_df)


            id  clientApplicationId    eventId  section  sectionId  \
0   8812118424                  123  155741566    Floor    2018765   
1   8784487510                  312  155741566    Floor    2018765   
2   8697603296                 1231  155741566    Floor    2018765   
3   8393825776                 1231  155741566    Floor    2018765   
4   8406218100                 1231  155741566    Floor    2018765   
..         ...                  ...        ...      ...        ...   
76  8081933620                    3  155741566  107 WCR    2018756   
77  8082025090                  312  155741566  102 WCR    2018755   
78  8671002954                  134  155741566  124 WCR    2018758   
79  8390223927                 1231  155741566  119 WCR    2018757   
80  8515790133                 1231  155741566  102 WCR    2018755   

   sectionMapName  sectionType  row   seat seatFromInternal  ...  isFavorite  \
0           Floor            4       41_42               41  ...       False   

In [6]:
# import requests
# from multiprocessing import Pool

# def fetch_data(url):
#     # Get the page source using requests
#     response = requests.get(url)
#     page_source = response.text
    
#     # Parse the page source with BeautifulSoup
#     soup = BeautifulSoup(page_source, 'html.parser')
    
#     # Find the script tag with id 'index-data'
#     script_tag = soup.find('script', id='index-data', type='application/json')
    
#     # Extract the JSON string from the script tag
#     if script_tag is None:
#         return None
#     json_string = script_tag.string
    
#     # Parse the JSON string
#     index_data = json.loads(json_string)
    
#     # Extract grid items
#     grid_items = index_data['grid']['items']
    
#     # Create a DataFrame from the grid items
#     df = pd.DataFrame(grid_items)
#     return df

# # Use multiprocessing Pool to fetch data in parallel
# with Pool(processes=5) as pool:
#     dfs = pool.map(fetch_data, checkbox_links)

# # Filter out None values
# dfs = [df for df in dfs if df is not None]

# # Combine all dataframes into a single dataframe
# combined_df = pd.concat(dfs, ignore_index=True)
# print(combined_df)


In [7]:
dfs = []

for url in checkbox_links:
    # Get the page source using requests
    response = requests.get(url)
    page_source = response.text
    
    # Parse the page source with BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')
    
    # Find the script tag with id 'index-data'
    script_tag = soup.find('script', id='index-data', type='application/json')
    
    # Extract the JSON string from the script tag
    if script_tag is None:
        continue
    json_string = script_tag.string
    
    # Parse the JSON string
    index_data = json.loads(json_string)
    
    # Extract grid items
    grid_items = index_data['grid']['items']
    
    # Create a DataFrame from the grid items
    df = pd.DataFrame(grid_items)
    
    # Append the DataFrame to the list
    dfs.append(df)

# Combine all dataframes into a single dataframe
combined_df = pd.concat(dfs, ignore_index=True)
print(combined_df)


            id  clientApplicationId    eventId  section  sectionId  \
0   8732207340                    3  155741566      112    2018741   
1   8795939527                  312  155741566      108    2018737   
2   8334307623                  312  155741566      117    2018746   
3   8265672755                    3  155741566      116    2018745   
4   8819999744                  123  155741566      114    2018743   
..         ...                  ...        ...      ...        ...   
76  8081933620                    3  155741566  107 WCR    2018756   
77  8082025090                  312  155741566  102 WCR    2018755   
78  8671002954                  134  155741566  124 WCR    2018758   
79  8390223927                 1231  155741566  119 WCR    2018757   
80  8515790133                 1231  155741566  102 WCR    2018755   

   sectionMapName  sectionType  row   seat seatFrom  ... isMostAffordable  \
0             112            2   15    7_8        7  ...            False   
1    

In [8]:
combined_df

Unnamed: 0,id,clientApplicationId,eventId,section,sectionId,sectionMapName,sectionType,row,seat,seatFrom,...,isMostAffordable,isSponsored,isCheapestListing,isFavorite,aggregateFavorites,listingId,formattedFees,soldXTimeAgoSiteMessage,bestSellingInSectionMessage,lastTicketInSectionMessage
0,8732207340,3,155741566,112,2018741,112,2,15,7_8,7,...,False,False,False,False,0,8732207340,,,,
1,8795939527,312,155741566,108,2018737,108,2,33,7_8,7,...,False,False,False,False,0,8795939527,,,,
2,8334307623,312,155741566,117,2018746,117,2,36,28_29,28,...,False,False,False,False,0,8334307623,,,,
3,8265672755,3,155741566,116,2018745,116,2,30,11_14,11,...,False,False,False,False,0,8265672755,,,,
4,8819999744,123,155741566,114,2018743,114,2,36,38_39,,...,False,False,False,False,0,8819999744,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,8081933620,3,155741566,107 WCR,2018756,107 WCR,2,WC2,27_28,27,...,False,False,False,False,0,8081933620,,,,
77,8082025090,312,155741566,102 WCR,2018755,102 WCR,2,WC3,22_23,22,...,False,False,False,False,0,8082025090,,,,
78,8671002954,134,155741566,124 WCR,2018758,124 WCR,2,WC2,22_23,,...,False,False,False,False,0,8671002954,,,,
79,8390223927,1231,155741566,119 WCR,2018757,119 WCR,2,,,,...,False,False,False,False,0,8390223927,,,,
