In [1]:
# FOR DATA PROCESSING:
import pandas as pd
import numpy as np

# FOR MEASURING COMPUTATION TIME, CREATING FIXED DELAYS:
import time

# FOR APPLYING BEAUTIFULSOUP
from bs4 import BeautifulSoup

# FOR APPLYING SELENIUM:
import selenium # Python Selenium
from selenium import webdriver # for specifying webdriver

from webdriver_manager.firefox import GeckoDriverManager # geckodriver for automatized access to Firefox

from selenium.webdriver.chrome.service import Service # needed since Selenium 4.10.0 see: https://github.com/SeleniumHQ/selenium/commit/9f5801c82fb3be3d5850707c46c3f8176e3ccd8e

from selenium.webdriver.support.ui import WebDriverWait # this three enable waiting until sth is displayed on website
from selenium.webdriver.support import expected_conditions as EC # for checking visibility of an element
from selenium.webdriver.common.by import By # for checking element visibility by XPath

# FOR SAVING DATA:
import pickle # pickle format of saved output

# FOR GETTING TODAY'S DATE
from datetime import datetime, timedelta


def save_object(obj, filename):
    """Pickle `obj` into the file at `filename`.

    The file is opened in binary write mode, so existing content at that
    path is replaced. The highest pickle protocol available is used.
    """
    protocol = pickle.HIGHEST_PROTOCOL
    with open(filename, 'wb') as sink:
        pickle.dump(obj, sink, protocol)
        
# Install geckodriver through webdriver_manager and show the path that install() returns.
firefoxpath = GeckoDriverManager().install()
print(firefoxpath)

/Users/agastyaharta/.wdm/drivers/geckodriver/mac64/v0.36.0/geckodriver


In [2]:
# Open Website
# The Firefox driver must be started through the Firefox Service class; the
# `Service` imported at the top of the notebook is the Chrome one. It is imported
# here under an alias so the existing top-level name is left untouched.
from selenium.webdriver.firefox.service import Service as FirefoxService

website = "https://www.ticketmaster.com/"

# Point Selenium at the geckodriver binary whose path was stored in `firefoxpath`.
service_firefox = FirefoxService(executable_path = firefoxpath)
options_firefox = webdriver.FirefoxOptions()
driver_firefox = webdriver.Firefox(service = service_firefox, options = options_firefox) # opens Firefox

driver_firefox.maximize_window() # maximizes browser's window
driver_firefox.get(website) # opens a website

In [3]:
# Accept Cookies
cookies_button_xpath = '''//*[@id='onetrust-accept-btn-handler']'''
cookies_locator = (By.XPATH, cookies_button_xpath)

# Give the cookies button up to 30 seconds to become visible.
banner_wait = WebDriverWait(driver_firefox, 30)
banner_wait.until(EC.visibility_of_element_located(cookies_locator))

# Human-like pause: 3 seconds plus a draw from a chi-square(1) distribution
# (strongly right-skewed, so mostly short extra delays with occasional long ones).
human_pause = np.random.chisquare(1) + 3
time.sleep(human_pause)

# Look the button up again after the pause, then click it.
cookies_button = driver_firefox.find_element(*cookies_locator)
cookies_button.click()

In [4]:
# Open the concerts listing and set the search location to "Salt Lake City, UT".
website_concert = "https://www.ticketmaster.com/discover/concerts"
driver_firefox.get(website_concert) # opens a website

# + wait random time drawn from specific (strongly right-side-skewed) distribution to better imitate human behavior
# (the former `start = time.time()` was removed: its value was never read)
time.sleep(np.random.chisquare(1)+3)

# Click the element that opens the location search.
# NOTE(review): the `sc-...` class names look auto-generated and may change
# between site builds; confirm the selectors before relying on them.
target_button_xpath = "//div[@class='sc-517d4ffa-0 iAGgnt']"
target_button = WebDriverWait(driver_firefox, 4).until(
    EC.element_to_be_clickable((By.XPATH, target_button_xpath))
)
target_button.click()


# Wait until the search box is displayed and enabled, not merely present in the DOM.
# clear()/send_keys() on an element that is present but not yet interactable raises,
# and the other elements in this cell are already waited on with element_to_be_clickable.
search_input_xpath = "//input[@class='sc-eb59abf5-7 bhvzZz']"
search_box = WebDriverWait(driver_firefox, 10).until(
    EC.element_to_be_clickable((By.XPATH, search_input_xpath))
)
search_box.clear()
search_box.send_keys("Salt Lake City, UT")

# Pick the suggestion entry with this id from the dropdown.
# NOTE(review): presumably this id identifies the Salt Lake City suggestion; verify it is stable.
dropdown_option_xpath = "//li[@id='-172424123']"
dropdown_option = WebDriverWait(driver_firefox, 2).until(
    EC.element_to_be_clickable((By.XPATH, dropdown_option_xpath))
)
dropdown_option.click()

In [5]:
# Restrict the listing to a date range: today through today + 7 days.
# (the former `start = time.time()` assignments were removed: the value was never read)
time.sleep(np.random.chisquare(1)+3)

# Open the date picker. The clicked element gets its own name so that
# `target_date_xpath` keeps holding the XPath string instead of a WebElement.
target_date_xpath = "//span[@class='sc-61fe2bcc-2 iVxoxI']"
target_date_button = WebDriverWait(driver_firefox, 4).until(
    EC.element_to_be_clickable((By.XPATH, target_date_xpath))
)
target_date_button.click()

today_date = datetime.today()

# Dates are typed as eight digits in month-day-year order (strftime "%m%d%Y").
# NOTE(review): presumably the input inserts its own separators; confirm.
start_date = today_date.strftime("%m%d%Y")
search_start_date_xpath = "//input[@id='startDate-input']"
# Wait for the field to be displayed and enabled (not just present in the DOM)
# before clearing and typing into it.
search_start_date_box = WebDriverWait(driver_firefox, 10).until(
    EC.element_to_be_clickable((By.XPATH, search_start_date_xpath))
)
search_start_date_box.clear()
search_start_date_box.send_keys(start_date)

# + wait random time drawn from specific (strongly right-side-skewed) distribution to better imitate human behavior
time.sleep(np.random.chisquare(1)+3)

end_date = today_date + timedelta(days=7)
end_date_formatted = end_date.strftime("%m%d%Y")
search_end_date_xpath = "//input[@id='endDate-input']"
search_end_date_box = WebDriverWait(driver_firefox, 4).until(
    EC.element_to_be_clickable((By.XPATH, search_end_date_xpath))
)
# NOTE(review): unlike the start-date box, this field is not cleared before typing;
# if the site ever pre-fills it, the typed digits would be added to the existing value.
search_end_date_box.send_keys(end_date_formatted)

# + wait random time drawn from specific (strongly right-side-skewed) distribution to better imitate human behavior
time.sleep(np.random.chisquare(1)+3)

apply_button_xpath = "//*[@type='submit']"
apply_button = WebDriverWait(driver_firefox, 10).until(
    EC.visibility_of_element_located((By.XPATH, apply_button_xpath))
)

# Scroll the button into view and pause briefly before clicking it.
driver_firefox.execute_script("arguments[0].scrollIntoView(true);", apply_button)
time.sleep(0.5)
apply_button.click()

In [7]:
# Block (up to 20 seconds) until a span containing the chosen city label is present in the page.
city_label_locator = (By.XPATH, "//span[contains(text(), 'Salt Lake City, UT')]")
WebDriverWait(driver_firefox, 20).until(EC.presence_of_element_located(city_label_locator))

# Take the current page source from the browser and parse it with BeautifulSoup.
html = driver_firefox.page_source
soup = BeautifulSoup(html, "html.parser")

# Collect the <li> items of the results list; each one is processed as one event.
event_blocks = soup.select("div.sc-5b932363-0.jODkC > ul > li")

# Parallel column lists: entry i of every list belongs to the i-th element of `event_blocks`.
artist_names = []
locations = []
venues = []
statuses = []
eventdates = []

# Month abbreviation as it appears in the page -> two-digit month number.
month_map = {
    "Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04", "May": "05", "Jun": "06",
    "Jul": "07", "Aug": "08", "Sep": "09", "Oct": "10", "Nov": "11", "Dec": "12"
}


def _text_or(node, default=""):
    """Return the whitespace-stripped text of `node`, or `default` when no node was found."""
    return node.get_text(strip=True) if node else default


def _format_event_date(day_text, month_text, reference_date):
    """Build a `DD.MM.YYYY` string from the day and month texts of an event.

    The page fragments parsed here carry no year, so it is derived from
    `reference_date` (the moment of scraping): an event whose month is earlier
    than the reference month is taken to fall in the following year, which covers
    a search window running from December into January. An unrecognised month
    text maps to "00" and keeps the reference year.
    """
    month = month_map.get(month_text, "00")
    year = reference_date.year
    if month != "00" and int(month) < reference_date.month:
        year += 1
    return f"{day_text.zfill(2)}.{month}.{year}"


# Reference point for the year of every event date (previously hard-coded as 2025).
scrape_date = datetime.today()

for event in event_blocks:
    # ARTIST
    artist = _text_or(event.select_one("span.sc-dd1f217b-7 span.sc-dd1f217b-6"))

    # LOCATION: keep only the part before the first comma (the whole text if there is none)
    full_location_text = _text_or(event.select_one("span.sc-dd1f217b-8"))
    location = full_location_text.split(",")[0]

    # VENUE: the second matching span, when there is one
    venue_spans = event.select("span.sc-dd1f217b-8 span.sc-dd1f217b-6")
    venue = venue_spans[1].get_text(strip=True) if len(venue_spans) > 1 else ""

    # BADGE (Status): events without a badge are recorded as "Active"
    status = _text_or(event.select_one("span[class^='Badge__Label-sc-yqcrvp-1']"), "Active")

    # MONTH
    month_text = _text_or(event.select_one("div.sc-d4c18b64-1 span"))

    # DATE (day of month); "00" when the span is missing
    date = _text_or(event.select_one("div.sc-d4c18b64-2 span"), "00")

    formatted_date = _format_event_date(date, month_text, scrape_date)

    artist_names.append(artist)
    locations.append(location)
    venues.append(venue)
    eventdates.append(formatted_date)
    statuses.append(status)

In [8]:
# Create DataFrame
# Map each output column title to the list collected for it; the order of
# the keys here is the column order of the resulting table.
event_columns = {
    "Artist": artist_names,
    "Location": locations,
    "Venue": venues,
    "Event Dates" : eventdates,
    "Availability": statuses,
}
df = pd.DataFrame(event_columns)

In [9]:
# Show the scraped events table as the cell's output.
df

Unnamed: 0,Artist,Location,Venue,Event Dates,Availability
0,Anberlin With Support From Copeland And Spital...,Salt Lake City,The Depot,21.03.2025,Near You
1,Post Sex Nachos,Salt Lake City,Kilby Court,21.03.2025,Near You
2,Dvorák’s Symphony No. 8,Salt Lake City,Abravanel Hall,21.03.2025,Near You
3,Maya Hawke: The Chaos Angel Tour,Salt Lake City,The Union,21.03.2025,Cancelled
4,Billy Raffoul,Salt Lake City,Kilby Court,24.03.2025,Near You
5,Tim Atlas,Salt Lake City,Kilby Court,25.03.2025,Near You
6,CHIODOS: 20 Years of All's Well That Ends Well,Salt Lake City,The Depot,26.03.2025,Near You
7,Daily Bread w/ Maddy O'Neal,Salt Lake City,Metro Music Hall - UT,26.03.2025,Near You
8,The Driver Era: Obsession Tour,Salt Lake City,The Union,26.03.2025,Near You
9,Restless Road - GOIN' OUT LIKE THAT TOUR,Salt Lake City,The Depot,27.03.2025,Near You


In [None]:
# quit() ends the whole WebDriver session: it closes every browser window and stops
# the driver process. close() would only close the current window.
driver_firefox.quit()