In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from math import nan
from datetime import datetime
import re
import time
import json
import os




# Get today's date
current_date = datetime.today()

In [2]:


# Initialize the Chrome WebDriver with the specified options
driver = webdriver.Chrome()


In [3]:
from urllib.parse import urlsplit, urlunsplit

def remove_query_from_url(url):
    url_parts = urlsplit(url)
    url_without_query = urlunsplit((url_parts.scheme, url_parts.netloc, url_parts.path, '', ''))
    return url_without_query



In [4]:

venues = ["Budweiser", "History", "Rogers Stadium", "Massey Hall", "DPRTMNT", "Rogers Arena", "Axis", "Noir", "Rebel", "Cabana", "Woodbine Park", "CODA", "Metropolis"]
path = "../../Documents/Ticket Sales.xlsx"
events = pd.read_excel(path, sheet_name ="Events")

In [5]:
def generate_stubhub_url(artist, location= "Toronto"):
    """
    Generates a StubHub search URL for a given string.
    This function takes a string input, replaces spaces with plus signs,
    and appends it to a predefined StubHub search URL. If the input is 
    None or NaN, it returns a placeholder string "lol".
    
    Args:
        search_query (str): The search query string.
    
    Returns:
        str: A formatted StubHub search URL or "lol" if the input is None or NaN.
    """
    if pd.isna(artist):
        artist = "lol"
    if pd.isna(location):
        location = "Toronto"
    return "https://www.stubhub.ca/secure/search?q=" + artist + "%20" + location

def get_event_link(event_grid, date):
    """
    Retrieves the href attribute of the event link from the specified XPath.
    
    Returns:
        str: The href attribute of the event link.
    """
    events =  event_grid["0"]["items"]
    if pd.isna(date):
        return events[0]["url"] + "&betterValueTickets=false" + "&estimatedFees=false"
    for event in events:
        if event["formattedDate"] == date.strftime("%d %b"):
            return event["url"] + "&betterValueTickets=false" + "&estimatedFees=false"
    return events[0]["url"] + "&betterValueTickets=false" + "&estimatedFees=false"+"&quantity=0"

In [6]:
def close_prompts():
    """
    Closes any modal that appears and applies ticket filters.
    """
    # Close any modal that appears
    try:
        driver.find_element(By.XPATH, '//*[@id="modal-root"]/div/div/div/div[2]/div[3]/button').click()
    except:
        pass
def apply_ticket_filters():
    try:
        driver.find_element(By.CSS_SELECTOR, "div.sc-1urpwzu-1").click()
        reccomended_filter = driver.find_element(By.XPATH, "//*[@id='stubhub-event-detail-popular-filters']/div/div/div/div[2]/div/div/div/div[2]/div/input")
        if reccomended_filter.get_attribute("value") == "true":
            time.sleep(3)
            reccomended_filter.click()
    except:
        pass

def click_zones():
    zones_div = driver.find_elements(By.CSS_SELECTOR, 'div.sc-1s9c4ms-2.jFxikH')
    for zone in zones_div:
        if "zones" in zone.text.lower():
            zone.click()
            break
    
def click_checkboxes():
    """
    Clicks each checkbox, unchecks all others, and returns the link each time.
    """
    # Find and click the "Zones" button to expand the filter options

    checkboxes = driver.find_elements(By.CSS_SELECTOR, 'input[type="checkbox"].sc-mhai9k-2.fOnHmE')
    links = []

    for checkbox in checkboxes:
        # Uncheck all checkboxes first
        for cb in checkboxes:
            if cb.is_selected():
                cb.click()
        
        # Click the current checkbox
        checkbox.click()
        
        # Wait for the page to update
        time.sleep(2)
        
        # Get the current URL
        links.append(driver.current_url)
    
    return links

# Call the function and store the links



In [9]:
import pandas as pd
from bs4 import BeautifulSoup
from sympy import symbols
import requests
import concurrent.futures
import json

events = pd.read_excel(path, sheet_name="Events")

processed = []
new_df = pd.DataFrame()

def process_event(row):
    if row["Artist"] in processed:
        return None

    if row["Artist"] not in ["black pink", "Oasis"]:
        return None

    processed.append(row["Artist"])

    artist_search_url = generate_stubhub_url(row["Artist"])
    response = requests.get(artist_search_url)
    soup = BeautifulSoup(response.content, 'html.parser')

    event_grid = soup.find('script', type='application/json', string=lambda x: x and 'eventGrids' in x)
    if event_grid is None:
        return None

    json_str = event_grid.text.strip()
    data = json.loads(json_str)
    event_grid = data["eventGrids"]
    events = event_grid["0"]["items"][0:10]

    results = []
    for event in events:
        if event["countryName"] not in ["Canada", "USA"]:
            continue

        try:
            url = remove_query_from_url(event["url"]) + "?listingQty=&quantity=0" + "&betterValueTickets=false" + "&estimatedFees=false"
            driver.get(url)
            close_prompts()
            time.sleep(1)
            apply_ticket_filters()
            click_zones()
            checkbox_links = click_checkboxes()
        except:
            print( row["Artist"] + " error")
            checkbox_links = [url]

        for url in checkbox_links:
            response = requests.get(url)
            page_source = response.text
            soup = BeautifulSoup(page_source, 'html.parser')

            script_tag = soup.find('script', id='index-data', type='application/json')
            if script_tag is None:
                continue

            json_string = script_tag.string
            index_data = json.loads(json_string)
            grid_items = index_data['grid']['items']
            df = pd.DataFrame(grid_items)

            df["Venue"] = event["venueName"]
            df["Artist"] = row["Artist"]
            df["Event Name"] = event["name"]
            df["City"] = event["venueCity"]
            df["Event Date"] = event['formattedDate']
            df["Event ID"] = event['eventId']
            df["countryName"] = event['countryName']
            df["Updated"] = pd.Timestamp.today().strftime('%Y-%m-%d')

            results.append(df)

    return results

with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(process_event, row) for index, row in events.iterrows()]
    for future in concurrent.futures.as_completed(futures):
        result = future.result()
        if result:
            for df in result:
                new_df = pd.concat([new_df, df], ignore_index=True, axis=0)


black pink error
Oasis error
Oasis error
black pink error


In [10]:
new_df

Unnamed: 0,id,clientApplicationId,eventId,section,sectionId,sectionMapName,sectionType,row,seat,seatFrom,...,bestSellingInSectionMessage,Venue,Artist,Event Name,City,Event Date,Event ID,countryName,Updated,lastTicketInSectionMessage
0,8132826090,312,155283966,109,2018738,109,2,31,19_20,19,...,,"Rogers Stadium (Concert Venue, Canada)",Oasis,Oasis,Toronto,Aug 24,155283966,Canada,2025-02-28,
1,8100497248,653,155283966,117,2018746,117,2,23,24_24,24,...,,"Rogers Stadium (Concert Venue, Canada)",Oasis,Oasis,Toronto,Aug 24,155283966,Canada,2025-02-28,
2,8793990341,123,155283966,112,2018741,112,2,32,12_15,12,...,,"Rogers Stadium (Concert Venue, Canada)",Oasis,Oasis,Toronto,Aug 24,155283966,Canada,2025-02-28,
3,8512884444,312,155283966,110,2018739,110,2,35,,,...,,"Rogers Stadium (Concert Venue, Canada)",Oasis,Oasis,Toronto,Aug 24,155283966,Canada,2025-02-28,
4,8360004727,177,155283966,115,2018744,115,2,28,19_20,19,...,,"Rogers Stadium (Concert Venue, Canada)",Oasis,Oasis,Toronto,Aug 24,155283966,Canada,2025-02-28,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,8852338459,312,157423305,118,2018747,118,2,36,9001_9004,,...,,"Rogers Stadium (Concert Venue, Canada)",black pink,BLACKPINK,Toronto,Jul 23,157423305,Canada,2025-02-28,{'message': 'Last tickets remaining in Section...
84,8852404684,312,157423305,107,2018736,107,2,,,,...,,"Rogers Stadium (Concert Venue, Canada)",black pink,BLACKPINK,Toronto,Jul 23,157423305,Canada,2025-02-28,
85,8856235206,372,157423305,119,2018748,119,2,50,894_897,,...,,"Rogers Stadium (Concert Venue, Canada)",black pink,BLACKPINK,Toronto,Jul 23,157423305,Canada,2025-02-28,
86,8856236564,372,157423305,107,2018736,107,2,50,894_897,,...,,"Rogers Stadium (Concert Venue, Canada)",black pink,BLACKPINK,Toronto,Jul 23,157423305,Canada,2025-02-28,


In [None]:
# xlsx_file = 'Concert Seats.xlsx'


# df = pd.read_excel(xlsx_file)

# # Merge the new grid items with the existing DataFrame
# updated_df = pd.concat([df, new_df], axis=0).drop_duplicates(subset='id', keep='last')

# # Save the updated DataFrame to the existing Excel file
# with pd.ExcelWriter(xlsx_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
#     updated_df.to_excel(writer, sheet_name='Sheet1', index=False)

    

In [None]:
seats = pd.read_excel('Concert Seats.xlsx', sheet_name='Sheet1')
short_seats =seats[['Artist', 'Venue','City', 'Event Date', 'section', 'row', 'faceValue', 'rawPrice']]




In [None]:
with_face = short_seats[short_seats["faceValue"] >10]

In [None]:
with_face['profit'] = with_face['rawPrice'] - with_face['faceValue']
with_face['margin'] = with_face['profit'] / with_face['faceValue']
with_face = with_face.sort_values(by = "margin", ascending = False,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  with_face['profit'] = with_face['rawPrice'] - with_face['faceValue']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  with_face['margin'] = with_face['profit'] / with_face['faceValue']


In [None]:
with_face

Unnamed: 0,Artist,Venue,City,Event Date,section,row,faceValue,rawPrice,profit,margin
7553,boywithuke,History,,,Floor General Admission,,45.00,6602.03,6557.03,145.711778
4716,Baynk,The Danforth Music Hall Theatre,,,Adult Admission - GA Floor,,40.93,4220.48,4179.55,102.114586
4031,inhaler,History,,,Box 203,,77.57,6540.57,6463.00,83.318293
3166,BBNO$,History,,,Box 201,A,94.16,6514.17,6420.01,68.181924
7907,Alex warren,Rebel,,,FLOOR - Standing Room Only,,97.34,6579.41,6482.07,66.592048
...,...,...,...,...,...,...,...,...,...,...
19467,Kelsea ballerini,Scotiabank Arena,Toronto,Apr 13,309,16,9999.00,136.50,-9862.50,-0.986349
5291,my chemical romance,Rogers Centre,,,117,6,33750.00,383.82,-33366.18,-0.988628
20214,Avril Lavigne,Bethel Woods Center for the Arts - Complex,Bethel,Jun 27,SEC100,U,9999.00,103.07,-9895.93,-0.989692
20209,Avril Lavigne,Bethel Woods Center for the Arts - Complex,Bethel,Jun 27,SEC12,B,9999.00,96.10,-9902.90,-0.990389


In [None]:
with pd.ExcelWriter('Concert Seats.xlsx', engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    short_seats.to_excel(writer, sheet_name='Short Seats', index=False)