In [92]:
import json
import os
import re
import time
from datetime import datetime
from math import nan
from urllib.parse import quote

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait




# Snapshot of the current local date and time (naive, no tzinfo), taken once
# when this cell runs.
current_date = datetime.now()

In [93]:
# Artists to search for. Each name is used verbatim as the StubHub search term
# and as the value of the "Artist" column in the scraped data.
artists = ["Taylor Swift", "ColdPlay", "Metallica", 'Oasis', 'Dua Lipa']

# Short venue labels. A scraped venue name that contains one of these
# (case-insensitively) is stored under the short label instead of its full name.
venues = [
    "Budweiser",
    "History",
    "Rogers Stadium",
    "Massey Hall",
    "DPRTMNT",
    "Rogers Arena",
    "Axis",
    "Noir",
    "Rebel",
    "Cabana",
    "Woodbine Park",
    "CODA",
    "Metropolis",
]


In [94]:
def generate_stubhub_url(artist, location="Toronto"):
    """
    Build a StubHub search URL for an artist in a given location.

    The search term is "<artist> <location>", percent-encoded so that spaces
    and reserved characters (``&``, ``#``, ``+``, ``/`` ...) in either part
    cannot break or truncate the query string.

    Args:
        artist (str): Artist name to search for. If None or NaN, the
            placeholder "lol" is used instead.
        location (str): Location appended to the search term. If None or
            NaN, "Toronto" is used. Defaults to "Toronto".

    Returns:
        str: The StubHub search URL with the encoded query.
    """
    if pd.isna(artist):
        artist = "lol"
    if pd.isna(location):
        location = "Toronto"
    # safe="" also encodes "/" so the whole term stays inside the q parameter.
    search_term = quote(f"{artist} {location}", safe="")
    return "https://www.stubhub.ca/secure/search?q=" + search_term

def get_event_link(event_grid, date):
    """
    Pick an event URL out of a StubHub event grid.

    Args:
        event_grid (dict): Mapping whose ``["0"]["items"]`` entry is a list of
            event dicts, each with "url" and "formattedDate" keys.
        date: Object with ``strftime`` (e.g. a datetime) naming the wanted
            event day, or None/NaN to take the first event.

    Returns:
        str: URL of the first event whose "formattedDate" equals the date
        formatted as "%d %b %Y", with the ticket-filter parameters appended.
        When no date is given the first event is used. When a date is given
        but nothing matches, the first event is used and "&quantity=0" is
        appended as well.
    """
    items = event_grid["0"]["items"]
    filters = "&betterValueTickets=false" + "&estimatedFees=false"

    if not pd.isna(date):
        match = next(
            (item for item in items if item["formattedDate"] == date.strftime("%d %b %Y")),
            None,
        )
        if match is not None:
            return match["url"] + filters
        # No event on the requested day: fall back to the first listing.
        return items[0]["url"] + filters + "&quantity=0"

    return items[0]["url"] + filters

In [105]:
import requests
from sympy import sec

# Scrape StubHub ticket listings for every artist in `artists` and merge them
# into 'Concert Seats.csv', keyed on the listing "id".

# Without a timeout, requests.get can block forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30
csv_file = 'Concert Seats.csv'


def _load_json_script(markup, **find_kwargs):
    """Return the decoded JSON of the first matching <script type="application/json"> tag.

    Args:
        markup (str | bytes): HTML document to parse.
        **find_kwargs: Extra BeautifulSoup ``find`` filters (e.g. ``id=...``
            or ``string=...``) used to select the script tag.

    Returns:
        The parsed JSON value, or None when no matching tag exists or the tag
        has no content.
    """
    script_tag = BeautifulSoup(markup, 'html.parser').find(
        'script', type='application/json', **find_kwargs
    )
    if script_tag is None:
        return None
    payload = script_tag.string if script_tag.string is not None else script_tag.get_text()
    if not payload.strip():
        return None
    return json.loads(payload)


for a in artists:
    # The search page embeds its results as JSON in a script tag that mentions 'eventGrids'.
    artist_search_url = generate_stubhub_url(a, "Toronto")
    response = requests.get(artist_search_url, timeout=REQUEST_TIMEOUT_SECONDS)
    data = _load_json_script(response.content, string=lambda text: text and 'eventGrids' in text)
    if data is None:
        # Blocked request, error page or changed layout: skip this artist
        # instead of crashing the whole run.
        print(f"No event grid found for {a!r} (HTTP {response.status_code}); skipping.")
        continue

    events = data["eventGrids"]["0"]["items"]
    for event in events:
        if "toronto" not in event["formattedVenueLocation"].lower():
            continue

        # The event page embeds its ticket listings in the 'index-data' script tag.
        url = event["url"] + "&betterValueTickets=false" + "&estimatedFees=false"
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        index_data = _load_json_script(response.text, id='index-data')
        if index_data is None:
            print(f"No listing data found at {url} (HTTP {response.status_code}); skipping.")
            continue

        grid_items = index_data['grid']['items']
        if not grid_items:
            # An empty grid would give a frame without 'sectionId' / 'id' columns.
            continue

        df = pd.DataFrame(grid_items)
        df['sectionId'] = df['sectionId'].fillna('').astype(str)
        df["Artist"] = a

        # Prefer the short label from `venues`; otherwise keep the scraped venue name.
        venue_name = event["venueName"]
        df["Venue"] = next(
            (label for label in venues if label.lower() in venue_name.lower()),
            venue_name,
        )

        # Merge into the CSV after every event so that progress survives a
        # failure part-way through. join='inner' keeps only the columns
        # present in both the stored file and the new listings.
        if os.path.isfile(csv_file):
            existing_df = pd.read_csv(csv_file)
            df = pd.concat([existing_df, df], axis=0, join='inner')

        # A listing scraped again replaces its older row.
        updated_df = df.drop_duplicates(subset='id', keep='last')
        updated_df.to_csv(csv_file, index=False)


```markdown
## Sections

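This section provides an overview of the seating sections for every venue found in `Concert Seats.csv` — for example, the Taylor Swift event at the Rogers Centre in Toronto. The listing data includes details such as section ID, section name, row, seat, and other relevant information; the next cell saves a section-name → section-ID lookup for each venue as a JSON file under `venue sections/`.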
```

In [108]:
import json
# Build a section-name -> section-id lookup for every venue in the scraped CSV
# and merge it into "venue sections/<venue> sections.json".
seats = pd.read_csv('Concert Seats.csv')

# Record which rows have a real section and section id *before* the cast
# below turns missing ids into the literal string "nan".
has_section_info = seats["section"].notna() & seats["sectionId"].notna()
seats["sectionId"] = seats["sectionId"].astype(str)


def _strip_float_suffix(section_id):
    """Remove one trailing ".0" (left by a float round-trip) from a section id string.

    str.rstrip('.0') must not be used here: it strips any run of '.' and '0'
    characters, turning "100.0" into "1" and "120" into "12".
    """
    return section_id[:-2] if section_id.endswith(".0") else section_id


# open(..., 'w') does not create missing directories.
os.makedirs("venue sections", exist_ok=True)

for venue_name in seats["Venue"].dropna().unique():
    venue_rows = seats[(seats["Venue"] == venue_name) & has_section_info]

    # Later rows win when a section appears more than once.
    section_id_dict = {
        section: _strip_float_suffix(section_id)
        for section, section_id in zip(venue_rows["section"], venue_rows["sectionId"])
    }
    if not section_id_dict:
        continue

    filename = "venue sections/" + venue_name + " sections.json"

    # Start from the existing file so sections seen in earlier runs are kept.
    try:
        with open(filename, 'r') as json_file:
            existing_data = json.load(json_file)
    except FileNotFoundError:
        existing_data = {}

    existing_data.update(section_id_dict)

    with open(filename, 'w') as json_file:
        json.dump(existing_data, json_file, indent=4)


In [115]:
# List every column available in the scraped data, then keep a narrow view
# with just the artist, venue, seat location and price columns.
print(seats.columns)
summary_columns = ['Artist', 'Venue', 'section', 'row', 'faceValue', 'rawPrice', 'priceWithFees']
short_seats = seats.loc[:, summary_columns]

Index(['id', 'clientApplicationId', 'eventId', 'section', 'sectionId',
       'sectionMapName', 'sectionType', 'row', 'hasSeatDetails',
       'hasSeatDetailsUS', 'availableTickets',
       'listingPreviewPriceAndFeeDisclosure', 'showRecentlySold',
       'availableQuantities', 'ticketClass', 'ticketClassName', 'maxQuantity',
       'hasListingNotes', 'listingNotes', 'rowId', 'isUsersListing',
       'isPreUploaded', 'rowContent', 'rawPrice', 'price', 'sellerNetProceeds',
       'priceWithFees', 'ticketTypeId', 'ticketTypeGroupId', 'listingTypeId',
       'listingCurrencyCode', 'buyerCurrencyCode', 'faceValue',
       'faceValueCurrencyCode', 'vfsUrl', 'formattedActiveSince',
       'isSeatedTogether', 'sellerUserId', 'showVfsInListing',
       'hideSeatAndRowInfo', 'sellerHideSeatInfo', 'aipHash', 'isMLBVerified',
       'isStanding', 'createdDateTime', 'isHighestListingScore',
       'isMostAffordable', 'isSponsored', 'isCheapestListing', 'Artist',
       'Venue'],
      dtype='objec

In [122]:
# Keep only the listings that report a positive face value.
has_face_value = short_seats["faceValue"].gt(0)
with_face = short_seats[has_face_value]

In [123]:
# Display the listings that report a positive face value.
with_face

Unnamed: 0,Artist,Venue,section,row,faceValue,rawPrice,priceWithFees
0,Taylor Swift,Rogers Centre,Right Field Bleachers,2.0,352.58,2250.00,"C$2,906"
1,Taylor Swift,Rogers Centre,102B,4.0,75.00,2250.00,"C$2,906"
12,Taylor Swift,Rogers Centre,519,6.0,5000.00,2700.00,"C$3,488"
15,Taylor Swift,Rogers Centre,539,19.0,91.00,2700.00,"C$3,488"
17,Taylor Swift,Rogers Centre,540,8.0,167.00,2716.20,"C$3,509"
...,...,...,...,...,...,...,...
327,Oasis,Rogers Stadium,122,1,516.00,1285.26,
328,Oasis,Rogers Stadium,118,29,114.00,243.72,C$314
329,Oasis,Rogers Stadium,115,34,114.00,244.86,C$316
335,Oasis,Rogers Stadium,109,34,114.00,249.80,C$322
