In [13]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

# Setup Selenium
options = webdriver.ChromeOptions()
# options.add_argument('--headless')  # Uncomment to hide the browser
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
wait = WebDriverWait(driver, 10)

# Navigate to shows page
driver.get("https://www.listeningroomcafe.com/nashville-shows")
time.sleep(3)

# List to store event data
events = []

# Loop through pages 1, 2, 3
for page_number in range(1, 4):
    print(f"\n🔄 Scraping Page {page_number}...")
    
    if page_number > 1:
        try:
            page_link = wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, f'a.pager__number[data-page-number="{page_number}"]')))
            page_link.click()
            time.sleep(2)
        except Exception as e:
            print(f"❌ Couldn't click to page {page_number}: {e}")
            continue

    # Get all clickable show tiles
    show_tiles = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "grid-product__image")))

    for tile_index in range(len(show_tiles)):
        # Re-fetch the clickable tiles each time to avoid stale elements
        show_tiles = driver.find_elements(By.CLASS_NAME, "grid-product__image")
        if tile_index >= len(show_tiles):
            continue

        try:
            show_tiles[tile_index].click()
            time.sleep(1.5)

            wait.until(EC.presence_of_element_located((By.CLASS_NAME, "product-details__product-sku")))
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            artist = soup.find('h1', class_='product-details__product-title')
            artist = artist.text.strip() if artist else 'N/A'

            room = soup.find('div', class_='label__text')
            room = room.text.strip() if room else 'N/A'
            capacity = 255 if "MAIN STAGE" in room.upper() else 75 if "FRONT CAFE" in room.upper() else None

            datetime = soup.find('div', class_='product-details__product-sku')
            datetime = datetime.text.strip() if datetime else 'N/A'

            price = soup.find('span', class_='details-product-price__value')
            price = price.text.strip() if price else 'N/A'

            tickets_tag = soup.find('div', class_='product-details-module__title')
            if tickets_tag:
                match = re.search(r'(\d+)\s+available', tickets_tag.get_text())
                tickets_left = int(match.group(1)) if match else None
            else:
                tickets_left = None

            percent_sold = round(100 * (1 - tickets_left / capacity), 1) if capacity and tickets_left is not None else None

            events.append({
                'Artist/Event': artist,
                'Date/Time': datetime,
                'Room': room,
                'Capacity': capacity,
                'Tickets Left': tickets_left,
                '% Sold': percent_sold,
                'Price': price
            })

            driver.back()
            time.sleep(1.5)
        except Exception as e:
            print(f"⚠️ Error scraping show {tile_index+1} on page {page_number}: {e}")
            driver.back()
            time.sleep(1.5)
            continue

driver.quit()

# Build DataFrame
df = pd.DataFrame(events)

# Save to CSV
df.to_csv("listening_room_shows.csv", index=False)
print("\n✅ Data saved to 'listening_room_shows.csv'")

# Enhance with date/time & price analysis
df['Date/Time'] = pd.to_datetime(df['Date/Time'], errors='coerce')
df['Day of Week'] = df['Date/Time'].dt.day_name()
df['Hour'] = df['Date/Time'].dt.hour
df['Price ($)'] = df['Price'].replace(r'[\$,]', '', regex=True).astype(float)

# Compare price to average for same day+hour
avg_price = df.groupby(['Day of Week', 'Hour'])['Price ($)'].transform('mean')
df['Above Avg for Day+Hour'] = df['Price ($)'] > avg_price

# Show final results
print("\n🎤 Final Scraped Shows with Pricing Analysis:\n")
print(df[['Artist/Event', 'Date/Time', 'Room', 'Price ($)', 'Tickets Left', '% Sold', 'Above Avg for Day+Hour']].to_string(index=False))

# Save FINAL DataFrame to CSV
df.to_csv("listening_room_shows.csv", index=False)
print("✅ Final CSV saved with pricing analysis included.")


🔄 Scraping Page 1...

🔄 Scraping Page 2...

🔄 Scraping Page 3...

✅ Data saved to 'listening_room_shows.csv'

🎤 Final Scraped Shows with Pricing Analysis:

                                                                                                                                  Artist/Event           Date/Time                      Room  Price ($)  Tickets Left  % Sold  Above Avg for Day+Hour
                                                                                                  BRANDY NEELLY, KAYLIN ROBERSON, & NICK DELEO 2025-04-08 20:30:00 WRITERS ROUND: MAIN STAGE       15.0         150.0    41.2                   False
                                                                                                                LUNCH TIME TUNES FEAT. BETHANY 2025-04-09 12:00:00                FRONT CAFE        0.0          75.0     0.0                   False
                                                                                                         

  df['Date/Time'] = pd.to_datetime(df['Date/Time'], errors='coerce')


In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

# --- Setup Selenium ---
options = webdriver.ChromeOptions()
# options.add_argument('--headless')  # Uncomment this if you want the browser hidden
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
wait = WebDriverWait(driver, 10)

# --- Go to Listening Room Shows Page ---
driver.get("https://www.listeningroomcafe.com/nashville-shows")
time.sleep(4)

# --- Function to extract tiles from current page ---
def extract_show_tiles(driver):
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    return soup.find_all('a', class_='grid-product__image')

# --- Click through all pages and collect show tiles ---
all_tiles = []
while True:
    all_tiles.extend(extract_show_tiles(driver))
    try:
        next_button = driver.find_element(By.CLASS_NAME, 'pager__button-text')
        if "Next" in next_button.text:
            next_button.click()
            time.sleep(3)
        else:
            break
    except:
        break

# --- Loop through each show tile across pages ---
events = []
for i in range(len(all_tiles)):
    # Reload and go back to correct tile
    driver.get("https://www.listeningroomcafe.com/nashville-shows")
    time.sleep(4)

    # Navigate to the correct page
    page_index = i // 12  # assume ~12 shows per page
    tile_index = i % 12
    for _ in range(page_index):
        try:
            next_btn = driver.find_element(By.CLASS_NAME, 'pager__button-text')
            next_btn.click()
            time.sleep(2)
        except:
            break

    tiles = driver.find_elements(By.CLASS_NAME, "grid-product__image")
    if tile_index >= len(tiles):
        continue
    tiles[tile_index].click()
    time.sleep(2)

    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "product-details__product-sku")))
    page_soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Extract fields
    artist = page_soup.find('h1', class_='product-details__product-title')
    artist = artist.text.strip() if artist else 'N/A'

    room = page_soup.find('div', class_='label__text')
    room = room.text.strip() if room else 'N/A'
    capacity = 255 if "MAIN STAGE" in room.upper() else 75 if "FRONT CAFE" in room.upper() else None

    datetime = page_soup.find('div', class_='product-details__product-sku')
    datetime = datetime.text.strip() if datetime else 'N/A'

    price = page_soup.find('span', class_='details-product-price__value')
    price = price.text.strip() if price else 'N/A'

    tickets_tag = page_soup.find('div', class_='product-details-module__title')
    if tickets_tag:
        match = re.search(r'(\d+)\s+available', tickets_tag.get_text())
        tickets_left = int(match.group(1)) if match else None
    else:
        tickets_left = None

    percent_sold = round(100 * (1 - tickets_left / capacity), 1) if capacity and tickets_left is not None else None

    events.append({
        'Artist/Event': artist,
        'Date/Time': datetime,
        'Room': room,
        'Capacity': capacity,
        'Tickets Left': tickets_left,
        '% Sold': percent_sold,
        'Price': price
    })

driver.quit()

# --- Convert to DataFrame ---
df = pd.DataFrame(events)

# --- Save to CSV ---
df.to_csv("listening_room_shows.csv", index=False)
print("✅ Saved to listening_room_shows.csv")

# --- Analyze pricing vs time slot ---
df['Date/Time'] = pd.to_datetime(df['Date/Time'], errors='coerce')
df['Day of Week'] = df['Date/Time'].dt.day_name()
df['Hour'] = df['Date/Time'].dt.hour
df['Price ($)'] = df['Price'].replace('[\$,]', '', regex=True).astype(float)

# Calculate average price per Day+Hour
avg_price = df.groupby(['Day of Week', 'Hour'])['Price ($)'].transform('mean')
df['Above Avg for Day+Hour'] = df['Price ($)'] > avg_price

# --- Output final result ---
print("\n🎤 Final Data Preview:\n")
print(df.to_string(index=False))


  df['Price ($)'] = df['Price'].replace('[\$,]', '', regex=True).astype(float)


✅ Saved to listening_room_shows.csv

🎤 Final Data Preview:

                                                           Artist/Event           Date/Time                      Room  Capacity  Tickets Left  % Sold  Price Day of Week  Hour  Price ($)  Above Avg for Day+Hour
                           BRANDY NEELLY, KAYLIN ROBERSON, & NICK DELEO 2025-04-08 20:30:00 WRITERS ROUND: MAIN STAGE       255           150    41.2 $15.00     Tuesday    20       15.0                   False
                                         LUNCH TIME TUNES FEAT. BETHANY 2025-04-09 12:00:00                FRONT CAFE        75            75     0.0  $0.00   Wednesday    12        0.0                   False
                                       HAPPY HOUR FEAT. MARY LOU LACKEY 2025-04-09 16:00:00                FRONT CAFE        75            70     6.7  $0.00   Wednesday    16        0.0                   False
             MATT MULHARE, COLTON VENNER, CHASE MCDANIEL, & LYNN HUTTON 2025-04-09 18:00:00 WRITERS 

  df['Date/Time'] = pd.to_datetime(df['Date/Time'], errors='coerce')


In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

# --- Setup Selenium ---
options = webdriver.ChromeOptions()
# options.add_argument('--headless')  # Uncomment if you want browser hidden
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
wait = WebDriverWait(driver, 10)

# --- Go to Listening Room Shows Page ---
driver.get("https://www.listeningroomcafe.com/nashville-shows")
time.sleep(4)

# --- Function to extract tiles from current page ---
def extract_show_tiles(driver):
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    return soup.find_all('a', class_='grid-product__image')

# --- Loop through max 3 pages ---
all_tiles = []
for page in range(3):
    print(f"Scraping page {page + 1}")
    all_tiles.extend(extract_show_tiles(driver))
    
    try:
        next_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'pager__button-text'))
        )
        if "Next" in next_button.text:
            next_button.click()
            time.sleep(3)
        else:
            break
    except:
        break

# --- Loop through each show tile ---
events = []
for i in range(len(all_tiles)):
    # Return to the correct tile
    driver.get("https://www.listeningroomcafe.com/nashville-shows")
    time.sleep(3)

    # Navigate back to correct page
    page_index = i // 12
    tile_index = i % 12
    for _ in range(page_index):
        try:
            next_btn = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.CLASS_NAME, 'pager__button-text'))
            )
            next_btn.click()
            time.sleep(2)
        except:
            break

    tiles = driver.find_elements(By.CLASS_NAME, "grid-product__image")
    if tile_index >= len(tiles):
        continue

    tiles[tile_index].click()
    time.sleep(2)

    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "product-details__product-sku")))
    page_soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Artist name
    artist = page_soup.find('h1', class_='product-details__product-title')
    artist = artist.text.strip() if artist else 'N/A'

    # Room
    room = page_soup.find('div', class_='label__text')
    room = room.text.strip() if room else 'N/A'
    capacity = 255 if "MAIN STAGE" in room.upper() else 75 if "FRONT CAFE" in room.upper() else None

    # Date/time
    datetime = page_soup.find('div', class_='product-details__product-sku')
    datetime = datetime.text.strip() if datetime else 'N/A'

    # Ticket price
    price = page_soup.find('span', class_='details-product-price__value')
    price = price.text.strip() if price else 'N/A'

    # Tickets available
    tickets_tag = page_soup.find('div', class_='product-details-module__title')
    if tickets_tag:
        match = re.search(r'(\d+)\s+available', tickets_tag.get_text())
        tickets_left = int(match.group(1)) if match else None
    else:
        tickets_left = None

    # % Sold
    percent_sold = round(100 * (1 - tickets_left / capacity), 1) if capacity and tickets_left is not None else None

    events.append({
        'Artist/Event': artist,
        'Date/Time': datetime,
        'Room': room,
        'Capacity': capacity,
        'Tickets Left': tickets_left,
        '% Sold': percent_sold,
        'Price': price
    })

driver.quit()

# --- Build DataFrame ---
df = pd.DataFrame(events)

# Save to CSV
df.to_csv("listening_room_shows.csv", index=False)
print("✅ Data saved to listening_room_shows.csv")

# --- Price Analysis ---
df['Date/Time'] = pd.to_datetime(df['Date/Time'], errors='coerce')
df['Day of Week'] = df['Date/Time'].dt.day_name()
df['Hour'] = df['Date/Time'].dt.hour
df['Price ($)'] = df['Price'].replace(r'[\$,]', '', regex=True).astype(float)

# Compare price to average for same day+hour
avg_price = df.groupby(['Day of Week', 'Hour'])['Price ($)'].transform('mean')
df['Above Avg for Day+Hour'] = df['Price ($)'] > avg_price

# Show results
print("\n🎤 Final Scraped Shows:\n")
print(df[['Artist/Event', 'Date/Time', 'Room', 'Price ($)', 'Tickets Left', '% Sold', 'Above Avg for Day+Hour']].to_string(index=False))


Scraping page 1
✅ Data saved to listening_room_shows.csv

🎤 Final Scraped Shows:

                                                           Artist/Event           Date/Time                      Room  Price ($)  Tickets Left  % Sold  Above Avg for Day+Hour
                           BRANDY NEELLY, KAYLIN ROBERSON, & NICK DELEO 2025-04-08 20:30:00 WRITERS ROUND: MAIN STAGE       15.0           150    41.2                   False
                                         LUNCH TIME TUNES FEAT. BETHANY 2025-04-09 12:00:00                FRONT CAFE        0.0            75     0.0                   False
                                       HAPPY HOUR FEAT. MARY LOU LACKEY 2025-04-09 16:00:00                FRONT CAFE        0.0            70     6.7                   False
             MATT MULHARE, COLTON VENNER, CHASE MCDANIEL, & LYNN HUTTON 2025-04-09 18:00:00 WRITERS ROUND: MAIN STAGE       15.0           206    19.2                   False
D VINCENT WILLIAMS, SCOTT SEAN WHITE, & TRE

  df['Date/Time'] = pd.to_datetime(df['Date/Time'], errors='coerce')
