# Add New Venue to Database

This notebook helps you quickly add new venues by:
1. Testing the scraping selectors
2. Auto-generating the config
3. Inserting to the database

In [16]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import psycopg2
import json
import os
from dotenv import load_dotenv
from populate_events_functions import start_selenium, parse_date

load_dotenv()

True

## Step 1: Configure Your New Venue

Fill in the basic info and selectors for your venue.

In [17]:
# ========================================
# CONFIGURE YOUR VENUE HERE
# ========================================
# Every constant in this cell is read by later cells: the selectors and date
# format drive the scraping test, and all values end up either in the venue
# row or in the generated scraping config.

# Basic venue info
VENUE_NAME = "The Armory"
VENUE_ADDRESS = ""
VENUE_CITY = "Fort Collins"
# BASE_URL is both the page the scraping test loads and the value stored in
# the venue's `url` column, which the upsert uses as its conflict key.
BASE_URL = "https://bohemianlivemusic.org/our-venues/the-armory/"

# Scraping method: 'html' or 'json-ld'
SCRAPING_METHOD = "html"

# CSS Selectors (inspect the website to find these)
# EVENT_CONTAINER is matched against the whole page; the artist, date and
# genre selectors are then resolved inside each matched container.
EVENT_CONTAINER = 'div.elementor-element.elementor-element-18ac28d'
ARTIST_SELECTOR = 'h4.the7-heading-title'
DATE_SELECTOR = 'span.elementor-icon-list-text'
# Passed to parse_date together with the raw date text. The format carries no
# year, so parse_date presumably infers one -- verify in
# populate_events_functions before relying on it around year boundaries.
DATE_FORMAT = '%A, %B %d @ %I:%M %p'
# None means the scraping test skips genre extraction entirely.
GENRE_SELECTOR = None
# Pagination (if the venue has multiple pages)
# PAGINATION_URL_PATTERN and MAX_PAGES are only written into the config when
# HAS_PAGINATION is True.
HAS_PAGINATION = False
PAGINATION_URL_PATTERN = None  # e.g., "https://venue.com/events?page={page}"
MAX_PAGES = 5

## Step 2: Test the Scraping

This loads the page and previews the first five events it finds, so you can check the selectors and date format before saving anything.

In [18]:
def test_scraping(base_url, event_container, artist_selector, date_selector, genre_selector, date_format):
    """
    Test scraping configuration and return preview of found events
    """
    print(f"Loading {base_url}...\n")
    
    driver = start_selenium()
    driver.get(base_url)
    time.sleep(2)
    
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    driver.quit()
    
    # Find event containers
    containers = soup.select(event_container)
    print(f"Found {len(containers)} event containers\n")
    
    if len(containers) == 0:
        print("‚ùå No event containers found! Check your EVENT_CONTAINER selector.")
        return []
    
    # Preview first 5 events
    events = []
    for i, container in enumerate(containers[:5]):
        artist_elem = container.select_one(artist_selector)
        artist = artist_elem.text.strip() if artist_elem else "[NOT FOUND]"
        
        date_elem = container.select_one(date_selector)
        date_text = date_elem.text.strip() if date_elem else "[NOT FOUND]"
        
        genre = None
        if genre_selector:
            genre_elem = container.select_one(genre_selector)
            genre = genre_elem.text.strip() if genre_elem else None
        
        # Try to parse date
        parsed_date = None
        try:
            if date_text != "[NOT FOUND]":
                parsed_date = parse_date(date_text, date_format)
        except Exception as e:
            parsed_date = f"[ERROR: {e}]"
        
        events.append({
            'artist': artist,
            'date_text': date_text,
            'parsed_date': parsed_date,
            'genre': genre
        })
        
        print(f"Event {i+1}:")
        print(f"  Artist: {artist}")
        print(f"  Date (raw): {date_text}")
        print(f"  Date (parsed): {parsed_date}")
        if genre:
            print(f"  Genre: {genre}")
        print()
    
    if len(containers) > 5:
        print(f"... and {len(containers) - 5} more events\n")
    
    return events

# Preview what the configured selectors find on the venue page.
test_events = test_scraping(
    base_url=BASE_URL,
    event_container=EVENT_CONTAINER,
    artist_selector=ARTIST_SELECTOR,
    date_selector=DATE_SELECTOR,
    genre_selector=GENRE_SELECTOR,
    date_format=DATE_FORMAT,
)

Loading https://bohemianlivemusic.org/our-venues/the-armory/...

Found 9 event containers

Event 1:
  Artist: An Evening with The Third Mind ‚Äì Live
  Date (raw): Friday, October 24 @ 7:00 pm
  Date (parsed): 2025-10-24

Event 2:
  Artist: Stelth Ulvang & Dan Mangan with Frail Talk
  Date (raw): Wednesday, November 5 @ 7:00 pm
  Date (parsed): 2025-11-05

Event 3:
  Artist: Steph Strings ‚Äì North America 2025 Tour Part One
  Date (raw): Thursday, November 6 @ 7:00 pm
  Date (parsed): 2025-11-06

Event 4:
  Artist: Dean Johnson
  Date (raw): Saturday, November 8 @ 7:00 pm
  Date (parsed): 2025-11-08

Event 5:
  Artist: Sean Hayes & Sway Wild
  Date (raw): Thursday, November 20 @ 7:00 pm
  Date (parsed): 2025-11-20

... and 4 more events



## Step 3: Review Auto-Generated Config

If the test looks good, here's the config that will be saved.

In [19]:
# Show the raw preview records returned by the scraping test.
test_events

[{'artist': 'An Evening with The Third Mind ‚Äì Live',
  'date_text': 'Friday, October 24 @ 7:00 pm',
  'parsed_date': datetime.date(2025, 10, 24),
  'genre': None},
 {'artist': 'Stelth Ulvang & Dan Mangan with Frail Talk',
  'date_text': 'Wednesday, November 5 @ 7:00 pm',
  'parsed_date': datetime.date(2025, 11, 5),
  'genre': None},
 {'artist': 'Steph Strings ‚Äì North America 2025 Tour Part One',
  'date_text': 'Thursday, November 6 @ 7:00 pm',
  'parsed_date': datetime.date(2025, 11, 6),
  'genre': None},
 {'artist': 'Dean Johnson',
  'date_text': 'Saturday, November 8 @ 7:00 pm',
  'parsed_date': datetime.date(2025, 11, 8),
  'genre': None},
 {'artist': 'Sean Hayes & Sway Wild',
  'date_text': 'Thursday, November 20 @ 7:00 pm',
  'parsed_date': datetime.date(2025, 11, 20),
  'genre': None}]

In [20]:
# Generate the scraping config that is stored alongside the venue row.

# Fail fast on an unusable pagination setup: without a URL pattern the saved
# config would carry "url_pattern": null and only break later.
if HAS_PAGINATION and not PAGINATION_URL_PATTERN:
    raise ValueError(
        "HAS_PAGINATION is True but PAGINATION_URL_PATTERN is not set"
    )

# Pagination details are only included when pagination is enabled.
pagination_config = {"enabled": HAS_PAGINATION}
if HAS_PAGINATION:
    pagination_config["url_pattern"] = PAGINATION_URL_PATTERN
    pagination_config["pages"] = MAX_PAGES

scraping_config = {
    "scraping_method": SCRAPING_METHOD,
    "base_url": BASE_URL,
    "pagination": pagination_config,
    "selectors": {
        "event_container": EVENT_CONTAINER,
        "artist": ARTIST_SELECTOR,
        "date": DATE_SELECTOR,
        "genre": GENRE_SELECTOR,
        # Cancellation detection is not configured by this notebook.
        "cancellation_indicator": None
    },
    "date_format": DATE_FORMAT,
    "filters": {
        "check_cancelled": False
    }
}

print("Generated Scraping Config:")
print(json.dumps(scraping_config, indent=2))

Generated Scraping Config:
{
  "scraping_method": "html",
  "base_url": "https://bohemianlivemusic.org/our-venues/the-armory/",
  "pagination": {
    "enabled": false
  },
  "selectors": {
    "event_container": "div.elementor-element.elementor-element-18ac28d",
    "artist": "h4.the7-heading-title",
    "date": "span.elementor-icon-list-text",
    "genre": null,
    "cancellation_indicator": null
  },
  "date_format": "%A, %B %d @ %I:%M %p",
  "filters": {
    "check_cancelled": false
  }
}


## Step 4: Insert to Database

⚠️ **Only run this once you've confirmed the scraping test works!**

In [21]:
def add_venue_to_db(name, address, city, url, scraping_config):
    """
    Insert a venue, or update the existing row that has the same URL.

    Connects with the DSN in the ``DATABASE_URL_UNPOOLED`` environment
    variable and runs a single upsert keyed on ``url``.

    Args:
        name: Venue name.
        address: Venue address.
        city: Venue city.
        url: Venue URL; used as the conflict key of the upsert.
        scraping_config: JSON-serialisable config, stored as a JSON string.

    Returns:
        The ``venue_id`` of the inserted or updated row.

    Raises:
        RuntimeError: If ``DATABASE_URL_UNPOOLED`` is unset or empty.
        Exception: Any error raised while executing the upsert; the
            transaction is rolled back and the error is printed before it
            is re-raised.
    """
    dsn = os.getenv('DATABASE_URL_UNPOOLED')
    if not dsn:
        # Without this check the missing variable only shows up as an
        # unclear error from inside the database driver.
        raise RuntimeError(
            "DATABASE_URL_UNPOOLED is not set; add it to the environment or .env file"
        )

    insert_query = """
    INSERT INTO venues (name, address, city, url, scraping_config)
    VALUES (%s, %s, %s, %s, %s)
    ON CONFLICT (url) DO UPDATE SET
        name = EXCLUDED.name,
        address = EXCLUDED.address,
        city = EXCLUDED.city,
        scraping_config = EXCLUDED.scraping_config,
        updated_at = CURRENT_TIMESTAMP
    RETURNING venue_id, name;
    """

    conn = psycopg2.connect(dsn)
    try:
        # The cursor is created inside the try so the connection is still
        # closed if cursor creation itself fails.
        cur = conn.cursor()
        try:
            cur.execute(insert_query, (
                name,
                address,
                city,
                url,
                json.dumps(scraping_config)
            ))

            venue_id, venue_name = cur.fetchone()
            conn.commit()
        finally:
            cur.close()

        print(f"‚úÖ Successfully added/updated: {venue_name} (ID: {venue_id})")
        return venue_id

    except Exception as e:
        conn.rollback()
        print(f"‚ùå Error: {e}")
        raise
    finally:
        conn.close()

# Gate the insert on the quality of the scraping preview rather than on an
# exact event count: a venue with fewer than five upcoming events can still
# be added, while a broken selector or date format blocks the insert.
unusable_events = [
    event for event in test_events
    if event['artist'] == "[NOT FOUND]"
    # The preview leaves parsed_date as None when no date text was found and
    # stores an "[ERROR: ...]" string when parse_date raised.
    # NOTE(review): assumes parse_date never returns a plain string on
    # success -- confirm against populate_events_functions.
    or event['parsed_date'] is None
    or isinstance(event['parsed_date'], str)
]

if test_events and not unusable_events:
    # Insert the venue
    venue_id = add_venue_to_db(
        VENUE_NAME,
        VENUE_ADDRESS,
        VENUE_CITY,
        BASE_URL,
        scraping_config
    )

    print(f"\nüéâ {VENUE_NAME} is now ready to be scraped!")
elif not test_events:
    print("Skipping insert: the scraping test found no events.")
else:
    print(
        f"Skipping insert: {len(unusable_events)} of {len(test_events)} "
        "previewed events are missing an artist or a parsed date."
    )

‚úÖ Successfully added/updated: The Armory (ID: 34)

üéâ The Armory is now ready to be scraped!


## Quick Reference: Common Date Formats

| Example | Format String |
|---------|---------------|
| Mon Jan 15 | `%a %b %d` |
| Monday January 15 | `%A %B %d` |
| 01/15/2024 | `%m/%d/%Y` |
| 2024-01-15 | `%Y-%m-%d` |
| Jan 15, 2024 | `%b %d, %Y` |
| Mon, Jan 15, 2024 | `%a, %b %d, %Y` |
| Friday, October 24 @ 7:30 pm | `%A, %B %d @ %I:%M %p` |