# NYC Event Fetcher

Fetches upcoming events from multiple NYC sources, organized by retrieval method.

## Sources

**API-based:**
- Caveat NYC - Comedy and variety shows
- Riverside Park - Park events and volunteer activities

**Web Scraping (Playwright):**
- AMNH - American Museum of Natural History
- The Met - Metropolitan Museum of Art
- Asia Society - Cultural events and programs
- Strand Books - Author talks and book events
- NY Historical Society - History talks and lectures
- Open House NY - Architecture tours and events

In [None]:
import requests
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
import re
import time
import html

# Fix for Playwright sync API in Jupyter notebooks
import nest_asyncio
nest_asyncio.apply()

---
# Method 1: API-Based Sources

These sources provide JSON APIs that can be fetched directly with `requests`.

### Caveat NYC

In [6]:
def fetch_caveat_events():
    """Fetch events from Caveat NYC API."""
    url = "https://www.caveat.nyc/api/events/upcoming"
    response = requests.get(url)
    data = response.json()
    
    events = []
    for record in data.get("records", []):
        fields = record.get("fields", {})
        
        start_time = fields.get("Event start date and time", "")
        if start_time:
            try:
                dt = datetime.fromisoformat(start_time.replace("Z", "+00:00"))
                dt_eastern = dt.astimezone(ZoneInfo("America/New_York"))
            except:
                dt_eastern = None
        else:
            dt_eastern = None
        
        # Get short description, clean HTML
        description = fields.get("Short description", "") or fields.get("description", "")
        if description:
            description = re.sub(r'<[^>]+>', '', description)  # Strip HTML
            description = description.strip()[:150]
            if len(description) == 150:
                description += "..."
        
        events.append({
            'name': fields.get("Event", "Unknown"),
            'datetime': dt_eastern,
            'date_str': dt_eastern.strftime("%a %m/%d %I:%M %p") if dt_eastern else "TBD",
            'type': 'Livestream' if fields.get("Livestream") else 'In-Person',
            'sold_out': fields.get("Sold out", False),
            'source': 'Caveat NYC',
            'location': 'Caveat NYC, 21 A Clinton St',
            'description': description,
            'url': fields.get("Ticket URL", "https://www.caveat.nyc")
        })
    
    return events

caveat_events = fetch_caveat_events()
print(f"Caveat NYC: {len(caveat_events)} events")

Caveat NYC: 141 events


### Riverside Park NYC

In [7]:
def fetch_riverside_events():
    """Fetch events from Riverside Park NYC API (WordPress/Tribe Events)."""
    url = "https://riversideparknyc.org/wp-json/tribe/events/v1/events?per_page=50"
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    data = response.json()
    
    events = []
    for e in data.get("events", []):
        start_time = e.get("start_date", "")
        if start_time:
            try:
                dt = datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S")
                dt_eastern = dt.replace(tzinfo=ZoneInfo("America/New_York"))
            except:
                dt_eastern = None
        else:
            dt_eastern = None
        
        categories = e.get("categories", [])
        event_type = categories[0].get("name", "Event") if categories else "Event"
        
        # Get venue
        venue = e.get("venue", {})
        venue_name = html.unescape(venue.get("venue", "")) if isinstance(venue, dict) else ""
        location = f"Riverside Park - {venue_name}" if venue_name else "Riverside Park"
        
        # Get description from excerpt
        description = e.get("excerpt", "") or ""
        if description:
            description = re.sub(r'<[^>]+>', '', description)  # Strip HTML
            description = description.strip()[:150]
            if len(description) == 150:
                description += "..."
        
        events.append({
            'name': e.get("title", "Unknown"),
            'datetime': dt_eastern,
            'date_str': dt_eastern.strftime("%a %m/%d %I:%M %p") if dt_eastern else "TBD",
            'type': event_type,
            'sold_out': False,
            'source': 'Riverside Park',
            'location': location,
            'description': description,
            'url': e.get("url", "https://riversideparknyc.org/events/")
        })
    
    return events

riverside_events = fetch_riverside_events()
print(f"Riverside Park: {len(riverside_events)} events")

Riverside Park: 27 events


---
# Method 2: Web Scraping (Playwright)

These sources require browser automation to bypass Cloudflare or render JavaScript content.

### AMNH (American Museum of Natural History)

In [8]:
def fetch_amnh_events():
    """Scrape events from AMNH calendar using Playwright (bypasses Cloudflare)."""
    from playwright.sync_api import sync_playwright
    
    events = []
    
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=['--disable-blink-features=AutomationControlled']
        )
        
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        )
        page = context.new_page()
        
        print("Loading AMNH calendar (bypassing Cloudflare)...")
        page.goto("https://www.amnh.org/calendar", timeout=30000)
        
        # Wait for Cloudflare challenge
        for i in range(15):
            time.sleep(2)
            if "moment" not in page.title().lower():
                break
        
        time.sleep(2)
        
        links = page.query_selector_all("a.amnh-calendar-new-event")
        
        seen = set()
        for link in links:
            href = link.get_attribute("href")
            if not href or href in seen:
                continue
            seen.add(href)
            
            text = ' '.join(link.evaluate("el => el.textContent").split())
            
            type_match = re.match(r'^(Festival|Planetarium Program|Member Program|Science Social|After-Hours Program|Cultural Program|Lecture|Workshop)', text, re.IGNORECASE)
            event_type = type_match.group(1) if type_match else "Event"
            
            date_pattern = r'((?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)[s,]*[^\d]*(?:January|February|March|April|May|June|July|August|September|October|November|December)[\s\d,]+(?:20\d{2})?)'
            date_match = re.search(date_pattern, text)
            date_str = date_match.group(1).strip() if date_match else "See website"
            
            # Try to parse datetime
            dt_eastern = None
            if date_match:
                try:
                    date_text = date_match.group(1).strip().replace(",", "")
                    for fmt in ["%A %B %d %Y", "%A %B %d  %Y"]:
                        try:
                            dt_eastern = datetime.strptime(date_text, fmt).replace(tzinfo=ZoneInfo("America/New_York"))
                            break
                        except:
                            continue
                except:
                    pass
            
            name_start = type_match.end() if type_match else 0
            name_end = text.find(date_match.group(0)) if date_match else min(100, len(text))
            full_text = text[name_start:].strip()
            name = text[name_start:name_end].strip()
            
            # Extract description (text after the name, before the date)
            description = ""
            if date_match:
                desc_end = text.find(date_match.group(0))
                if desc_end > name_start + len(name):
                    description = text[name_start:desc_end].strip()
                    # Remove the name from description
                    if description.startswith(name):
                        description = description[len(name):].strip()
            if len(description) > 150:
                description = description[:150] + "..."
            
            if len(name) > 80:
                name = name[:80] + "..."
            
            events.append({
                'name': name,
                'datetime': dt_eastern,
                'date_str': date_str,
                'type': event_type,
                'sold_out': False,
                'source': 'AMNH',
                'location': 'American Museum of Natural History',
                'description': description,
                'has_specific_time': False,  # AMNH calendar only shows dates, not times
                'url': f"https://www.amnh.org{href}" if href.startswith('/') else href
            })
        
        browser.close()
    
    return events

amnh_events = fetch_amnh_events()
print(f"AMNH: {len(amnh_events)} events")

Error: It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.

### The Metropolitan Museum of Art

In [None]:
def fetch_met_events():
    """Scrape events from The Met using Playwright (bypasses Vercel protection)."""
    from playwright.sync_api import sync_playwright
    
    events = []
    
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=['--disable-blink-features=AutomationControlled']
        )
        
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
        )
        page = context.new_page()
        
        print("Loading Met Museum events (bypassing Vercel protection)...")
        page.goto("https://www.metmuseum.org/events", timeout=60000)
        
        for i in range(15):
            time.sleep(2)
            if "met" in page.title().lower():
                break
        time.sleep(3)
        
        # Get text content and parse day by day
        text = page.evaluate("() => document.body.innerText")
        
        # Split by day headers
        day_pattern = r'((?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2})'
        parts = re.split(day_pattern, text)
        
        current_date = None
        raw_events = []
        
        for part in parts:
            if re.match(day_pattern, part):
                current_date = part.strip()
                continue
            
            if not current_date:
                continue
            
            lines = part.strip().split('\n')
            i = 0
            while i < len(lines):
                line = lines[i].strip()
                
                if re.match(r'\d{1,2}:\d{2}\s*(?:AM|PM)', line):
                    time_str = line
                    name = ""
                    desc = ""
                    j = i - 1
                    while j >= 0:
                        prev_line = lines[j].strip()
                        if not prev_line:
                            j -= 1
                            continue
                        if not name:
                            if len(prev_line) > 100 or prev_line.startswith('Join') or prev_line.startswith('Explore'):
                                desc = prev_line
                            else:
                                name = prev_line
                                break
                        j -= 1
                    
                    location = "The Met Fifth Avenue"
                    if i + 1 < len(lines):
                        next_line = lines[i + 1].strip()
                        if "Cloisters" in next_line:
                            location = "The Met Cloisters"
                    
                    if name and len(name) > 3 and "CANCELED" not in name:
                        raw_events.append({
                            'date': current_date,
                            'name': name[:70],
                            'time': time_str,
                            'location': location,
                            'description': desc[:100] if desc else ''
                        })
                i += 1
        
        browser.close()
    
    # Convert to standard format
    for e in raw_events:
        dt_eastern = None
        has_specific_time = True
        
        try:
            date_str = e['date'] + ", 2026"
            dt = datetime.strptime(date_str, "%A, %B %d, %Y")
            time_obj = datetime.strptime(e['time'], "%I:%M %p")
            dt_eastern = dt.replace(hour=time_obj.hour, minute=time_obj.minute, tzinfo=ZoneInfo("America/New_York"))
        except:
            has_specific_time = False
        
        events.append({
            'name': e['name'],
            'datetime': dt_eastern,
            'date_str': e['date'],
            'type': 'Museum Event',
            'sold_out': False,
            'source': 'The Met',
            'location': e['location'],
            'description': e['description'],
            'has_specific_time': has_specific_time,
            'url': 'https://www.metmuseum.org/events'
        })
    
    return events

met_events = fetch_met_events()
print(f"The Met: {len(met_events)} events")

### Asia Society

In [None]:
def fetch_asia_society_events():
    """Scrape events from Asia Society using Playwright (bypasses 403)."""
    from playwright.sync_api import sync_playwright
    
    events = []
    
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=['--disable-blink-features=AutomationControlled']
        )
        
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
        )
        page = context.new_page()
        
        print("Loading Asia Society events...")
        page.goto("https://asiasociety.org/new-york/events", timeout=60000)
        
        for i in range(10):
            time.sleep(2)
            if "asia" in page.title().lower():
                break
        time.sleep(2)
        
        # Get text content
        text = page.evaluate("() => document.body.innerText")
        
        browser.close()
    
    # Parse events - split by "IN-PERSON" or "VIRTUAL" markers
    # Format: IN-PERSON\nCATEGORY\nTitle\nDate\nTime\nDescription
    sections = re.split(r'\n(IN-PERSON|VIRTUAL)\n', text)
    
    for i in range(1, len(sections), 2):
        if i + 1 >= len(sections):
            break
            
        event_type = sections[i].strip()
        content = sections[i + 1].strip()
        
        lines = content.split('\n')
        lines = [l.strip() for l in lines if l.strip()]
        
        if len(lines) < 3:
            continue
        
        # Find the date line - looks for "Day DD Mon YYYY" pattern
        date_idx = -1
        dt_eastern = None
        has_specific_time = False
        
        for idx, line in enumerate(lines):
            date_match = re.match(r'(\w+)\s+(\d{1,2})\s+(\w+)\s+(\d{4})', line)
            if date_match:
                date_idx = idx
                try:
                    day_name, day, month, year = date_match.groups()
                    date_str_parsed = f"{day} {month} {year}"
                    dt = datetime.strptime(date_str_parsed, "%d %b %Y")
                    dt_eastern = dt.replace(tzinfo=ZoneInfo("America/New_York"))
                except:
                    pass
                break
        
        if date_idx < 0 or not dt_eastern:
            continue
        
        # Category is before the date (usually line 0)
        # Title is between category and date (could be multiple lines for categories with slashes)
        category = lines[0] if date_idx > 0 else ""
        
        # Title is usually line before date, or could have subcategory
        title = ""
        for idx in range(date_idx - 1, -1, -1):
            # Skip category-looking lines (all caps, short)
            if lines[idx].isupper() and len(lines[idx]) < 60:
                continue
            # Skip lines with just slashes (category separators)
            if "/" in lines[idx] and lines[idx].replace("/", "").replace(" ", "").isupper():
                continue
            title = lines[idx][:70]
            break
        
        if not title:
            continue
        
        # Time is usually the line after date
        if date_idx + 1 < len(lines):
            time_line = lines[date_idx + 1]
            time_match = re.search(r'(\d{1,2}(?::\d{2})?)\s*(?:-|–)?\s*(?:\d{1,2}(?::\d{2})?)?\s*(a\.?m\.?|p\.?m\.?)', time_line, re.I)
            
            if time_match:
                try:
                    time_str = time_match.group(1)
                    am_pm = time_match.group(2).replace('.', '').lower()
                    if ':' not in time_str:
                        time_str += ':00'
                    time_obj = datetime.strptime(f"{time_str} {am_pm}", "%I:%M %p")
                    dt_eastern = dt_eastern.replace(hour=time_obj.hour, minute=time_obj.minute)
                    has_specific_time = True
                except:
                    pass
        
        # Description is usually after time line
        description = ""
        desc_start = date_idx + 2 if date_idx + 2 < len(lines) else -1
        if desc_start > 0 and desc_start < len(lines):
            desc_line = lines[desc_start]
            # Skip buttons like "BUY TICKETS", "REGISTER NOW"
            if desc_line not in ["BUY TICKETS", "REGISTER NOW", "LEARN MORE"]:
                description = desc_line[:100]
        
        events.append({
            'name': title,
            'datetime': dt_eastern,
            'date_str': lines[date_idx] if date_idx >= 0 else "",
            'type': event_type,
            'sold_out': False,
            'source': 'Asia Society',
            'location': 'Asia Society, 725 Park Ave',
            'description': description,
            'has_specific_time': has_specific_time,
            'url': 'https://asiasociety.org/new-york/events'
        })
    
    return events

asia_society_events = fetch_asia_society_events()
print(f"Asia Society: {len(asia_society_events)} events")

### Strand Books

In [None]:
def fetch_strand_events():
    """Scrape events from Strand Books using Playwright."""
    from playwright.sync_api import sync_playwright
    
    events = []
    
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=['--disable-blink-features=AutomationControlled']
        )
        
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
        )
        page = context.new_page()
        
        print("Loading Strand Books events...")
        page.goto("https://www.strandbooks.com/events.html", timeout=60000)
        time.sleep(3)
        
        text = page.evaluate("() => document.body.innerText")
        browser.close()
    
    # Parse the text content
    current_month = None
    current_year = None
    
    lines = text.split('\n')
    lines = [l.strip() for l in lines if l.strip()]
    
    i = 0
    while i < len(lines):
        line = lines[i]
        
        # Check for month header
        month_match = re.match(r'^(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{4})$', line)
        if month_match:
            current_month = month_match.group(1)
            current_year = int(month_match.group(2))
            i += 1
            continue
        
        # Check for day abbreviation (start of event)
        day_abbr_match = re.match(r'^(MON|TUE|WED|THU|FRI|SAT|SUN)$', line)
        if day_abbr_match and current_month and i + 1 < len(lines):
            day_abbr = day_abbr_match.group(1)
            
            # Next line should be the date number
            date_num_match = re.match(r'^(\d{1,2})$', lines[i + 1])
            if date_num_match:
                date_num = int(date_num_match.group(1))
                
                # Title is the line after date number
                if i + 2 < len(lines):
                    title = lines[i + 2]
                    
                    # Check if sold out
                    sold_out = title.startswith("SOLD OUT")
                    if sold_out:
                        title = title.replace("SOLD OUT - ", "").replace("SOLD OUT -", "")
                    
                    # Description is typically the next non-empty line
                    description = ""
                    if i + 3 < len(lines) and lines[i + 3] not in ["View More Details", ""]:
                        desc_line = lines[i + 3]
                        if not re.match(r'^(MON|TUE|WED|THU|FRI|SAT|SUN)$', desc_line) and not re.match(r'^(January|February|March|April|May|June|July|August|September|October|November|December)', desc_line):
                            description = desc_line[:100]
                    
                    # Determine location
                    location = "Strand Bookstore, 828 Broadway"
                    if "Offsite at" in title:
                        offsite_match = re.search(r'Offsite at ([^-:]+)', title)
                        if offsite_match:
                            location = offsite_match.group(1).strip()
                            title = re.sub(r'^Offsite at [^-:]+ - ', '', title)
                    
                    # Parse datetime
                    try:
                        dt = datetime.strptime(f"{current_month} {date_num} {current_year}", "%B %d %Y")
                        dt_eastern = dt.replace(tzinfo=ZoneInfo("America/New_York"))
                    except:
                        dt_eastern = None
                    
                    if dt_eastern and title:
                        events.append({
                            'name': title[:70],
                            'datetime': dt_eastern,
                            'date_str': f"{day_abbr}, {current_month} {date_num}",
                            'type': 'Book Event',
                            'sold_out': sold_out,
                            'source': 'Strand Books',
                            'location': location,
                            'description': description,
                            'has_specific_time': False,
                            'url': 'https://www.strandbooks.com/events.html'
                        })
                
                i += 4
                continue
        
        i += 1
    
    return events

strand_events = fetch_strand_events()
print(f"Strand Books: {len(strand_events)} events")

### NY Historical Society

In [None]:
def fetch_nyhistory_events():
    """Scrape events from NY Historical Society using Playwright."""
    from playwright.sync_api import sync_playwright
    
    events = []
    
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=['--disable-blink-features=AutomationControlled']
        )
        
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
        )
        page = context.new_page()
        
        print("Loading NY Historical Society programs...")
        page.goto("https://www.nyhistory.org/programs?genres=talks&subgenres=", timeout=60000)
        time.sleep(5)
        
        text = page.evaluate("() => document.body.innerText")
        browser.close()
    
    # Split by "TALKS" section markers
    sections = re.split(r'\nTALKS\n', text)
    
    for section in sections[1:]:
        lines = section.strip().split('\n')
        lines = [l.strip() for l in lines if l.strip()]
        
        if len(lines) < 3:
            continue
        
        # First line is the title
        title = lines[0][:70]
        
        description = ""
        date_line = ""
        speakers = ""
        sold_out = False
        
        for i, line in enumerate(lines[1:], 1):
            if line.startswith("Date:"):
                date_line = line
                continue
            
            if line.startswith("Speakers:"):
                speakers = line.replace("Speakers:", "").strip()
                continue
            
            if "Sold Out" in line:
                sold_out = True
                continue
            
            if line in ["In-Person Ticket", "Livestream", "Buy Tickets", "Register", "Learn More"]:
                continue
            
            if not description and len(line) > 30 and not line.startswith("Date:") and not line.startswith("Speakers:"):
                description = line[:100]
        
        # Parse date - Format: "Date: Tuesday, January 27, 6:30 – 7:30 pm ET"
        dt_eastern = None
        has_specific_time = False
        
        date_match = re.search(r'Date:\s*(\w+),\s*(\w+)\s+(\d{1,2}),?\s*(\d{1,2}(?::\d{2})?)\s*(?:–|-)?\s*(?:\d{1,2}(?::\d{2})?)?\s*(am|pm)', date_line, re.I)
        if date_match:
            try:
                month = date_match.group(2)
                day = int(date_match.group(3))
                time_str = date_match.group(4)
                am_pm = date_match.group(5).lower()
                
                if ':' not in time_str:
                    time_str += ':00'
                
                year = 2026
                
                dt = datetime.strptime(f"{month} {day} {year} {time_str} {am_pm}", "%B %d %Y %I:%M %p")
                dt_eastern = dt.replace(tzinfo=ZoneInfo("America/New_York"))
                has_specific_time = True
            except:
                pass
        
        if dt_eastern and title:
            events.append({
                'name': title,
                'datetime': dt_eastern,
                'date_str': date_line.replace("Date:", "").strip() if date_line else "",
                'type': 'Talk',
                'sold_out': sold_out,
                'source': 'NY Historical Society',
                'location': 'NY Historical Society, 170 Central Park West',
                'description': description,
                'has_specific_time': has_specific_time,
                'url': 'https://www.nyhistory.org/programs?genres=talks'
            })
    
    return events

nyhistory_events = fetch_nyhistory_events()
print(f"NY Historical Society: {len(nyhistory_events)} events")

### Open House NY

In [None]:
def fetch_ohny_events():
    """Scrape events from Open House NY using Playwright."""
    from playwright.sync_api import sync_playwright
    
    events = []
    
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=['--disable-blink-features=AutomationControlled']
        )
        
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
        )
        page = context.new_page()
        
        print("Loading Open House NY calendar...")
        page.goto("https://ohny.org/calendar/", timeout=60000)
        time.sleep(5)
        
        text = page.evaluate("() => document.body.innerText")
        browser.close()
    
    # Only get upcoming events (before "Past" section)
    if "Past" in text:
        text = text.split("Past")[0]
    
    if "Upcoming" in text:
        text = text.split("Upcoming", 1)[1]
    
    lines = [l.strip() for l in text.split('\n') if l.strip()]
    
    i = 0
    while i < len(lines):
        line = lines[i]
        
        # Look for date pattern: "January 22nd, 2026"
        date_match = re.match(r'^(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2})(?:st|nd|rd|th),?\s+(\d{4})$', line)
        
        if date_match:
            month = date_match.group(1)
            day = int(date_match.group(2))
            year = int(date_match.group(3))
            
            # Title is the line before the date
            title = ""
            event_type = ""
            
            j = i - 1
            while j >= 0:
                prev_line = lines[j]
                if prev_line in ["Members Only", "Weekend"] or "·" in prev_line:
                    event_type = prev_line
                    j -= 1
                    continue
                title = prev_line[:70]
                break
            
            # Time is the line after the date
            time_str = ""
            has_specific_time = False
            if i + 1 < len(lines):
                time_line = lines[i + 1]
                time_match = re.match(r'^(\d{1,2}:\d{2}(?:AM|PM))\s*-\s*(\d{1,2}:\d{2}(?:AM|PM))$', time_line)
                if time_match:
                    time_str = time_match.group(1)
                    has_specific_time = True
            
            # Parse datetime
            dt_eastern = None
            try:
                dt = datetime.strptime(f"{month} {day} {year}", "%B %d %Y")
                dt_eastern = dt.replace(tzinfo=ZoneInfo("America/New_York"))
                
                if has_specific_time:
                    time_obj = datetime.strptime(time_str, "%I:%M%p")
                    dt_eastern = dt_eastern.replace(hour=time_obj.hour, minute=time_obj.minute)
            except:
                pass
            
            if dt_eastern and title:
                members_only = "Members Only" in event_type
                
                events.append({
                    'name': title,
                    'datetime': dt_eastern,
                    'date_str': f"{month} {day}, {year}",
                    'type': event_type if event_type else 'Tour/Event',
                    'sold_out': False,
                    'source': 'Open House NY',
                    'location': 'Various NYC Locations',
                    'description': "Members Only event" if members_only else "",
                    'has_specific_time': has_specific_time,
                    'url': 'https://ohny.org/calendar/'
                })
        
        i += 1
    
    return events

ohny_events = fetch_ohny_events()
print(f"Open House NY: {len(ohny_events)} events")

---
# Combined Results

In [None]:
# Combine all events
all_events = caveat_events + riverside_events + amnh_events + met_events + asia_society_events + strand_events + nyhistory_events + ohny_events

print("Event Summary")
print("=" * 40)
print(f"\nAPI Sources:")
print(f"  - Caveat NYC: {len(caveat_events)}")
print(f"  - Riverside Park: {len(riverside_events)}")
print(f"\nPlaywright Sources:")
print(f"  - AMNH: {len(amnh_events)}")
print(f"  - The Met: {len(met_events)}")
print(f"  - Asia Society: {len(asia_society_events)}")
print(f"  - Strand Books: {len(strand_events)}")
print(f"  - NY Historical Society: {len(nyhistory_events)}")
print(f"  - Open House NY: {len(ohny_events)}")
print(f"\nTotal: {len(all_events)} events")

---
# Events in the Next Two Weeks

In [None]:
# Filter events occurring in the next two weeks
now = datetime.now(ZoneInfo("America/New_York"))
two_weeks = now + timedelta(weeks=2)

upcoming_events = [
    e for e in all_events 
    if e['datetime'] is not None and now <= e['datetime'] <= two_weeks
]

# Sort by datetime
upcoming_events.sort(key=lambda x: x['datetime'])

print(f"Events from {now.strftime('%m/%d')} to {two_weeks.strftime('%m/%d/%Y')}")
print(f"Found {len(upcoming_events)} events")

In [None]:
# Display upcoming events as a formatted table
from itertools import groupby

def get_date_key(event):
    return event['datetime'].strftime("%A, %B %d")

# Print header
print(f"\n{'Day':<15} | {'Location':<30} | {'Event':<35} | {'Time':<10} | Description")
print("=" * 140)

current_day = None
for event in upcoming_events:
    day = event['datetime'].strftime("%a, %b %d")
    
    # Only show time if event has a specific time (not midnight)
    if event.get('has_specific_time', True):
        time_str = event['datetime'].strftime("%I:%M %p")
    else:
        time_str = "--"
    
    # Truncate fields for display
    location = event.get('location', '')[:30]
    name = event.get('name', '')[:35]
    if event.get('sold_out'):
        name = name[:27] + " [SOLD]"
    description = event.get('description', '')[:50]
    if len(event.get('description', '')) > 50:
        description += "..."
    
    # Only show day on first event of that day
    if day != current_day:
        current_day = day
        print("-" * 140)
        day_display = day
    else:
        day_display = ""
    
    print(f"{day_display:<15} | {location:<30} | {name:<35} | {time_str:<10} | {description}")

---
# Export to CSV

In [None]:
import csv

# Export upcoming events to CSV
csv_filename = "upcoming_events.csv"

with open(csv_filename, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    
    # Write header
    writer.writerow(['Day', 'Date', 'Time', 'Location', 'Event', 'Source', 'Type', 'Sold Out', 'Description', 'URL'])
    
    # Write events
    for event in upcoming_events:
        day = event['datetime'].strftime("%A")
        date = event['datetime'].strftime("%Y-%m-%d")
        time_str = event['datetime'].strftime("%I:%M %p") if event.get('has_specific_time', True) else ""
        
        writer.writerow([
            day,
            date,
            time_str,
            event.get('location', ''),
            event.get('name', ''),
            event.get('source', ''),
            event.get('type', ''),
            'Yes' if event.get('sold_out') else 'No',
            event.get('description', ''),
            event.get('url', '')
        ])

print(f"Exported {len(upcoming_events)} events to {csv_filename}")