In [None]:
# File: art_openings_brooklyn_v2.py
from browser_use import Agent, ChatGoogle
from dotenv import load_dotenv
import os
import asyncio
import json
from datetime import datetime, timedelta

# ===============================
# Setup
# ===============================

# Load variables from a local .env file into the process environment before
# the LLM client is constructed.
# NOTE(review): presumably ChatGoogle picks up its API key from the
# environment -- confirm the exact variable name it expects.
load_dotenv()
# Single shared chat model; make_agent() passes it to every Agent it builds.
llm = ChatGoogle(model="gemini-2.5-flash")

# ===============================
# Opening Detection Keywords
# ===============================

# Phrases that typically announce an opening event. make_agent() joins them
# with ", " and embeds them in the agent prompt as search hints; nothing in
# this file matches them against page text directly.
OPENING_KEYWORDS = [
    "opening reception",
    "vernissage",
    "artist reception",
    "opening night",
    "first thursday",
    "opening party",
    "preview",
    "private view",
    "launch party",
    "meet the artist",
]

# ===============================
# Gallery Sources
# ===============================

# Gallery pages to check, grouped by neighborhood. The list order is the order
# in which batches are taken (run_openings() slices the not-yet-visited URLs
# from the front).
# NOTE(review): Ridgewood and Maspeth are Queens neighborhoods, while the
# agent prompt in make_agent() says "Brooklyn galleries" -- confirm that the
# wording is intentional.
sources = [
    # Bushwick
    "https://www.tigerstrikesasteroid.com/brooklyn",
    "https://www.bushwickgallery.com",
    "https://www.brooklynartcave.com/events",
    "https://carvalhopark.com/exhibitions",
    "https://www.the-living-gallery.com/events",
    "http://www.transmitter.nyc/",
    "https://www.activespacestudios.com/gallery",

    # Williamsburg
    "https://www.amant.org/exhibitions",
    "https://www.brooklyncc.com/dog-house-gallery",
    "https://awitanewyorkartmag.squarespace.com/announcements",
    "https://miriamgallery.com/",

    # Ridgewood
    "https://www.tempestonweirfield.com/",
    "https://www.lorimoto.com/exhibitions",

    # Maspeth
    "https://www.mrsgallery.com/exhibitions",
]

# Output/state files. All three are relative paths, so they resolve against
# the current working directory of the process.
# JSON list of gallery URLs already processed (read/written by the
# *_visited_sources helpers, deleted by reset_visited()).
visited_sources_file = "visited_galleries_v2.json"
# Human-readable report; default target of append_markdown().
markdown_path = "brooklyn_openings_v2.md"
# Accumulated JSON records; default target of append_structured().
structured_path = "brooklyn_openings_v2.json"

# ===============================
# Helpers
# ===============================

def load_visited_sources():
    """Return the set of gallery URLs stored in the visited-sources file.

    An empty set is returned when the file does not exist yet.
    """
    if not os.path.exists(visited_sources_file):
        return set()

    with open(visited_sources_file, "r", encoding="utf-8") as handle:
        recorded_urls = json.load(handle)
    return set(recorded_urls)

def save_visited_sources(visited):
    """Overwrite the visited-sources file with ``visited`` as a JSON list."""
    serialized = json.dumps(list(visited), indent=2)
    with open(visited_sources_file, "w", encoding="utf-8") as handle:
        handle.write(serialized)

def get_remaining_sources():
    """List the configured gallery URLs not yet marked as visited.

    The relative order of ``sources`` is preserved.
    """
    already_seen = load_visited_sources()
    return list(filter(lambda url: url not in already_seen, sources))

def append_markdown(new_text, path=markdown_path):
    """Append ``new_text`` to the markdown file at ``path``.

    Each appended chunk is preceded by a horizontal-rule separator so that
    successive batches stay visually distinct in the report.
    """
    separator = "\n\n---\n\n"
    with open(path, "a", encoding="utf-8") as report:
        report.writelines((separator, new_text))

def append_structured(new_data, path=structured_path):
    """Append records to the JSON results file at ``path``.

    Args:
        new_data: Records to add. May be a list (or tuple) of records, a
            single record such as one dict, or a string. A string is parsed
            as JSON; if it is not valid JSON it is kept verbatim as
            ``{"raw": <string>}``.
        path: JSON file holding the accumulated records. It is created when
            it does not exist.

    Raises:
        json.JSONDecodeError: If the existing file at ``path`` is not valid
            JSON. The file is not modified in that case.
        TypeError: If a record cannot be serialized to JSON. The existing
            file is left untouched in that case.
    """
    if isinstance(new_data, str):
        try:
            new_data = json.loads(new_data)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            new_data = [{"raw": new_data}]

    # Normalize to a list: a lone record (e.g. a single JSON object) would
    # otherwise make the list concatenation below raise TypeError.
    if isinstance(new_data, tuple):
        new_data = list(new_data)
    elif not isinstance(new_data, list):
        new_data = [new_data]

    existing = []
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            existing = json.load(f)
        if not isinstance(existing, list):
            # Tolerate a file that holds one top-level value instead of a list.
            existing = [existing]

    # Serialize BEFORE opening for writing: mode "w" truncates the file, so a
    # serialization error raised mid-dump would destroy the saved results.
    payload = json.dumps(existing + new_data, indent=2)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)

def get_current_date_context(now=None):
    """Get date context for the agent prompt.

    Args:
        now: Reference ``datetime`` the context is built from. Defaults to
            ``datetime.now()``. Passing it explicitly makes the result
            deterministic and lets one run share a single reference date.

    Returns:
        A dict of formatted strings:
        ``today`` (e.g. "January 18, 2025"), ``today_short`` (ISO
        "YYYY-MM-DD"), ``three_months_out`` (reference date plus 90 days, in
        the same long format), ``current_month`` (e.g. "January 2025") and
        ``current_year`` (e.g. "2025").
    """
    today = datetime.now() if now is None else now
    # "Three months" is approximated as a fixed 90-day window.
    three_months = today + timedelta(days=90)
    long_format = "%B %d, %Y"
    return {
        "today": today.strftime(long_format),
        "today_short": today.strftime("%Y-%m-%d"),
        "three_months_out": three_months.strftime(long_format),
        "current_month": today.strftime("%B %Y"),
        "current_year": today.strftime("%Y"),
    }

# ===============================
# Improved Agent Setup
# ===============================

def make_agent(remaining_sources):
    """Build the Agent that looks for opening receptions at the given URLs.

    The task prompt embeds the current date, the end of the search window,
    the opening keywords and one bullet line per URL in ``remaining_sources``.
    The agent uses the module-level ``llm`` and a fixed browser configuration.
    """
    dates = get_current_date_context()
    today = dates['today']
    window_end = dates['three_months_out']
    keyword_list = ', '.join(OPENING_KEYWORDS)
    gallery_lines = "\n".join([f'- {url}' for url in remaining_sources])

    task = f"""
You are searching for ART OPENING RECEPTIONS at Brooklyn galleries.

CRITICAL DISTINCTION:
- An "OPENING RECEPTION" is a specific one-time EVENT when a new exhibition launches.
  It's typically an evening gathering (e.g., "Saturday, January 18, 6-8PM") with wine,
  refreshments, and a chance to meet the artist.
- "Exhibition dates" are when the show RUNS (e.g., "January 18 - February 28").
- You MUST find the OPENING RECEPTION, not just the exhibition run dates.

CURRENT DATE: {today}
SEARCH WINDOW: Now through {window_end}

FOR EACH GALLERY, EXTRACT:
1. gallery_name - The gallery's name
2. exhibition_title - Name of the show
3. opening_reception_date - The SPECIFIC opening event date (e.g., "Saturday, January 18, 2025")
4. opening_reception_time - The time range (e.g., "6-8PM", "7-9PM")
5. exhibition_start_date - When the show opens to public
6. exhibition_end_date - When the show closes
7. artists - List of exhibiting artists
8. address - Gallery street address
9. event_type - One of: "opening_reception", "closing_reception", "artist_talk", "group_show_opening", "solo_show_opening"
10. source_url - The URL where you found this info

SEARCH STRATEGY:
1. First check the provided URL
2. If no events found, look for navigation links: "Exhibitions", "Events", "Upcoming", "Calendar", "Current"
3. Scroll down to find upcoming events
4. Look for these OPENING KEYWORDS: {keyword_list}

TYPICAL OPENING PATTERNS:
- Evening times: 6-8PM, 6-9PM, 7-9PM, 5-8PM
- Common days: Thursday, Friday, Saturday
- Look for phrases like "Reception:", "Opening:", "Join us for..."

OUTPUT FORMAT - Return a JSON array:
[
  {{
    "gallery_name": "Example Gallery",
    "exhibition_title": "Show Title",
    "opening_reception_date": "Saturday, January 18, 2025",
    "opening_reception_time": "6-8PM",
    "exhibition_start_date": "January 18, 2025",
    "exhibition_end_date": "February 28, 2025",
    "artists": ["Artist Name"],
    "address": "123 Street, Brooklyn, NY",
    "event_type": "solo_show_opening",
    "source_url": "https://..."
  }}
]

If NO upcoming opening reception is found for a gallery, include:
{{
  "gallery_name": "...",
  "status": "no_upcoming_openings",
  "notes": "Reason why (e.g., 'only past exhibitions shown', 'no events page found')",
  "source_url": "..."
}}

IMPORTANT:
- Only include events with dates AFTER {today}
- If you see an opening time like "6PM" without end time, record as "6PM"
- If opening_reception_time is truly not listed, set to "TBA" not "Not available"

GALLERIES TO CHECK:
{gallery_lines}
"""

    # Headless Chromium settings handed to the Agent unchanged.
    chromium_flags = [
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
    ]
    browser_settings = {
        "headless": True,
        "browser_type": "chromium",
        "browser_timeout": 90,
        "viewport_size": {"width": 1280, "height": 720},
        "extra_chromium_args": chromium_flags,
    }

    return Agent(task=task, llm=llm, browser_config=browser_settings)

# ===============================
# Main Run
# ===============================

def _parse_result_records(result_text):
    """Turn the agent's final text into a list of JSON-serializable records."""
    try:
        parsed = json.loads(result_text)
    except (TypeError, ValueError):
        # Not JSON (e.g. prose or fenced output): keep the text verbatim.
        return [{"raw": result_text}]
    return parsed if isinstance(parsed, list) else [parsed]


async def run_openings(batch_size=3):
    """Check the next batch of unvisited galleries and persist the results.

    Takes the first ``batch_size`` URLs that are not yet marked as visited,
    runs one agent over them (at most 30 steps, 360 second overall timeout),
    appends the final text to the markdown report and the records to the
    structured JSON file, then marks the batch as visited.

    A batch is marked as visited only after its results were saved. On a
    timeout or when the agent yields no final result the batch stays
    unvisited, so the next call retries it.

    Args:
        batch_size: Maximum number of gallery URLs handed to one agent run.

    Returns:
        None. Progress and outcome are reported via ``print``.
    """
    visited = load_visited_sources()
    remaining = get_remaining_sources()

    if not remaining:
        print("All galleries already checked.")
        return

    next_batch = remaining[:batch_size]
    date_ctx = get_current_date_context()
    print(f"Checking galleries (as of {date_ctx['today']}):")
    for url in next_batch:
        print(f"  - {url}")

    agent = make_agent(next_batch)

    try:
        history = await asyncio.wait_for(agent.run(max_steps=30), timeout=360)
    except asyncio.TimeoutError:
        print("Agent timed out")
        return

    final_result = history.final_result()
    if not final_result:
        # Same policy as the timeout path: nothing was saved, so do not mark
        # these galleries as visited -- otherwise they are never retried.
        print("No final result produced.")
        return

    batch_header = f"## Batch: {date_ctx['today']}\n\nGalleries checked:\n"
    batch_header += "\n".join(f"- {url}" for url in next_batch)
    batch_header += f"\n\n{final_result}"
    append_markdown(batch_header)

    # NOTE(review): browser_use appears to populate ``structured_output`` only
    # when the Agent was created with an output schema, and make_agent() sets
    # none -- confirm against the installed version. Without the fallback
    # below the structured JSON file would never be written.
    structured = getattr(history, "structured_output", None)
    if not structured:
        structured = _parse_result_records(final_result)
    append_structured(structured)

    print(f"Results saved for {len(next_batch)} galleries.")

    visited.update(next_batch)
    save_visited_sources(visited)

# ===============================
# Reset Function
# ===============================

def reset_visited():
    """Delete the visited-galleries file so every source is checked again."""
    if not os.path.exists(visited_sources_file):
        print("No visited file to reset.")
        return

    os.remove(visited_sources_file)
    print("Visited galleries reset.")

# ===============================
# Manual Execution
# ===============================

if __name__ == "__main__":
    # ``get_ipython`` is only defined inside IPython/Jupyter, so a NameError
    # here means this is a plain script run and the coroutine must be driven
    # with asyncio.run(); in a notebook only usage hints are printed.
    try:
        get_ipython()
    except NameError:
        asyncio.run(run_openings())
    else:
        print("\nRunning in Jupyter notebook.")
        print("Run: await run_openings()")
        print("To reset: reset_visited()")

In [None]:
# Notebook cell: uses top-level ``await``, which works in IPython/Jupyter only.
# When run as a plain script, the ``__main__`` guard calls
# ``asyncio.run(run_openings())`` instead.
await run_openings()