# LocAIted Project Setup Verification

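This notebook helps you explore and verify the project setup for LocAIted. Run it from the project root with a fresh kernel (Restart Kernel → Run All): the first cell adds `src/` to the import path relative to the current working directory.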

In [None]:
# Setup imports
import sys
import os
from pathlib import Path
from datetime import datetime
import pandas as pd
import json

# Add src to path
# NOTE: the path is resolved against the current working directory, so the
# kernel has to be started from the directory that contains `src/`.
sys.path.insert(0, str(Path.cwd() / "src"))

# Wildcard imports: later cells use names such as PROJECT_ROOT, DATABASE_URL,
# get_db and User without qualification; they are presumably supplied by these
# two project modules (verify against src/config.py and src/database.py).
from config import *
from database import *

# Only reached when both project modules imported without raising.
print("✅ Imports successful!")

## 1. Environment Check

In [None]:
# Report the interpreter, the active environment prefix and the project settings.
# NOTE(review): DATABASE_URL is shown verbatim — confirm it never embeds credentials.
environment_lines = [
    f"Python Version: {sys.version}",
    f"\nVirtual Environment: {sys.prefix}",
    f"\nProject Root: {PROJECT_ROOT}",
    f"Test Data Path: {TEST_DATA_PATH}",
    f"Database: {DATABASE_URL}",
]
print("\n".join(environment_lines))

## 2. API Keys Configuration

In [None]:
# Check API keys (masked for security)
def mask_key(key, visible_prefix=10, visible_suffix=4):
    """Return a display-safe version of an API key.

    Args:
        key: The secret to mask; ``None`` or an empty string means "not set".
        visible_prefix: Number of leading characters left readable.
        visible_suffix: Number of trailing characters left readable.

    Returns:
        ``"NOT CONFIGURED"`` when ``key`` is falsy; a run of ``*`` with the same
        length as ``key`` when the key is too short to be partially revealed;
        otherwise ``<prefix>...<suffix>``.
    """
    if not key:
        return "NOT CONFIGURED"

    # With prefix + suffix covering the whole key, slicing both ends would
    # print every character of the secret, so hide it completely instead.
    if len(key) <= visible_prefix + visible_suffix:
        return "*" * len(key)

    # Slice from an explicit index: key[-0:] would return the entire key.
    suffix_start = len(key) - visible_suffix
    return key[:visible_prefix] + "..." + key[suffix_start:]

# Derive each icon from the key itself: a missing key must not be reported
# with a check mark next to "NOT CONFIGURED".
tavily_icon = "✅" if TAVILY_API_KEY else "❌"
openai_icon = "✅" if OPENAI_API_KEY else "❌"

print("API Keys Status:")
# Keys are passed through mask_key so the full secret is not echoed here.
print(f"\n{tavily_icon} Tavily API Key: {mask_key(TAVILY_API_KEY)}")
print(f"{openai_icon} OpenAI API Key: {mask_key(OPENAI_API_KEY)}")
print(f"\nOpenAI Model: {OPENAI_MODEL}")
print(f"Max Cost per Query: ${MAX_COST_PER_QUERY}")
print("\nTavily Settings:")
print(f"  - Search Depth: {TAVILY_SEARCH_DEPTH}")
print(f"  - Max Results: {TAVILY_MAX_RESULTS}")
print(f"  - Max URLs to Extract: {TAVILY_EXTRACT_MAX_URLS}")

## 3. Dependencies Check

In [None]:
# Check installed packages
packages = [
    ('langgraph', 'Agent Orchestration'),
    ('tavily', 'Web Search API'),
    ('openai', 'LLM API'),
    ('sqlalchemy', 'Database ORM'),
    ('fastapi', 'API Framework'),
    ('pydantic', 'Data Validation')
]

print("Installed Packages:")
for package, description in packages:
    try:
        module = __import__(package.replace('-', '_'))
        version = getattr(module, '__version__', 'installed')
        print(f"✅ {description:20} ({package}): {version}")
    except ImportError:
        print(f"❌ {description:20} ({package}): Not installed")

## 4. Database Setup

In [None]:
# Run the project's database initializer before inspecting the schema.
# (presumably a no-op when the schema already exists — confirm in init_db)
init_db()

# List every table name registered on the declarative Base's metadata.
print("Database Tables:")
for registered_table in Base.metadata.tables:
    print(f"  • {registered_table}")

In [None]:
# Check database contents
# The session is pulled out of get_db() with next(), so this cell is
# responsible for closing it — including when a query raises.
db = next(get_db())
try:
    # Display label -> model class whose rows are counted.
    counted_models = {
        'Users': User,
        'Events': Event,
        'Recommendations': Recommendation,
        'Feedback': Feedback,
        'Cached Queries': QueryCache,
    }
    stats = {label: db.query(model).count() for label, model in counted_models.items()}

    print("Database Record Counts:")
    for table, count in stats.items():
        print(f"  {table:15}: {count}")
finally:
    db.close()

In [None]:
# Show sample data if exists
# Close the session in `finally` so a failing query does not leave it open.
db = next(get_db())
try:
    # Fetch the sample directly; an empty result already tells us the table
    # has no rows, so a separate COUNT query is unnecessary.
    events = db.query(Event).limit(5).all()

    if events:
        print("Sample Events in Database:")
        for event in events:
            print(f"  • {event.title}")
            print(f"    Location: {event.location}")
            print(f"    Organizer: {event.organizer}")
            print(f"    Created: {event.created_at}\n")
    else:
        print("No events in database yet.")
finally:
    db.close()

## 5. Test Data Exploration

In [None]:
# Read the test CSV into `df` when the file is present; otherwise only report
# the missing path (later cells check whether `df` was created).
if not TEST_DATA_PATH.exists():
    print(f"Test data file not found at: {TEST_DATA_PATH}")
else:
    df = pd.read_csv(TEST_DATA_PATH)
    print(f"Test Data: {len(df)} events loaded")
    print(f"\nColumns: {list(df.columns)}")
    print(f"\nFirst 5 events:")
    # Rich notebook rendering of the default head() preview.
    display(df.head())

In [None]:
# Summarise how the loaded events are distributed by type and by access level.
def _print_distribution(frame, column):
    """Print one line per distinct value of ``column`` with its row count and return the counts."""
    counts = frame[column].value_counts()
    for label, count in counts.items():
        print(f"  {label:20}: {count} events")
    return counts


# `df` only exists when the CSV-loading cell found the file.
if 'df' in locals():
    print("Event Types Distribution:")
    type_counts = _print_distribution(df, 'type')

    print(f"\nAccess Requirements:")
    access_counts = _print_distribution(df, 'access')

In [None]:
# Print up to two example events for each category of interest.
def _show_samples(frame, heading, event_type, limit=2):
    """Print ``heading`` followed by the first ``limit`` rows whose 'type' equals ``event_type``; return those rows."""
    print(heading)
    subset = frame[frame['type'] == event_type].head(limit)
    for _, row in subset.iterrows():
        print(f"  • {row['event']} - {row['date']}")
    return subset


# `df` only exists when the CSV-loading cell found the file.
if 'df' in locals():
    print("Sample Events for Testing:")
    protests = _show_samples(df, "\nProtests:", 'Protest')
    cultural = _show_samples(df, "\nCultural Events:", 'Cultural Event')
    # NOTE(review): lower-case 'event' is kept exactly as written; confirm it
    # matches the label used in the CSV.
    political = _show_samples(df, "\nPolitical Events:", 'Political event')

## 6. Project Structure

In [None]:
# Show project structure
import os

def show_tree(path, prefix="", max_depth=3, current_depth=0,
              skip=('venv', '__pycache__', '.git', '.vscode', 'micromamba')):
    """Print the directory tree under ``path`` using box-drawing connectors.

    Args:
        path: Directory to list (anything accepted by ``pathlib.Path``).
        prefix: Text printed before each connector; grows with nesting.
        max_depth: Number of levels to print below ``path``.
        current_depth: Depth of ``path`` itself; used by the recursion.
        skip: Entry names that are neither printed nor descended into.

    Returns:
        None. The tree is written to standard output.
    """
    if current_depth >= max_depth:
        return

    # Drop skipped names *before* deciding which entry is last; otherwise a
    # skipped final entry (e.g. 'venv') leaves the listing without a closing
    # "└── " and gives the children a dangling "│" continuation.
    items = sorted(item for item in Path(path).iterdir() if item.name not in skip)
    last_index = len(items) - 1

    for index, item in enumerate(items):
        is_last = index == last_index
        connector = "└── " if is_last else "├── "
        print(f"{prefix}{connector}{item.name}")

        if item.is_dir():
            # Below the last entry there is nothing left to connect to, so the
            # continuation column is blank instead of a vertical bar.
            next_prefix = prefix + ("    " if is_last else "│   ")
            show_tree(item, next_prefix, max_depth, current_depth + 1, skip)

# Render the tree for the current working directory, three levels deep.
print("Project Structure:")
show_tree(path=".", max_depth=3)

## 7. Test Database Operations

In [None]:
# Test creating a user
# The session comes from next(get_db()), so it is closed here in `finally`
# even if the lookup or the insert raises.
db = next(get_db())
try:
    # Look the user up first so that re-running the cell reuses the same row.
    test_user = db.query(User).filter_by(email="notebook@test.com").first()

    if not test_user:
        test_user = create_user(
            db,
            name="Notebook Test User",
            email="notebook@test.com",
            primary_location="NYC",
            interest_areas=["protests", "cultural", "political"]
        )
        print(f"✅ Created test user: {test_user.name}")
    else:
        print(f"✅ Test user exists: {test_user.name}")

    # Read the attributes while the session is still open.
    print(f"   ID: {test_user.id}")
    print(f"   Email: {test_user.email}")
    print(f"   Location: {test_user.primary_location}")
    print(f"   Interests: {test_user.interest_areas}")
finally:
    db.close()

In [None]:
# Test event deduplication
# The session comes from next(get_db()), so it is closed here in `finally`
# even if one of the calls below raises.
db = next(get_db())
try:
    event_data = {
        'title': "Test Notebook Event",
        'start_time': datetime.now(),
        'location': "NYC",
        'organizer': "Notebook Org",
        'summary': "Test event from notebook"
    }

    # The same payload is submitted twice; both calls are expected to hand
    # back the same row, which is what the ID comparison below verifies.
    event1 = get_or_create_event(db, event_data)
    print(f"First call - Event ID: {event1.id}")

    event2 = get_or_create_event(db, event_data)
    print(f"Second call - Event ID: {event2.id}")

    if event1.id == event2.id:
        print("✅ Deduplication working correctly!")
    else:
        print("❌ Deduplication not working")
finally:
    db.close()

## 8. Quick API Tests

In [None]:
# Construct a Tavily client from the configured key; no search request is issued here.
from tavily import TavilyClient

try:
    client = TavilyClient(api_key=TAVILY_API_KEY)
    tavily_report = (
        "✅ Tavily client initialized successfully",
        f"   Ready to search with depth='{TAVILY_SEARCH_DEPTH}'",
        f"   Max results: {TAVILY_MAX_RESULTS}",
    )
    print(*tavily_report, sep="\n")
except Exception as e:
    # Any failure is reported instead of aborting the notebook.
    print(f"❌ Tavily initialization error: {e}")

In [None]:
# Construct an OpenAI client from the configured key; no completion request is issued here.
from openai import OpenAI

try:
    client = OpenAI(api_key=OPENAI_API_KEY)
    openai_report = (
        "✅ OpenAI client initialized successfully",
        f"   Model: {OPENAI_MODEL}",
        f"   Budget: ${MAX_COST_PER_QUERY} per query",
    )
    print(*openai_report, sep="\n")
except Exception as e:
    # Any failure is reported instead of aborting the notebook.
    print(f"❌ OpenAI initialization error: {e}")

## 9. Next Steps - Phase 2 Preview

In [None]:
# Outline of the Phase 2 work items: (task title, component list) pairs.
_PHASE2_PLAN = (
    ("LangGraph Skeleton", [
        "State definition (query, candidates, extracted, top10)",
        "Workflow transitions",
        "Cycle detection and management",
    ]),
    ("Profile & Planner Agent", [
        "Parse CSV events",
        "Extract domains from URLs",
        "Build user profile",
    ]),
    ("Retriever Agent", [
        "Tavily search integration",
        "Apply date/location filters",
        "Return candidate URLs",
    ]),
    ("Extractor Agent", [
        "Extract from top URLs",
        "Normalize event fields",
        "Structure data",
    ]),
    ("Recommender Agent", [
        "Score events against profile",
        "Generate rationales (≤350 chars)",
        "Select top 10",
    ]),
)

# Same list-of-dicts shape as before, built from the outline above.
phase2_tasks = [
    {"task": title, "components": parts}
    for title, parts in _PHASE2_PLAN
]

print("Phase 2: Core Agents Development\n")
for number, entry in enumerate(phase2_tasks, start=1):
    # Numbered title line followed by one bullet per component.
    bullet_lines = [f"   • {part}" for part in entry['components']]
    print("\n".join([f"{number}. {entry['task']}", *bullet_lines]))
    print()

## Summary

In [None]:
# Final summary
import importlib.util

print("="*60)
print("  LocAIted Setup Summary")
print("="*60)

# A venv makes sys.prefix differ from sys.base_prefix regardless of what the
# directory is called; the substring checks are kept for environments whose
# prefix only identifies them by name.
in_isolated_env = (
    sys.prefix != getattr(sys, 'base_prefix', sys.prefix)
    or 'venv' in sys.prefix
    or 'micromamba' in sys.prefix
)

# Verify the dependencies for real instead of assuming success: the
# dependency-check cell only prints a cross for a missing package and keeps
# going, so reaching this cell proves nothing. `packages` is the list defined
# in that cell.
dependencies_ok = all(
    importlib.util.find_spec(package.replace('-', '_')) is not None
    for package, _ in packages
)

checklist = [
    ("Environment configured", in_isolated_env),
    ("API keys configured", bool(TAVILY_API_KEY and OPENAI_API_KEY)),
    # NOTE(review): this treats DATABASE_URL as a 'sqlite:///<file>' URL; any
    # other scheme would be tested as a literal file path — confirm the
    # project only targets SQLite.
    ("Database initialized", Path(DATABASE_URL.replace('sqlite:///', '')).exists()),
    ("Test data available", TEST_DATA_PATH.exists()),
    ("Dependencies installed", dependencies_ok)
]

for task, status in checklist:
    icon = "✅" if status else "❌"
    print(f"{icon} {task}")

# Every item has to pass for the setup to count as complete.
all_good = all(status for _, status in checklist)

if all_good:
    print("\n🎉 Phase 1 Complete! Ready for Phase 2: Building Agents")
else:
    print("\n⚠️  Some items need attention before proceeding to Phase 2")