# LocAIted Project Setup Verification

This notebook helps you explore and verify the project setup for LocAIted.

In [None]:
# Setup imports
import sys
import os
from pathlib import Path
from datetime import datetime
import pandas as pd
import json

# Add src to path
sys.path.insert(0, str(Path.cwd() / "src"))

# Import config variables explicitly
from config import (
    TAVILY_API_KEY, 
    OPENAI_API_KEY, 
    DATABASE_URL, 
    MAX_COST_PER_QUERY, 
    OPENAI_MODEL,
    PROJECT_ROOT, 
    TEST_DATA_PATH, 
    DEFAULT_CITY, 
    DEFAULT_DATE_RANGE_DAYS,
    TAVILY_SEARCH_DEPTH, 
    TAVILY_MAX_RESULTS, 
    TAVILY_EXTRACT_MAX_URLS
)

# Import database components explicitly
from database import (
    Base, 
    User, 
    Event, 
    Recommendation, 
    Feedback, 
    QueryCache,
    init_db, 
    get_db, 
    create_user, 
    get_or_create_event, 
    create_recommendation,
    add_feedback, 
    check_cache, 
    save_to_cache
)

print("✅ Imports successful!")

## 1. Environment Check

In [2]:
# Check Python version and environment
print(f"Python Version: {sys.version}")
print(f"\nVirtual Environment: {sys.prefix}")
print(f"\nProject Root: {PROJECT_ROOT}")
print(f"Test Data Path: {TEST_DATA_PATH}")
print(f"Database: {DATABASE_URL}")

Python Version: 3.13.3 (v3.13.3:6280bb54784, Apr  8 2025, 10:47:54) [Clang 15.0.0 (clang-1500.3.9.4)]

Virtual Environment: /Users/eilonbaer/Projects/locaited/venv

Project Root: /Users/eilonbaer/Projects/locaited
Test Data Path: /Users/eilonbaer/Projects/locaited/test data/Liri Interesting events.csv
Database: sqlite:///locaited.db


## 2. API Keys Configuration

In [3]:
# Check API keys (masked for security)
def mask_key(key):
    if key:
        return key[:10] + "..." + key[-4:]
    return "NOT CONFIGURED"

print("API Keys Status:")
print(f"\n✅ Tavily API Key: {mask_key(TAVILY_API_KEY)}")
print(f"✅ OpenAI API Key: {mask_key(OPENAI_API_KEY)}")
print(f"\nOpenAI Model: {OPENAI_MODEL}")
print(f"Max Cost per Query: ${MAX_COST_PER_QUERY}")
print(f"\nTavily Settings:")
print(f"  - Search Depth: {TAVILY_SEARCH_DEPTH}")
print(f"  - Max Results: {TAVILY_MAX_RESULTS}")
print(f"  - Max URLs to Extract: {TAVILY_EXTRACT_MAX_URLS}")

API Keys Status:

✅ Tavily API Key: tvly-dev-e...TDyw
✅ OpenAI API Key: sk-svcacct...kYIA

OpenAI Model: gpt-3.5-turbo
Max Cost per Query: $0.1

Tavily Settings:
  - Search Depth: basic
  - Max Results: 15
  - Max URLs to Extract: 8


## 3. Dependencies Check

In [4]:
# Check installed packages
packages = [
    ('langgraph', 'Agent Orchestration'),
    ('tavily', 'Web Search API'),
    ('openai', 'LLM API'),
    ('sqlalchemy', 'Database ORM'),
    ('fastapi', 'API Framework'),
    ('pydantic', 'Data Validation')
]

print("Installed Packages:")
for package, description in packages:
    try:
        module = __import__(package.replace('-', '_'))
        version = getattr(module, '__version__', 'installed')
        print(f"✅ {description:20} ({package}): {version}")
    except ImportError:
        print(f"❌ {description:20} ({package}): Not installed")

Installed Packages:
✅ Agent Orchestration  (langgraph): installed
✅ Web Search API       (tavily): installed
✅ LLM API              (openai): 1.99.9
✅ Database ORM         (sqlalchemy): 2.0.43
✅ API Framework        (fastapi): 0.116.1
✅ Data Validation      (pydantic): 2.11.7


## 4. Database Setup

In [5]:
# Initialize database if needed
init_db()

# Show tables
print("Database Tables:")
for table_name in Base.metadata.tables.keys():
    print(f"  • {table_name}")

Database initialized at sqlite:///locaited.db
Database Tables:
  • users
  • events
  • recommendations
  • feedback
  • query_cache


In [6]:
# Check database contents
db = next(get_db())

stats = {
    'Users': db.query(User).count(),
    'Events': db.query(Event).count(),
    'Recommendations': db.query(Recommendation).count(),
    'Feedback': db.query(Feedback).count(),
    'Cached Queries': db.query(QueryCache).count()
}

print("Database Record Counts:")
for table, count in stats.items():
    print(f"  {table:15}: {count}")

db.close()

Database Record Counts:
  Users          : 2
  Events         : 2
  Recommendations: 1
  Feedback       : 0
  Cached Queries : 0


In [7]:
# Show sample data if exists
db = next(get_db())

if db.query(Event).count() > 0:
    print("Sample Events in Database:")
    events = db.query(Event).limit(5).all()
    for event in events:
        print(f"  • {event.title}")
        print(f"    Location: {event.location}")
        print(f"    Organizer: {event.organizer}")
        print(f"    Created: {event.created_at}\n")
else:
    print("No events in database yet.")

db.close()

Sample Events in Database:
  • Test Event
    Location: NYC
    Organizer: Test Org
    Created: 2025-08-18 12:13:30.277938

  • Test Notebook Event
    Location: NYC
    Organizer: Notebook Org
    Created: 2025-08-18 13:26:06.168377



## 5. Test Data Exploration

In [8]:
# Load and explore test data
if TEST_DATA_PATH.exists():
    df = pd.read_csv(TEST_DATA_PATH)
    print(f"Test Data: {len(df)} events loaded")
    print(f"\nColumns: {list(df.columns)}")
    print(f"\nFirst 5 events:")
    display(df.head())
else:
    print(f"Test data file not found at: {TEST_DATA_PATH}")

Test Data: 20 events loaded

Columns: ['Day', 'date', 'event', 'type', 'access', 'Location ', 'Time']

First 5 events:


Unnamed: 0,Day,date,event,type,access,Location,Time
0,,"Mar 14, 2025",Purim,Cultural Event,open to all,South Williamsburg,Throughout the day
1,Monday,"Mar 17, 2025",St. Patrick's Day Parade,Parade,open to all,Marches along Fifth Avenue from 44th to 79th S...,Starts at 11:00 AM
2,Wednesday,"Mar 19, 2025",Eli Sharabi in the UN,Political event,Need to be approved,United Nations HQ,
3,Tuesday,"Apr 1, 2025",International women of courge,Political event,Need to be approved,"Department of State, DC",
4,Saturday,"Apr 5, 2025",Hands off (Protest against trump),Protest,Press Card only,Bryant Park,1:00 PM


In [9]:
# Analyze event types
if 'df' in locals():
    print("Event Types Distribution:")
    type_counts = df['type'].value_counts()
    for event_type, count in type_counts.items():
        print(f"  {event_type:20}: {count} events")
    
    print(f"\nAccess Requirements:")
    access_counts = df['access'].value_counts()
    for access_type, count in access_counts.items():
        print(f"  {access_type:20}: {count} events")

Event Types Distribution:
  Cultural Event      : 7 events
  Political event     : 5 events
  News                : 4 events
  Parade              : 2 events
  Protest             : 2 events

Access Requirements:
  Need to be approved : 7 events
  open to all         : 6 events
  Press Card only     : 6 events


In [10]:
# Show interesting events for testing
if 'df' in locals():
    print("Sample Events for Testing:")
    print("\nProtests:")
    protests = df[df['type'] == 'Protest'].head(2)
    for _, row in protests.iterrows():
        print(f"  • {row['event']} - {row['date']}")
    
    print("\nCultural Events:")
    cultural = df[df['type'] == 'Cultural Event'].head(2)
    for _, row in cultural.iterrows():
        print(f"  • {row['event']} - {row['date']}")
    
    print("\nPolitical Events:")
    political = df[df['type'] == 'Political event'].head(2)
    for _, row in political.iterrows():
        print(f"  • {row['event']} - {row['date']}")

Sample Events for Testing:

Protests:
  • Hands off (Protest against trump) - Apr 5, 2025
  • March to Protect Migrants and the Planet By 50501 NY - Apr 19, 2025

Cultural Events:
  • Purim  - Mar 14, 2025
  • Easter Parade - Apr 20, 2025

Political Events:
  • Eli Sharabi in the UN  - Mar 19, 2025
  • International women of courge - Apr 1, 2025


## 6. Project Structure

In [11]:
# Show project structure
import os

def show_tree(path, prefix="", max_depth=3, current_depth=0):
    if current_depth >= max_depth:
        return
    
    items = sorted(Path(path).iterdir())
    for i, item in enumerate(items):
        # Skip certain directories
        if item.name in ['venv', '__pycache__', '.git', '.vscode', 'micromamba']:
            continue
        
        is_last = i == len(items) - 1
        current_prefix = "└── " if is_last else "├── "
        print(f"{prefix}{current_prefix}{item.name}")
        
        if item.is_dir():
            next_prefix = prefix + ("    " if is_last else "│   ")
            show_tree(item, next_prefix, max_depth, current_depth + 1)

print("Project Structure:")
show_tree(".", max_depth=3)

Project Structure:
├── .DS_Store
├── .env
├── .gitignore
├── .ipynb_checkpoints
│   └── check_setup-checkpoint.ipynb
├── check_setup.ipynb
├── data
├── documents
│   └── Agent Architecture (Cyclic LangGraph, cost-aware Tavily use).pdf
├── environment.yml
├── locaited.db
├── main.py
├── requirements.txt
├── setup.sh
├── setup_venv.sh
├── src
│   ├── __init__.py
│   ├── agents
│   │   ├── __init__.py
│   │   └── workflow.py
│   ├── config.py
│   ├── database.py
│   └── utils
│       └── __init__.py
├── test data
│   └── Liri Interesting events.csv


## 7. Test Database Operations

In [12]:
# Test creating a user
db = next(get_db())

# Check if test user exists
test_user = db.query(User).filter_by(email="notebook@test.com").first()

if not test_user:
    test_user = create_user(
        db,
        name="Notebook Test User",
        email="notebook@test.com",
        primary_location="NYC",
        interest_areas=["protests", "cultural", "political"]
    )
    print(f"✅ Created test user: {test_user.name}")
else:
    print(f"✅ Test user exists: {test_user.name}")

print(f"   ID: {test_user.id}")
print(f"   Email: {test_user.email}")
print(f"   Location: {test_user.primary_location}")
print(f"   Interests: {test_user.interest_areas}")

db.close()

✅ Test user exists: Notebook Test User
   ID: 2
   Email: notebook@test.com
   Location: NYC
   Interests: ['protests', 'cultural', 'political']


In [13]:
# Test event deduplication
db = next(get_db())

event_data = {
    'title': "Test Notebook Event",
    'start_time': datetime.now(),
    'location': "NYC",
    'organizer': "Notebook Org",
    'summary': "Test event from notebook"
}

# Create event twice - should deduplicate
event1 = get_or_create_event(db, event_data)
print(f"First call - Event ID: {event1.id}")

event2 = get_or_create_event(db, event_data)
print(f"Second call - Event ID: {event2.id}")

if event1.id == event2.id:
    print("✅ Deduplication working correctly!")
else:
    print("❌ Deduplication not working")

db.close()

First call - Event ID: 3
Second call - Event ID: 3
✅ Deduplication working correctly!


## 8. Quick API Tests

In [14]:
# Test Tavily connection (without making actual API call)
from tavily import TavilyClient

try:
    client = TavilyClient(api_key=TAVILY_API_KEY)
    print("✅ Tavily client initialized successfully")
    print(f"   Ready to search with depth='{TAVILY_SEARCH_DEPTH}'")
    print(f"   Max results: {TAVILY_MAX_RESULTS}")
except Exception as e:
    print(f"❌ Tavily initialization error: {e}")

✅ Tavily client initialized successfully
   Ready to search with depth='basic'
   Max results: 15


In [15]:
# Test OpenAI connection (without making actual API call)
from openai import OpenAI

try:
    client = OpenAI(api_key=OPENAI_API_KEY)
    print("✅ OpenAI client initialized successfully")
    print(f"   Model: {OPENAI_MODEL}")
    print(f"   Budget: ${MAX_COST_PER_QUERY} per query")
except Exception as e:
    print(f"❌ OpenAI initialization error: {e}")

✅ OpenAI client initialized successfully
   Model: gpt-3.5-turbo
   Budget: $0.1 per query


## 9. LangGraph Workflow Testing (NEW!)

In [16]:
# Phase 2 Progress Update
completed = ["✅ LangGraph Skeleton - COMPLETE!"]
pending = [
    "⏳ Profile & Planner Agent - Parse CSV and build real profile",
    "⏳ Retriever Agent - Integrate Tavily search API",
    "⏳ Extractor Agent - Extract event details from URLs",
    "⏳ Recommender Agent - Score and rank events"
]

print("Phase 2 Progress:\n")
for item in completed:
    print(item)
print()
for item in pending:
    print(item)

print("\n📍 Current: The workflow skeleton is working with mock data.")
print("📍 Next: Build Profile & Planner to parse the test CSV.")

Phase 2 Progress:

✅ LangGraph Skeleton - COMPLETE!

⏳ Profile & Planner Agent - Parse CSV and build real profile
⏳ Retriever Agent - Integrate Tavily search API
⏳ Extractor Agent - Extract event details from URLs
⏳ Recommender Agent - Score and rank events

📍 Current: The workflow skeleton is working with mock data.
📍 Next: Build Profile & Planner to parse the test CSV.


## 10. Next Steps - Phase 2 (Updated)

In [17]:
# View the user profile that was built
print("\nUser Profile Built:")
profile = result['user_profile']
print(f"  Domains: {profile['allowlist_domains'][:3]}...")  # First 3 domains
print(f"  Keywords: {profile['keywords']}")
print(f"  Interest Areas: {profile['interest_areas']}")
print(f"  Credentials: {profile['credentials']}")


User Profile Built:


NameError: name 'result' is not defined

In [None]:
# Examine the state structure
print("\nWorkflow State Keys:")
for key in result.keys():
    value = result[key]
    if isinstance(value, list):
        print(f"  • {key}: {len(value)} items")
    elif isinstance(value, dict):
        print(f"  • {key}: {type(value).__name__} with {len(value)} keys")
    else:
        print(f"  • {key}: {type(value).__name__}")

In [None]:
# Show the workflow logs to understand the flow
print("\nWorkflow Execution Logs:")
for log in result['logs'][-10:]:  # Last 10 logs
    print(f"  • {log}")

In [None]:
# Show the recommendations
print("\nTop Recommendations:")
for i, event in enumerate(result['top10'], 1):
    print(f"\n{i}. {event['title']}")
    print(f"   Score: {event['recommendation']:.2f}")
    print(f"   Location: {event['location']}")
    print(f"   Rationale: {event['rationale'][:100]}...")  # First 100 chars

In [None]:
# Test the workflow with a simple query
result = run_workflow(
    query="Find protests and cultural events in NYC",
    city="NYC",
    days_ahead=14
)

print(f"Query: {result['query_spec']['text']}")
print(f"Cycles executed: {result['cycle_count']}")
print(f"Total cost: ${result['total_cost']:.4f}")
print(f"Events found: {len(result['top10'])}")

In [None]:
# Import the new workflow module
from agents.workflow import *

print("✅ LangGraph workflow module imported successfully!")

## Summary

In [54]:
# Final summary
print("="*60)
print("  LocAIted Setup Summary")
print("="*60)

checklist = [
    ("Environment configured", 'venv' in sys.prefix or 'micromamba' in sys.prefix),
    ("API keys configured", bool(TAVILY_API_KEY and OPENAI_API_KEY)),
    ("Database initialized", Path(DATABASE_URL.replace('sqlite:///', '')).exists()),
    ("Test data available", TEST_DATA_PATH.exists()),
    ("Dependencies installed", True)  # If we got this far, they're installed
]

all_good = True
for task, status in checklist:
    icon = "✅" if status else "❌"
    print(f"{icon} {task}")
    all_good = all_good and status

if all_good:
    print("\n🎉 Phase 1 Complete! Ready for Phase 2: Building Agents")
else:
    print("\n⚠️  Some items need attention before proceeding to Phase 2")

  LocAIted Setup Summary
✅ Environment configured
✅ API keys configured
✅ Database initialized
✅ Test data available
✅ Dependencies installed

🎉 Phase 1 Complete! Ready for Phase 2: Building Agents
