# Cassidy Insights Prototype

This notebook allows you to prototype insights services that will eventually run as cron jobs.
Use this to test different prompts and analysis approaches on user data.

## Setup and Imports

In [1]:
import sys
import os
import asyncio
import json
from datetime import datetime, timedelta
from typing import List, Dict, Any
from contextlib import asynccontextmanager

# Add backend to Python path
# NOTE: both entries are relative paths, so they depend on the directory the
# kernel was started from.
sys.path.append('../backend')
sys.path.append('../backend/app')

# Set database URL to use backend database
# This is done before the backend imports below — presumably the backend
# settings read DATABASE_URL at import time; TODO confirm.
os.environ['DATABASE_URL'] = 'sqlite+aiosqlite:///../backend/cassidy.db'

# Backend imports
from app.database import init_db, get_db
from app.repositories.user import UserRepository
from app.repositories.task import TaskRepository
from app.repositories.session import JournalEntryRepository
from app.agents.factory import AgentFactory
from app.agents.models import CassidyAgentDependencies
from app.core.config import settings

# Initialize database
await init_db()
print("✅ Database initialized")

# Create a helper for database sessions
@asynccontextmanager
async def get_db_session():
    """Helper to get database session"""
    async for session in get_db():
        yield session

print("✅ Imports successful")

✅ Database initialized
✅ Imports successful


## Configuration

In [2]:
# Configuration
USERNAME = "jg2950"  # Account passed to load_user_data(); change this to test with different users
DAYS_BACK = 100  # Size, in days, of the "recent" journal window used by format_data_summary()

print(f"Analyzing data for user: {USERNAME}")
print(f"Looking back {DAYS_BACK} days")

Analyzing data for user: jg2950
Looking back 100 days


## Data Loading Functions

In [None]:
async def debug_user_lookup(username: str):
    """Look a user up two different ways and print both results.

    First runs a parameterized raw ``SELECT`` against the ``users`` table,
    then calls ``UserRepository.get_by_username`` on the same session, so a
    mismatch between the two is visible in the cell output.

    Args:
        username: Username to search for.

    Returns:
        Whatever ``UserRepository.get_by_username`` returned.
    """
    async with get_db_session() as db:
        repository = UserRepository()

        print(f"Looking for user: {username}")

        # Raw SQL check, independent of the repository layer
        from sqlalchemy import text
        lookup_sql = text("SELECT username, id, is_active FROM users WHERE username = :username")
        cursor = await db.execute(lookup_sql, {"username": username})
        first_row = cursor.fetchone()
        print(f"Raw SQL result: {first_row}")

        # Same lookup through the repository method
        found_user = await repository.get_by_username(db, username)
        print(f"Repository result: {found_user}")

        return found_user

# Debug the user lookup
test_user = await debug_user_lookup("jg2950")

async def load_user_data(username: str):
    """Fetch a user plus that user's tasks and journal entries.

    Args:
        username: Username to look up via ``UserRepository.get_by_username``.

    Returns:
        A dict with keys ``'user'`` (the user object), ``'user_id'``
        (``str(user.id)``), ``'tasks'`` and ``'journals'`` (the results of the
        two repositories' ``get_by_user_id`` calls).

    Raises:
        ValueError: If the lookup returns a falsy result. Setup hints are
            printed before raising.
    """
    async with get_db_session() as db:
        users = UserRepository()
        task_store = TaskRepository()
        journal_store = JournalEntryRepository()

        account = await users.get_by_username(db, username)
        if not account:
            setup_hints = (
                f"⚠️  User {username} not found in database.",
                "Create the user first by:",
                "1. Running the backend server to initialize the database",
                "2. Using the frontend to register the user",
                "3. Or use the default test user 'user_123'",
            )
            for hint in setup_hints:
                print(hint)
            raise ValueError(f"User {username} not found")

        # The repositories are queried with the stringified id
        uid = str(account.id)

        return {
            'user': account,
            'user_id': uid,
            'tasks': await task_store.get_by_user_id(db, uid),
            'journals': await journal_store.get_by_user_id(db, uid),
        }

def format_data_summary(data, days_back=None):
    """Print a summary of loaded data and return the recent journal entries.

    Args:
        data: Dict shaped like the return value of ``load_user_data``: keys
            ``'user'`` (object with ``.username``), ``'user_id'``, ``'tasks'``
            (objects with ``.is_completed``) and ``'journals'`` (objects with
            ``.created_at``).
        days_back: Size of the "recent" window in days. Defaults to the
            notebook-level ``DAYS_BACK`` constant when omitted.

    Returns:
        The journal entries whose ``created_at`` is later than
        ``datetime.now() - days_back`` days.
    """
    if days_back is None:
        days_back = DAYS_BACK

    tasks = data['tasks']
    journals = data['journals']

    print(f"User: {data['user'].username} ({data['user_id']})")
    print(f"Tasks: {len(tasks)} total")
    print(f"Journal entries: {len(journals)} total")

    # Task breakdown
    completed_count = sum(1 for task in tasks if task.is_completed)
    print(f"  - Completed: {completed_count}")
    print(f"  - Pending: {len(tasks) - completed_count}")

    # Journal breakdown. The cutoff is computed once so every entry is
    # compared against the same instant.
    # NOTE(review): datetime.now() is naive; this comparison assumes
    # created_at is naive as well — confirm against the model.
    cutoff = datetime.now() - timedelta(days=days_back)
    recent_journals = [entry for entry in journals if entry.created_at > cutoff]
    print(f"  - Recent ({days_back} days): {len(recent_journals)}")

    return recent_journals

async def create_sample_data_if_needed():
    """Best-effort call to the backend's ``create_sample_user`` helper.

    Any ``Exception`` raised by the helper is reported with ``print`` instead
    of propagating, so the rest of the cell keeps running. The import itself
    sits outside the ``try`` and can still raise.
    """
    from app.database import create_sample_user

    try:
        await create_sample_user()
    except Exception as exc:
        print(f"Could not create sample user: {exc}")
    else:
        print("✅ Sample user created or already exists")
        
# Create sample user if needed (useful for first-time setup)
# NOTE(review): this runs after the debug_user_lookup() call earlier in this
# cell, so on a fresh database that lookup happens before any sample user exists.
await create_sample_data_if_needed()

## Load User Data

In [None]:
# Load data for the specified user
# load_user_data() raises ValueError when USERNAME is not found, which stops
# this cell; every later cell reads `user_data` and `recent_journals`.
user_data = await load_user_data(USERNAME)
# format_data_summary() prints the counts and returns only the journals newer
# than DAYS_BACK days.
recent_journals = format_data_summary(user_data)

print("\n✅ Data loaded successfully")

⚠️  User jg2950 not found in database.
Create the user first by:
1. Running the backend server to initialize the database
2. Using the frontend to register the user
3. Or use the default test user 'user_123'


ValueError: User jg2950 not found

## LLM Helper Functions

In [None]:
def _build_notebook_deps():
    """Build the minimal ``CassidyAgentDependencies`` used for every notebook LLM call.

    Reads ``user_data['user_id']`` from the notebook namespace, so the
    "Load User Data" cell must have run first. A fresh object (with fresh
    empty dict/list fields) is created on each call.
    """
    return CassidyAgentDependencies(
        user_id=user_data['user_id'],
        session_id="notebook-session",
        conversation_type="general",
        user_template={},
        user_preferences={},
        current_journal_draft={},
        current_tasks=[]
    )


async def get_llm_agent():
    """Get an LLM agent for analysis.

    Returns:
        The agent returned by ``AgentFactory.get_agent`` for the "general"
        conversation type, created for the currently loaded user.
    """
    return await AgentFactory.get_agent(
        conversation_type="general",
        user_id=user_data['user_id'],
        context=_build_notebook_deps()
    )


async def ask_llm(prompt: str, agent=None):
    """Send a prompt to the LLM and return its output.

    Args:
        prompt: Prompt text passed to ``agent.run``.
        agent: Agent to use. When ``None`` a new one is created with
            ``get_llm_agent()`` for this single call.

    Returns:
        The ``output`` attribute of the run result.
    """
    if agent is None:
        agent = await get_llm_agent()

    result = await agent.run(prompt, deps=_build_notebook_deps())
    return result.output

# Initialize agent
# Created once here and passed explicitly to the analysis helpers below;
# ask_llm() only builds a new agent when it is called without one.
llm_agent = await get_llm_agent()
print("✅ LLM agent initialized")

## Emotional State Analysis Functions

In [None]:
def extract_journal_text(journal_entry):
    """Extract readable text from a journal entry.

    Lookup order:
      1. ``journal_entry.raw_text`` when present and truthy.
      2. ``journal_entry.structured_data`` (a dict, or a JSON string that
         decodes to a dict): its ``'content'`` value, else its ``'raw_text'``
         value, else an empty string.
      3. The placeholder ``"No content available"``.

    Structured data that is malformed JSON, or JSON that does not decode to a
    dict, is treated as missing instead of raising.

    Args:
        journal_entry: Object that may expose ``raw_text`` and/or
            ``structured_data`` attributes.

    Returns:
        The extracted text, or ``"No content available"``. Values taken from
        structured data are always returned as ``str``.
    """
    raw_text = getattr(journal_entry, 'raw_text', None)
    if raw_text:
        return raw_text

    structured = getattr(journal_entry, 'structured_data', None)
    if structured:
        if not isinstance(structured, dict):
            try:
                structured = json.loads(structured)
            except (TypeError, ValueError):
                # Not a str/bytes payload, or not valid JSON (JSONDecodeError
                # is a ValueError subclass).
                structured = None

        if isinstance(structured, dict):
            # `or ''` also covers keys that are present but hold None, so
            # callers can safely call str methods on the result.
            text = structured.get('content') or structured.get('raw_text') or ''
            return text if isinstance(text, str) else str(text)

    return "No content available"

async def rate_emotional_state(journal_text: str, agent=None) -> int:
    """Ask the LLM for a 1-5 emotional rating of one journal entry.

    Args:
        journal_text: Entry text embedded in the prompt.
        agent: Agent forwarded to ``ask_llm``.

    Returns:
        The reply parsed as an int and clamped to 1..5. If the whole reply is
        not an integer, the first character in ``1``-``5`` found in it is
        used; if there is none, 3 (neutral).
    """
    prompt = f"""
Rate the emotional state in this journal entry on a scale of 1-5, where:
1 = Very negative (depression, anger, despair, anxiety)
2 = Somewhat negative (frustration, sadness, worry)
3 = Neutral (matter-of-fact, balanced)
4 = Somewhat positive (content, hopeful, calm)
5 = Very positive (joy, excitement, gratitude, love)

Journal entry:
"{journal_text}"

Respond with ONLY the number (1-5), no explanation.
"""

    reply = await ask_llm(prompt, agent)

    try:
        parsed = int(reply.strip())
    except ValueError:
        # Reply contained more than a bare number: fall back to the first
        # digit in the valid range.
        import re
        digit = re.search(r'[1-5]', reply)
        if digit is None:
            return 3  # Default to neutral
        return int(digit.group())

    # Clamp out-of-range integers into 1..5
    return min(5, max(1, parsed))

async def analyze_journals_emotional_state(journals: List, agent=None):
    """Rate the emotional state of each journal entry that has usable text.

    Entries are skipped, with a note in the progress output, when their
    extracted text is the ``extract_journal_text`` placeholder or is shorter
    than 10 characters after stripping. Without the placeholder check, empty
    entries would pass the length check and be sent to the LLM.

    Args:
        journals: Journal entries exposing ``created_at`` and, optionally,
            ``structured_data``.
        agent: Agent forwarded to ``rate_emotional_state``.

    Returns:
        A list of dicts with keys ``'journal'``, ``'text'``,
        ``'emotional_rating'``, ``'date'`` (``YYYY-MM-DD``) and
        ``'structured_data'`` (``None`` when the entry has no such attribute).
    """
    # Placeholder returned by extract_journal_text() for entries with no text.
    no_content_placeholder = "No content available"
    min_text_length = 10

    total = len(journals)
    print(f"Analyzing emotional state for {total} journal entries...")

    analyzed_journals = []

    for position, journal in enumerate(journals, start=1):
        print(f"Processing journal {position}/{total}...", end=" ")

        text = extract_journal_text(journal)
        if text == no_content_placeholder:
            print("(skipped - no content)")
            continue
        if len(text.strip()) < min_text_length:  # Skip very short entries
            print("(skipped - too short)")
            continue

        rating = await rate_emotional_state(text, agent)

        analyzed_journals.append({
            'journal': journal,
            'text': text,
            'emotional_rating': rating,
            'date': journal.created_at.strftime('%Y-%m-%d'),
            'structured_data': getattr(journal, 'structured_data', None)
        })

        print(f"Rating: {rating}")

    return analyzed_journals

print("✅ Emotional analysis functions ready")

## Run Emotional State Analysis

In [None]:
# Analyze emotional state for recent journals
# One LLM call is made per rated entry, so this cell's run time grows with
# the number of recent journals.
analyzed_journals = await analyze_journals_emotional_state(recent_journals, llm_agent)

# Show distribution
# `ratings` is also read by the export cell at the end of the notebook.
ratings = [j['emotional_rating'] for j in analyzed_journals]
print(f"\nEmotional state distribution:")
for rating in range(1, 6):
    count = ratings.count(rating)
    # Guard against division by zero when nothing was analyzed
    percentage = (count / len(ratings) * 100) if ratings else 0
    print(f"  {rating}: {count} entries ({percentage:.1f}%)")

print(f"\n✅ Analyzed {len(analyzed_journals)} journal entries")

## Negative State Correlation Analysis

In [None]:
# Keep only the entries rated below neutral (1 or 2 on the 1-5 scale)
negative_journals = [
    entry for entry in analyzed_journals
    if entry['emotional_rating'] < 3
]

print(f"Found {len(negative_journals)} journal entries with negative emotional state:")
for journal in negative_journals:
    print(f"  - {journal['date']}: Rating {journal['emotional_rating']} - {journal['text'][:100]}...")

# Say whether the correlation cell below has anything to work with
if negative_journals:
    print(f"\n✅ Ready to analyze {len(negative_journals)} negative state entries")
else:
    print("\n⚠️ No negative emotional state entries found. Consider adjusting the rating threshold or date range.")

In [None]:
async def find_negative_correlations(negative_entries: List[Dict], agent=None):
    """Ask the LLM for patterns shared by negatively rated journal entries.

    Args:
        negative_entries: Dicts with keys ``'date'``, ``'emotional_rating'``,
            ``'text'`` and optionally ``'structured_data'`` (a dict or a JSON
            string), as produced by ``analyze_journals_emotional_state``.
        agent: Agent forwarded to ``ask_llm``.

    Returns:
        The LLM's analysis text, or a fixed message when ``negative_entries``
        is empty (no LLM call is made in that case).
    """
    if not negative_entries:
        return "No negative emotional state entries to analyze."

    # Prepare structured data for analysis
    analysis_data = []
    for entry in negative_entries:
        entry_info = {
            'date': entry['date'],
            'emotional_rating': entry['emotional_rating'],
            'text': entry['text'][:500],  # Limit text length
        }

        # Include structured data if available. Only decoding failures are
        # tolerated here; any other error is allowed to surface.
        structured = entry.get('structured_data')
        if structured:
            decoded_ok = True
            if not isinstance(structured, dict):
                try:
                    structured = json.loads(structured)
                except (TypeError, ValueError):
                    decoded_ok = False
            if decoded_ok:
                entry_info['structured_data'] = structured

        analysis_data.append(entry_info)

    # default=str keeps values that are not JSON-native (for example datetime
    # objects inside structured data) from aborting the whole analysis.
    serialized_entries = json.dumps(analysis_data, indent=2, default=str)

    # Create analysis prompt
    prompt = f"""
You are analyzing journal entries to find patterns that correlate with negative emotional states.

I have {len(negative_entries)} journal entries where the person was experiencing negative emotions (rated 1-2 out of 5).

Journal entries with negative emotional states:
{serialized_entries}

Please analyze these entries and identify:

1. **Recurring themes or topics** that appear in multiple negative entries
2. **Environmental factors** (time of day, weather, location, etc.) that might correlate
3. **Activities or behaviors** mentioned that might be contributing factors
4. **Relationship or social patterns** that appear problematic
5. **Work or life stressors** that repeatedly come up

Focus on finding patterns that are:
- **Actionable** (things the person could potentially change)
- **Recurring** (appear in multiple entries, not just isolated incidents)
- **Likely causal** (not just coincidental)

Format your response as a clear analysis with specific examples from the journal entries.
"""

    return await ask_llm(prompt, agent)

# Run the correlation analysis
if negative_journals:
    print("Analyzing patterns in negative emotional states...")
    correlation_analysis = await find_negative_correlations(negative_journals, llm_agent)
    print("\n" + "="*80)
    print("NEGATIVE EMOTIONAL STATE CORRELATION ANALYSIS")
    print("="*80)
    print(correlation_analysis)
    print("="*80)
else:
    print("No negative journal entries to analyze.")

## Custom Insights Experimentation

Use the cells below to experiment with different insight prompts and approaches.

In [None]:
# Example: Task prioritization insight
async def prioritize_tasks_insight(tasks: List, journals: List, agent=None):
    """Generate insights about task prioritization based on journal context.

    Args:
        tasks: Task objects exposing ``is_completed``, ``title`` and
            ``description``.
        journals: Journal entries exposing ``created_at``. They may arrive in
            any order; the five most recent are selected here.
        agent: Agent forwarded to ``ask_llm``.

    Returns:
        The LLM's recommendation text, or a fixed message when there are no
        pending tasks (no LLM call is made in that case).
    """
    pending_tasks = [t for t in tasks if not t.is_completed]

    if not pending_tasks:
        return "No pending tasks to prioritize."

    # Sort explicitly so the slice below really is the five latest entries,
    # whatever order the caller's list is in.
    # Assumes every entry has a comparable created_at.
    latest_journals = sorted(journals, key=lambda j: j.created_at)[-5:]

    # Recent journal themes (first 200 characters of each entry)
    recent_journal_text = "\n".join(extract_journal_text(j)[:200] for j in latest_journals)

    task_list = "\n".join(f"- {t.title}: {t.description or 'No description'}" for t in pending_tasks)

    prompt = f"""
Based on this person's recent journal entries and current tasks, provide insights on task prioritization.

Recent journal themes:
{recent_journal_text}

Pending tasks:
{task_list}

Provide:
1. Top 3 tasks to prioritize this week based on their current state of mind and concerns
2. Tasks that might be causing stress and should be addressed or delegated
3. Tasks that align with their current emotional needs and goals

Focus on actionable, personalized recommendations.
"""

    return await ask_llm(prompt, agent)

# Run task prioritization insight
# Uses all of the user's tasks but only the journals inside the DAYS_BACK
# window (recent_journals).
print("Generating task prioritization insights...")
task_insights = await prioritize_tasks_insight(user_data['tasks'], recent_journals, llm_agent)
print("\n" + "="*80)
print("TASK PRIORITIZATION INSIGHTS")
print("="*80)
print(task_insights)
print("="*80)

In [None]:
# Example: Weekly reflection and goal setting
async def weekly_reflection_insight(journals: List, tasks: List, agent=None):
    """Generate a weekly reflection and goal-setting insight.

    Args:
        journals: Journal entries exposing ``created_at``; only those from the
            last 7 days are used, in chronological order.
        tasks: Task objects exposing ``is_completed``, ``updated_at`` and
            ``title``. A task counts as completed this week when it is
            completed and its ``updated_at`` falls in the last 7 days.
        agent: Agent forwarded to ``ask_llm``.

    Returns:
        The LLM's reflection text.
    """
    week_ago = datetime.now() - timedelta(days=7)

    # Sort explicitly so the [-7:] slice keeps the latest entries and the
    # summaries read in day order, whatever order the caller's list is in.
    weekly_journals = sorted(
        (j for j in journals if j.created_at > week_ago),
        key=lambda j: j.created_at,
    )

    # Get completed tasks this week
    completed_this_week = [
        t for t in tasks
        if t.is_completed and t.updated_at and t.updated_at > week_ago
    ]

    # Placeholders keep the prompt sections from being silently blank when
    # there is nothing to report.
    journal_summaries = "\n".join(
        f"Day {j.created_at.strftime('%A')}: {extract_journal_text(j)[:150]}..."
        for j in weekly_journals[-7:]  # Last 7 entries
    ) or "(no journal entries this week)"

    completed_tasks = "\n".join(
        f"- {t.title}" for t in completed_this_week
    ) or "(no tasks completed this week)"

    prompt = f"""
Provide a weekly reflection and goal-setting insight based on this person's journal entries and accomplishments.

This week's journal entries:
{journal_summaries}

Tasks completed this week:
{completed_tasks}

Provide:
1. **Key themes and patterns** from the week
2. **Wins and accomplishments** to celebrate
3. **Areas for improvement** or attention
4. **3 specific goals** for next week based on their patterns and needs
5. **One self-care recommendation** based on their emotional state

Make it personal, encouraging, and actionable.
"""

    return await ask_llm(prompt, agent)

# Run weekly reflection
# recent_journals already covers the DAYS_BACK window; the function narrows
# it further to the last 7 days.
print("Generating weekly reflection insights...")
weekly_insights = await weekly_reflection_insight(recent_journals, user_data['tasks'], llm_agent)
print("\n" + "="*80)
print("WEEKLY REFLECTION & GOAL SETTING")
print("="*80)
print(weekly_insights)
print("="*80)

## Experiment with Custom Prompts

Use this cell to test your own insight prompts and analysis approaches.

In [None]:
# Custom experiment space
# Example: Find productivity patterns

custom_prompt = f"""
# Add your custom prompt here
# You have access to:
# - user_data['tasks'] - all tasks
# - recent_journals - journal entries from last {DAYS_BACK} days
# - analyzed_journals - journals with emotional ratings
# - negative_journals - journals with negative emotional states

# Example experiment:
Analyze this person's productivity patterns. What times of day, activities, or conditions 
seem to correlate with task completion and positive outcomes?

Recent tasks completed: {len([t for t in user_data['tasks'] if t.is_completed])}
Recent journal entries: {len(recent_journals)}
"""

# Run your custom analysis
custom_result = await ask_llm(custom_prompt, llm_agent)
print("CUSTOM ANALYSIS RESULT:")
print("="*50)
print(custom_result)

## Data Export for Further Analysis

Export processed data for use in other tools or deeper analysis.

In [None]:
# Assemble the export payload from the results of the cells above.
# NOTE(review): 'text_preview' carries the first 200 characters of each
# journal entry, so the exported file contains journal content.
export_data = {
    'user_id': user_data['user_id'],
    'username': user_data['user'].username,
    'analysis_date': datetime.now().isoformat(),
    'date_range_days': DAYS_BACK,
    'summary': {
        'total_tasks': len(user_data['tasks']),
        'completed_tasks': sum(1 for task in user_data['tasks'] if task.is_completed),
        'total_journals': len(recent_journals),  # journals inside the DAYS_BACK window
        'analyzed_journals': len(analyzed_journals),
        'negative_state_journals': len(negative_journals),
    },
    'emotional_distribution': (
        {str(score): ratings.count(score) for score in range(1, 6)} if ratings else {}
    ),
    'analyzed_journals': [
        {
            'date': entry['date'],
            'emotional_rating': entry['emotional_rating'],
            'text_preview': entry['text'][:200],
        }
        for entry in analyzed_journals
    ],
}

# Write the payload to a timestamped JSON file in the current working directory
export_filename = f"insights_analysis_{USERNAME}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
with open(export_filename, 'w') as export_file:
    json.dump(export_data, export_file, indent=2)

print(f"✅ Analysis data exported to: {export_filename}")
print(f"Summary: {len(analyzed_journals)} journals analyzed, {len(negative_journals)} with negative emotional states")