# AWS Postgres RDS Debug Notebook

This notebook allows to run queries manually against the AWS postgres DB.

In [2]:
import sys
sys.path.insert(0, '/Users/ferdi/Documents/agent-copilot/src')

import psycopg2
import psycopg2.extras
from urllib.parse import urlparse
import pandas as pd
from datetime import datetime
import os

# Import the database module
from aco.server import db

2025-11-26 12:11:08,666 - ACO - INFO - Using PostgreSQL database backend


## Check Database Connection

(You don't really need to run this. I just had to debug something once and wanted to keep it around)

In [7]:
# EMERGENCY NETWORK DIAGNOSIS - Run this first!
print("üö® Basic network connectivity test...")

import socket
import subprocess
from urllib.parse import urlparse
from aco.common.constants import DATABASE_URL

if DATABASE_URL:
    parsed = urlparse(DATABASE_URL)
    host = parsed.hostname
    port = parsed.port or 5432
    
    print(f"Target: {host}:{port}")
    
    # Test 1: DNS resolution (should be instant)
    print(f"\n1Ô∏è‚É£ Testing DNS resolution for {host}...")
    try:
        import socket
        ip = socket.gethostbyname(host)
        print(f"‚úÖ DNS resolved to: {ip}")
    except Exception as e:
        print(f"‚ùå DNS resolution failed: {e}")
        print("‚Üí This means the hostname is invalid or DNS is broken")
    
    # Test 2: Ping test (basic connectivity)
    print(f"\n2Ô∏è‚É£ Testing ping to {host}...")
    try:
        result = subprocess.run(['ping', '-c', '2', host], 
                              capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            print("‚úÖ Ping successful")
            # Extract timing info
            lines = result.stdout.split('\n')
            for line in lines:
                if 'time=' in line:
                    print(f"   {line.strip()}")
        else:
            print("‚ùå Ping failed")
            print(f"   Error: {result.stderr}")
    except subprocess.TimeoutExpired:
        print("‚ùå Ping timeout - host unreachable")
    except Exception as e:
        print(f"‚ùå Ping error: {e}")
    
    # Test 3: Telnet-style connection test (fastest way to test port)
    print(f"\n3Ô∏è‚É£ Testing port connectivity to {host}:{port}...")
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(5)  # Very short timeout
        result = sock.connect_ex((host, port))
        sock.close()
        
        if result == 0:
            print("‚úÖ Port is reachable")
        else:
            print(f"‚ùå Port connection failed (error {result})")
            print("   ‚Üí Database server may be down or port blocked")
    except Exception as e:
        print(f"‚ùå Socket error: {e}")
        
else:
    print("‚ùå No DATABASE_URL found")

# Test 4: General internet connectivity 
print(f"\n4Ô∏è‚É£ Testing general internet connectivity...")
try:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(3)
    result = sock.connect_ex(("8.8.8.8", 53))  # Google DNS
    sock.close()
    
    if result == 0:
        print("‚úÖ Internet connectivity OK")
    else:
        print("‚ùå No internet connectivity")
        print("   ‚Üí Check your network connection")
except Exception as e:
    print(f"‚ùå Internet test failed: {e}")

print(f"\nüîç If all tests pass but db.get_conn() hangs:")
print("   ‚Üí The PostgreSQL server is likely overloaded or in maintenance")
print("   ‚Üí Check AWS RDS console for instance status")
print("   ‚Üí Try connecting from a different network/location")

üö® Basic network connectivity test...
Target: workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com:5432

1Ô∏è‚É£ Testing DNS resolution for workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com...
‚úÖ DNS resolved to: 98.90.57.89

2Ô∏è‚É£ Testing ping to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com...
‚ùå Ping timeout - host unreachable

3Ô∏è‚É£ Testing port connectivity to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com:5432...
‚úÖ Port is reachable

4Ô∏è‚É£ Testing general internet connectivity...
‚úÖ Internet connectivity OK

üîç If all tests pass but db.get_conn() hangs:
   ‚Üí The PostgreSQL server is likely overloaded or in maintenance
   ‚Üí Check AWS RDS console for instance status
   ‚Üí Try connecting from a different network/location


## Database Schema

In [11]:
# Show complete database schema information
print("üìä DATABASE SCHEMA OVERVIEW\n" + "="*50)

# Get all tables in the database
tables_query = """
    SELECT table_name 
    FROM information_schema.tables 
    WHERE table_schema = 'public' 
    ORDER BY table_name
"""
tables = db.query_all(tables_query)

if not tables:
    print("‚ùå No tables found in the database")
else:
    print(f"‚úÖ Found {len(tables)} tables in the database:\n")
    
    for table_info in tables:
        table_name = table_info['table_name']
        print(f"\nüìã Table: {table_name.upper()}")
        print("-" * 40)
        
        # Get columns for each table
        columns_query = """
            SELECT 
                column_name,
                data_type,
                character_maximum_length,
                is_nullable,
                column_default
            FROM information_schema.columns 
            WHERE table_name = %s AND table_schema = 'public'
            ORDER BY ordinal_position
        """
        columns = db.query_all(columns_query, (table_name,))
        
        # Display columns
        for col in columns:
            col_name = col['column_name']
            col_type = col['data_type']
            
            # Add length info if applicable
            if col['character_maximum_length']:
                col_type += f"({col['character_maximum_length']})"
            
            # Add NULL/NOT NULL
            nullable = "NULL" if col['is_nullable'] == 'YES' else "NOT NULL"
            
            # Add default value if exists
            default = f" DEFAULT {col['column_default']}" if col['column_default'] else ""
            
            print(f"  ‚Ä¢ {col_name}: {col_type} {nullable}{default}")
        
        # Get primary key constraints
        pk_query = """
            SELECT column_name
            FROM information_schema.key_column_usage kcu
            JOIN information_schema.table_constraints tc 
                ON kcu.constraint_name = tc.constraint_name
            WHERE tc.table_name = %s 
                AND tc.constraint_type = 'PRIMARY KEY'
                AND tc.table_schema = 'public'
            ORDER BY kcu.ordinal_position
        """
        pk_cols = db.query_all(pk_query, (table_name,))
        
        if pk_cols:
            pk_names = [col['column_name'] for col in pk_cols]
            print(f"\n  üîë Primary Key: ({', '.join(pk_names)})")
        
        # Get foreign key constraints
        fk_query = """
            SELECT
                kcu.column_name,
                ccu.table_name AS foreign_table_name,
                ccu.column_name AS foreign_column_name
            FROM information_schema.table_constraints AS tc
            JOIN information_schema.key_column_usage AS kcu
                ON tc.constraint_name = kcu.constraint_name
            JOIN information_schema.constraint_column_usage AS ccu
                ON ccu.constraint_name = tc.constraint_name
            WHERE tc.constraint_type = 'FOREIGN KEY' 
                AND tc.table_name = %s
                AND tc.table_schema = 'public'
        """
        fk_cols = db.query_all(fk_query, (table_name,))
        
        if fk_cols:
            print("\n  üîó Foreign Keys:")
            for fk in fk_cols:
                print(f"    ‚Ä¢ {fk['column_name']} ‚Üí {fk['foreign_table_name']}.{fk['foreign_column_name']}")
        
        # Get unique constraints
        unique_query = """
            SELECT column_name
            FROM information_schema.key_column_usage kcu
            JOIN information_schema.table_constraints tc 
                ON kcu.constraint_name = tc.constraint_name
            WHERE tc.table_name = %s 
                AND tc.constraint_type = 'UNIQUE'
                AND tc.table_schema = 'public'
        """
        unique_cols = db.query_all(unique_query, (table_name,))
        
        if unique_cols:
            unique_names = [col['column_name'] for col in unique_cols]
            print(f"\n  ‚≠ê Unique Constraints: {', '.join(unique_names)}")
        
        # Get indexes
        index_query = """
            SELECT 
                indexname,
                indexdef
            FROM pg_indexes
            WHERE tablename = %s
                AND schemaname = 'public'
        """
        indexes = db.query_all(index_query, (table_name,))
        
        if indexes:
            print("\n  üìç Indexes:")
            for idx in indexes:
                # Skip primary key index (already shown above)
                if not idx['indexname'].endswith('_pkey'):
                    print(f"    ‚Ä¢ {idx['indexname']}")
        
        # Get row count
        count_query = f"SELECT COUNT(*) as count FROM {table_name}"
        count_result = db.query_all(count_query)
        row_count = count_result[0]['count'] if count_result else 0
        print(f"\n  üìà Row Count: {row_count:,}")

print("\n" + "="*50)
print("‚úÖ Schema overview complete")

üìä DATABASE SCHEMA OVERVIEW
‚úÖ Found 3 tables in the database:


üìã Table: ATTACHMENTS
----------------------------------------
  ‚Ä¢ file_id: text NOT NULL
  ‚Ä¢ session_id: text NULL
  ‚Ä¢ line_no: integer NULL
  ‚Ä¢ content_hash: text NULL
  ‚Ä¢ file_path: text NULL
  ‚Ä¢ taint: text NULL

  üîë Primary Key: (file_id)

  üîó Foreign Keys:
    ‚Ä¢ session_id ‚Üí experiments.session_id

  üìç Indexes:
    ‚Ä¢ attachments_content_hash_idx

  üìà Row Count: 0

üìã Table: EXPERIMENTS
----------------------------------------
  ‚Ä¢ session_id: text NOT NULL
  ‚Ä¢ parent_session_id: text NULL
  ‚Ä¢ graph_topology: text NULL
  ‚Ä¢ color_preview: text NULL
  ‚Ä¢ timestamp: timestamp without time zone NULL DEFAULT CURRENT_TIMESTAMP
  ‚Ä¢ cwd: text NULL
  ‚Ä¢ command: text NULL
  ‚Ä¢ environment: text NULL
  ‚Ä¢ code_hash: text NULL
  ‚Ä¢ name: text NULL
  ‚Ä¢ success: text NULL
  ‚Ä¢ notes: text NULL
  ‚Ä¢ log: text NULL

  üîë Primary Key: (session_id)

  üîó Foreign Keys:
    ‚Ä¢

## List Table Entries

### experiments table

In [10]:
# Get all experiments
experiments = db.query_all(
    "SELECT session_id, parent_session_id, name, timestamp, success, notes FROM experiments ORDER BY timestamp DESC LIMIT 20"
)

if experiments:
    df_experiments = pd.DataFrame(experiments)
    print(f"Found {len(experiments)} experiments:")
    display(df_experiments)
else:
    print("No experiments found in database")


2025-11-17 02:34:37,109 - ACO - DEBUG - Database schema initialized
2025-11-17 02:34:37,111 - ACO - INFO - Initialized PostgreSQL connection to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com


No experiments found in database


### llm_calls table

In [12]:
# Get recent LLM calls
llm_calls = db.query_all(
    "SELECT session_id, node_id, api_type, timestamp FROM llm_calls ORDER BY timestamp DESC LIMIT 20"
)

if llm_calls:
    df_llm = pd.DataFrame(llm_calls)
    print(f"Found {len(llm_calls)} recent LLM calls:")
    display(df_llm)
else:
    print("No LLM calls found")

No LLM calls found


In [10]:
print("üîß RESETTING DATABASE SCHEMA TO MATCH postgres.py\n" + "="*50)

try:
    # Use the existing db connection from the first cell
    conn = db.get_conn()
    
    # First, commit or rollback any existing transaction
    try:
        conn.rollback()
        print("1Ô∏è‚É£ Rolled back any existing transaction")
    except:
        pass
    
    # Now we can set autocommit
    conn.autocommit = True
    c = conn.cursor()
    print("2Ô∏è‚É£ Using existing database connection (autocommit mode)")
    
    # Drop all existing tables in correct order (respecting foreign keys)
    print("\n3Ô∏è‚É£ Dropping existing tables...")
    c.execute("DROP TABLE IF EXISTS attachments CASCADE")
    print("   ‚úÖ Dropped attachments table")
    c.execute("DROP TABLE IF EXISTS llm_calls CASCADE")
    print("   ‚úÖ Dropped llm_calls table")
    c.execute("DROP TABLE IF EXISTS experiments CASCADE")
    print("   ‚úÖ Dropped experiments table")
    c.execute("DROP TABLE IF EXISTS users CASCADE")  # Also drop users if it exists
    print("   ‚úÖ Dropped users table (if existed)")
    
    # Create experiments table (exactly as in postgres.py)
    print("\n4Ô∏è‚É£ Creating tables with schema from postgres.py...")
    c.execute(
        """
        CREATE TABLE IF NOT EXISTS experiments (
            session_id TEXT PRIMARY KEY,
            parent_session_id TEXT,
            graph_topology TEXT,
            color_preview TEXT,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            cwd TEXT,
            command TEXT,
            environment TEXT,
            code_hash TEXT,
            name TEXT,
            success TEXT CHECK (success IN ('', 'Satisfactory', 'Failed')),
            notes TEXT,
            log TEXT,
            FOREIGN KEY (parent_session_id) REFERENCES experiments (session_id),
            UNIQUE (parent_session_id, name)
        )
        """
    )
    print("   ‚úÖ Created experiments table")
    
    # Create llm_calls table (NOTE: output is TEXT, not BYTEA!)
    c.execute(
        """
        CREATE TABLE IF NOT EXISTS llm_calls (
            session_id TEXT,
            node_id TEXT,
            input BYTEA,
            input_hash TEXT,
            input_overwrite BYTEA,
            output TEXT,
            color TEXT,
            label TEXT,
            api_type TEXT,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY (session_id, node_id),
            FOREIGN KEY (session_id) REFERENCES experiments (session_id)
        )
        """
    )
    print("   ‚úÖ Created llm_calls table (with output as TEXT)")
    
    # Create attachments table
    c.execute(
        """
        CREATE TABLE IF NOT EXISTS attachments (
            file_id TEXT PRIMARY KEY,
            session_id TEXT,
            line_no INTEGER,
            content_hash TEXT,
            file_path TEXT,
            taint TEXT,
            FOREIGN KEY (session_id) REFERENCES experiments (session_id)
        )
        """
    )
    print("   ‚úÖ Created attachments table")
    
    # Create indexes (exactly as in postgres.py)
    print("\n5Ô∏è‚É£ Creating indexes...")
    c.execute(
        """
        CREATE INDEX IF NOT EXISTS attachments_content_hash_idx ON attachments(content_hash)
        """
    )
    print("   ‚úÖ Created attachments_content_hash_idx")
    
    c.execute(
        """
        CREATE INDEX IF NOT EXISTS original_input_lookup ON llm_calls(session_id, input_hash)
        """
    )
    print("   ‚úÖ Created original_input_lookup index")
    
    c.execute(
        """
        CREATE INDEX IF NOT EXISTS experiments_timestamp_idx ON experiments(timestamp DESC)
        """
    )
    print("   ‚úÖ Created experiments_timestamp_idx")
    
    # Verify the schema
    print("\n6Ô∏è‚É£ Verifying schema...")
    c.execute("""
        SELECT column_name, data_type 
        FROM information_schema.columns 
        WHERE table_name = 'llm_calls' AND column_name = 'output'
    """)
    output_col = c.fetchone()
    
    if output_col:
        output_type = output_col[1]
        if output_type == 'text':
            print(f"   ‚úÖ Output column is TEXT (matching postgres.py)")
        else:
            print(f"   ‚ö†Ô∏è Output column is {output_type} (expected TEXT)")
    
    # Get table count
    c.execute("""
        SELECT COUNT(*) FROM information_schema.tables 
        WHERE table_schema = 'public'
    """)
    table_count = c.fetchone()[0]
    
    print(f"\n‚úÖ SCHEMA RESET COMPLETE!")
    print(f"   ‚Ä¢ {table_count} tables created")
    print(f"   ‚Ä¢ All indexes created")
    print(f"   ‚Ä¢ Schema now matches postgres.py exactly")
    print(f"\n‚ö†Ô∏è IMPORTANT: The 'output' column in llm_calls is TEXT (not BYTEA)")
    print(f"   This matches the original postgres.py schema.")
    
    # Reset autocommit back to False as per postgres.py default
    conn.autocommit = False
    print(f"   ‚Ä¢ Autocommit reset to False")
    
except Exception as e:
    print(f"\n‚ùå Schema reset failed: {e}")
    print(f"   Error type: {type(e).__name__}")
    import traceback
    traceback.print_exc()
    
    # Try to restore connection state
    try:
        conn.rollback()
        conn.autocommit = False
    except:
        pass

üîß RESETTING DATABASE SCHEMA TO MATCH postgres.py
1Ô∏è‚É£ Rolled back any existing transaction
2Ô∏è‚É£ Using existing database connection (autocommit mode)

3Ô∏è‚É£ Dropping existing tables...
   ‚úÖ Dropped attachments table
   ‚úÖ Dropped llm_calls table
   ‚úÖ Dropped experiments table
   ‚úÖ Dropped users table (if existed)

4Ô∏è‚É£ Creating tables with schema from postgres.py...
   ‚úÖ Created experiments table
   ‚úÖ Created llm_calls table (with output as TEXT)
   ‚úÖ Created attachments table

5Ô∏è‚É£ Creating indexes...
   ‚úÖ Created attachments_content_hash_idx
   ‚úÖ Created original_input_lookup index
   ‚úÖ Created experiments_timestamp_idx

6Ô∏è‚É£ Verifying schema...
   ‚úÖ Output column is TEXT (matching postgres.py)

‚úÖ SCHEMA RESET COMPLETE!
   ‚Ä¢ 3 tables created
   ‚Ä¢ All indexes created
   ‚Ä¢ Schema now matches postgres.py exactly

‚ö†Ô∏è IMPORTANT: The 'output' column in llm_calls is TEXT (not BYTEA)
   This matches the original postgres.py schema.
   ‚Ä¢ A

In [5]:
# Verify the schema change was successful
final_schema = db.query_all(
    """
    SELECT column_name, data_type, is_nullable
    FROM information_schema.columns 
    WHERE table_name = 'llm_calls' 
    ORDER BY ordinal_position
    """
)

print("Final llm_calls table schema:")
for col in final_schema:
    status = ""
    if col['column_name'] == 'output':
        if col['data_type'] == 'bytea':
            status = " ‚úÖ"
        else:
            status = " ‚ùå"
    print(f"  {col['column_name']}: {col['data_type']} ({'NULL' if col['is_nullable'] == 'YES' else 'NOT NULL'}){status}")

# Test that we can insert and retrieve binary data
print("\nTesting binary data storage...")
try:
    import dill
    test_data = {"test": "data", "number": 42}
    test_pickle = dill.dumps(test_data)
    
    # This should work now without error
    print(f"‚úÖ Successfully created pickle data: {len(test_pickle)} bytes")
    print(f"‚úÖ Can load pickle back: {dill.loads(test_pickle)}")
    print("Migration appears successful!")
    
except Exception as e:
    print(f"‚ùå Error with pickle test: {e}")

2025-11-17 02:34:14,429 - ACO - DEBUG - Database schema initialized
2025-11-17 02:34:14,430 - ACO - INFO - Initialized PostgreSQL connection to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com


Final llm_calls table schema:
  session_id: text (NOT NULL)
  node_id: text (NOT NULL)
  input: bytea (NULL)
  input_hash: text (NULL)
  input_overwrite: bytea (NULL)
  output: bytea (NULL) ‚úÖ
  color: text (NULL)
  label: text (NULL)
  api_type: text (NULL)
  timestamp: timestamp without time zone (NULL)

Testing binary data storage...
‚úÖ Successfully created pickle data: 41 bytes
‚úÖ Can load pickle back: {'test': 'data', 'number': 42}
Migration appears successful!
