# AWS Postgres RDS Debug Notebook

This notebook allows to run queries manually against the AWS postgres DB.

In [14]:
import sys
sys.path.insert(0, '/Users/ferdi/Documents/agent-copilot/src')

import psycopg2
import psycopg2.extras
from urllib.parse import urlparse
import pandas as pd
from datetime import datetime
import os

# Import the database module
from aco.server.database_manager import DB
DB.switch_mode("remote")

2025-11-19 02:02:59,729 - ACO - INFO - Switched to remote PostgreSQL database
2025-11-19 02:02:59,730 - ACO - DEBUG - Cleared SQLite connection cache
2025-11-19 02:02:59,731 - ACO - DEBUG - Closed PostgreSQL connection pool
2025-11-19 02:02:59,732 - ACO - DEBUG - Cleared PostgreSQL connection pool


## Check Database Connection

In [6]:
# EMERGENCY NETWORK DIAGNOSIS - Run this first!
print("üö® Basic network connectivity test...")

import socket
import subprocess
from urllib.parse import urlparse
from aco.common.constants import REMOTE_DATABASE_URL

parsed = urlparse(REMOTE_DATABASE_URL)
host = parsed.hostname
port = parsed.port or 5432

print(f"Target: {host}:{port}")

# Test 1: DNS resolution (should be instant)
print(f"\n1Ô∏è‚É£ Testing DNS resolution for {host}...")
try:
    import socket
    ip = socket.gethostbyname(host)
    print(f"‚úÖ DNS resolved to: {ip}")
except Exception as e:
    print(f"‚ùå DNS resolution failed: {e}")
    print("‚Üí This means the hostname is invalid or DNS is broken")

# Test 2: Ping test (basic connectivity)
print(f"\n2Ô∏è‚É£ Testing ping to {host}...")
try:
    result = subprocess.run(['ping', '-c', '2', host], 
                            capture_output=True, text=True, timeout=10)
    if result.returncode == 0:
        print("‚úÖ Ping successful")
        # Extract timing info
        lines = result.stdout.split('\n')
        for line in lines:
            if 'time=' in line:
                print(f"   {line.strip()}")
    else:
        print("‚ùå Ping failed")
        print(f"   Error: {result.stderr}")
except subprocess.TimeoutExpired:
    print("‚ùå Ping timeout - host unreachable")
except Exception as e:
    print(f"‚ùå Ping error: {e}")

# Test 3: Telnet-style connection test (fastest way to test port)
print(f"\n3Ô∏è‚É£ Testing port connectivity to {host}:{port}...")
try:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(5)  # Very short timeout
    result = sock.connect_ex((host, port))
    sock.close()
    
    if result == 0:
        print("‚úÖ Port is reachable")
    else:
        print(f"‚ùå Port connection failed (error {result})")
        print("   ‚Üí Database server may be down or port blocked")
except Exception as e:
    print(f"‚ùå Socket error: {e}")
    

# Test 4: General internet connectivity 
print(f"\n4Ô∏è‚É£ Testing general internet connectivity...")
try:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(3)
    result = sock.connect_ex(("8.8.8.8", 53))  # Google DNS
    sock.close()
    
    if result == 0:
        print("‚úÖ Internet connectivity OK")
    else:
        print("‚ùå No internet connectivity")
        print("   ‚Üí Check your network connection")
except Exception as e:
    print(f"‚ùå Internet test failed: {e}")

print(f"\nüîç If all tests pass but db.get_conn() hangs:")
print("   ‚Üí The PostgreSQL server is likely overloaded or in maintenance")
print("   ‚Üí Check AWS RDS console for instance status")
print("   ‚Üí Try connecting from a different network/location")

üö® Basic network connectivity test...
Target: workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com:5432

1Ô∏è‚É£ Testing DNS resolution for workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com...
‚úÖ DNS resolved to: 98.90.57.89

2Ô∏è‚É£ Testing ping to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com...
‚ùå Ping timeout - host unreachable

3Ô∏è‚É£ Testing port connectivity to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com:5432...
‚úÖ Port is reachable

4Ô∏è‚É£ Testing general internet connectivity...
‚úÖ Internet connectivity OK

üîç If all tests pass but db.get_conn() hangs:
   ‚Üí The PostgreSQL server is likely overloaded or in maintenance
   ‚Üí Check AWS RDS console for instance status
   ‚Üí Try connecting from a different network/location


In [8]:
# Enhanced database connection debugging
from aco.common.constants import DATABASE_URL
import psycopg2
from urllib.parse import urlparse

if DATABASE_URL:
    print(f"‚úÖ Database URL found: {DATABASE_URL.split('@')[1] if '@' in DATABASE_URL else DATABASE_URL}")
    
    # Parse the URL to show connection details
    try:
        parsed = urlparse(DATABASE_URL)
        print(f"\nüìã Connection details:")
        print(f"   Host: {parsed.hostname}")
        print(f"   Port: {parsed.port or 5432}")
        print(f"   Database: {parsed.path[1:] if parsed.path else 'N/A'}")
        print(f"   Username: {parsed.username}")
        print(f"   Password: {'***' if parsed.password else 'None'}")
    except Exception as e:
        print(f"‚ùå Error parsing DATABASE_URL: {e}")
    
    # Test raw psycopg2 connection first
    print(f"\nüîç Testing raw psycopg2 connection...")
    try:
        result = urlparse(DATABASE_URL)
        raw_conn = psycopg2.connect(
            host=result.hostname,
            port=result.port or 5432,
            user=result.username,
            password=result.password,
            database=result.path[1:],
            connect_timeout=30
        )
        print("‚úÖ Raw psycopg2 connection successful")
        raw_conn.close()
    except psycopg2.OperationalError as e:
        print(f"‚ùå psycopg2 connection failed: {e}")
        print("   This could be:")
        print("   - Network connectivity issue")
        print("   - Database server down")
        print("   - Authentication failure")
        print("   - Firewall blocking connection")
    except Exception as e:
        print(f"‚ùå Unexpected connection error: {e}")
    
    # Test db.get_conn() function
    print(f"\nüîç Testing db.get_conn() function...")
    try:
        conn = db.get_conn()
        print("‚úÖ db.get_conn() successful")
        print("‚úÖ Connected to database successfully")
    except Exception as e:
        print(f"‚ùå db.get_conn() failed: {e}")
        print(f"   Error type: {type(e).__name__}")
        
        # Check if it's a timeout
        if "timeout" in str(e).lower():
            print("   ‚Üí This looks like a timeout issue")
        elif "authentication" in str(e).lower() or "password" in str(e).lower():
            print("   ‚Üí This looks like an authentication issue")
        elif "connection refused" in str(e).lower():
            print("   ‚Üí This looks like the server is not accepting connections")
        elif "host" in str(e).lower():
            print("   ‚Üí This looks like a hostname resolution issue")
            
else:
    print("‚ùå No DATABASE_URL found")
    print("   Check your environment variables or config.yaml")

‚úÖ Database URL found: workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com:5432/workflow_db

üìã Connection details:
   Host: workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com
   Port: 5432
   Database: workflow_db
   Username: postgres
   Password: ***

üîç Testing raw psycopg2 connection...
‚úÖ Raw psycopg2 connection successful

üîç Testing db.get_conn() function...
‚úÖ db.get_conn() successful
‚úÖ Connected to database successfully


## List Table Entries

### experiments table

In [7]:
# Get all experiments
experiments = DB.query_all(
    "SELECT session_id, parent_session_id, name, timestamp, success, notes FROM experiments ORDER BY timestamp DESC LIMIT 20"
)

if experiments:
    df_experiments = pd.DataFrame(experiments)
    print(f"Found {len(experiments)} experiments:")
    display(df_experiments)
else:
    print("No experiments found in database")

2025-11-19 01:56:36,712 - ACO - DEBUG - Loaded PostgreSQL backend module
2025-11-19 01:56:36,713 - ACO - INFO - [QUERY_ALL] START thread=140704361521024 sql=SELECT session_id, parent_session_id, name, timestamp, success, notes FROM experiments ORDER BY time...
2025-11-19 01:56:37,688 - ACO - DEBUG - Database schema initialized
2025-11-19 01:56:37,689 - ACO - INFO - Initialized PostgreSQL connection pool to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com
2025-11-19 01:56:37,690 - ACO - INFO - [CONN] GET conn=5044824640 thread=140704361521024 caller=postgres.py:query_all:226
2025-11-19 01:56:37,756 - ACO - INFO - [QUERY_ALL] SUCCESS thread=140704361521024 conn=5044824640 rows=20
2025-11-19 01:56:37,757 - ACO - INFO - [CONN] RETURN conn=5044824640 thread=140704361521024 caller=postgres.py:query_all:238


Found 20 experiments:


Unnamed: 0,0,1,2,3,4,5
0,7c8e9ccf-67b8-4a23-9729-1748211f2ea3,7c8e9ccf-67b8-4a23-9729-1748211f2ea3,Workflow run,2025-11-19 01:42:20.835837,,Take notes.
1,b854aaa1-940b-4173-9fb2-fa21737285af,b854aaa1-940b-4173-9fb2-fa21737285af,Workflow run,2025-11-19 01:35:03.055282,,Take notes.
2,267a1e5e-f0da-4562-8872-c04a307fbb16,267a1e5e-f0da-4562-8872-c04a307fbb16,test_api_calls,2025-11-19 01:23:24.910038,,Take notes.
3,478db80f-7747-4251-a328-c332753cdd75,478db80f-7747-4251-a328-c332753cdd75,test_api_calls,2025-11-19 00:57:20.149974,,Take notes.
4,5a6a2a86-6296-4068-9d76-00da45c9c01e,5a6a2a86-6296-4068-9d76-00da45c9c01e,test_api_calls,2025-11-19 00:24:48.492158,,Take notes.
5,75ee4595-47b1-40cf-90dd-204af276e4be,75ee4595-47b1-40cf-90dd-204af276e4be,test_api_calls,2025-11-19 00:19:10.979501,,Take notes.
6,26ed59ff-4c64-494f-b9c9-ebd3277f6233,26ed59ff-4c64-494f-b9c9-ebd3277f6233,test_api_calls,2025-11-19 00:18:36.310759,,Take notes.
7,082e3c50-cb00-4889-92be-4d92b6ba960b,082e3c50-cb00-4889-92be-4d92b6ba960b,test_api_calls,2025-11-19 00:13:34.643312,,Take notes.
8,973a49a1-90dc-4074-9c6d-d96ff03c34be,973a49a1-90dc-4074-9c6d-d96ff03c34be,test_api_calls,2025-11-18 23:22:18.505094,,Take notes.
9,46859243-7002-4190-b01c-a6fc5848f477,46859243-7002-4190-b01c-a6fc5848f477,test_api_calls,2025-11-18 22:51:03.077411,,Take notes.


### llm_calls table

In [12]:
# Get recent LLM calls
llm_calls = db.query_all(
    "SELECT session_id, node_id, api_type, timestamp FROM llm_calls ORDER BY timestamp DESC LIMIT 20"
)

if llm_calls:
    df_llm = pd.DataFrame(llm_calls)
    print(f"Found {len(llm_calls)} recent LLM calls:")
    display(df_llm)
else:
    print("No LLM calls found")

No LLM calls found


## Clear tables

Here's a function to clear all records from the experiments table:

In [None]:
def clear_experiments_table():
    """Clear all records from the experiments table (deletes all records)"""
    try:
        # Execute DELETE query to remove all records
        DB.execute("DELETE FROM llm_calls")
        DB.execute("DELETE FROM experiments")
        print("‚úÖ Successfully cleared all records from experiments table")
        
        # Verify the table is empty
        count = DB.query_one("SELECT COUNT(*) as count FROM experiments")
        print(f"   Remaining records: {count['count']}")
        
    except Exception as e:
        print(f"‚ùå Failed to clear experiments table: {e}")
        
# Call the function to clear the table
# Uncomment the line below to actually run it:
clear_experiments_table()

2025-11-19 02:03:20,627 - ACO - INFO - [EXECUTE] START thread=140704361521024 sql=DELETE FROM experiments...
2025-11-19 02:03:21,356 - ACO - DEBUG - Database schema initialized
2025-11-19 02:03:21,357 - ACO - INFO - Initialized PostgreSQL connection pool to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com
2025-11-19 02:03:21,358 - ACO - INFO - [CONN] GET conn=5044824304 thread=140704361521024 caller=postgres.py:execute:249
2025-11-19 02:03:21,533 - ACO - ERROR - [EXECUTE] ERROR thread=140704361521024 conn=5044824304 error=update or delete on table "experiments" violates foreign key constraint "llm_calls_session_id_fkey" on table "llm_calls"
DETAIL:  Key (session_id)=(26657df9-1098-415d-9996-4171f3049780) is still referenced from table "llm_calls".

2025-11-19 02:03:21,566 - ACO - INFO - [CONN] RETURN conn=5044824304 thread=140704361521024 caller=postgres.py:execute:261


‚ùå Failed to clear experiments table: update or delete on table "experiments" violates foreign key constraint "llm_calls_session_id_fkey" on table "llm_calls"
DETAIL:  Key (session_id)=(26657df9-1098-415d-9996-4171f3049780) is still referenced from table "llm_calls".



In [None]:
# FRESH CONNECTION + EMERGENCY RESET (All-in-one)
print("üö® Creating fresh connection and performing emergency reset...")
from aco.common.constants import DATABASE_URL
import psycopg2
import psycopg2.extras
from urllib.parse import urlparse

if DATABASE_URL:
    parsed = urlparse(DATABASE_URL)
    
    try:
        # Create a fresh connection
        print("1Ô∏è‚É£ Establishing fresh connection...")
        fresh_conn = psycopg2.connect(
            host=parsed.hostname,
            port=parsed.port or 5432,
            user=parsed.username,
            password=parsed.password,
            database=parsed.path[1:],
            connect_timeout=15
        )
        fresh_conn.autocommit = True  # Auto-commit for immediate effect
        cursor = fresh_conn.cursor()
        print("   ‚úÖ Fresh connection established")
        
        # Kill all other connections
        print("\n2Ô∏è‚É£ Killing hanging sessions...")
        cursor.execute("SELECT current_database()")
        current_db = cursor.fetchone()[0]
        
        cursor.execute("""
            SELECT pg_terminate_backend(pid) as killed
            FROM pg_stat_activity
            WHERE datname = %s 
            AND pid != pg_backend_pid()
            AND state != 'idle'
        """, (current_db,))
        killed_sessions = cursor.fetchall()
        print(f"   ‚úÖ Terminated {len(killed_sessions)} hanging sessions")
        
        # Force drop all tables (uncomment below)
        # print("\n3Ô∏è‚É£ Force dropping tables...")
        # cursor.execute("DROP TABLE IF EXISTS llm_calls CASCADE")
        # cursor.execute("DROP TABLE IF EXISTS attachments CASCADE")
        # cursor.execute("DROP TABLE IF EXISTS experiments CASCADE")
        # print("   ‚úÖ All tables dropped")
        
        # # Recreate experiments table
        # print("\n4Ô∏è‚É£ Recreating tables with BYTEA schema...")
        # cursor.execute("""
        #     CREATE TABLE experiments (
        #         session_id TEXT PRIMARY KEY,
        #         parent_session_id TEXT,
        #         graph_topology TEXT,
        #         color_preview TEXT,
        #         timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        #         cwd TEXT,
        #         command TEXT,
        #         environment TEXT,
        #         code_hash TEXT,
        #         name TEXT,
        #         success TEXT CHECK (success IN ('', 'Satisfactory', 'Failed')),
        #         notes TEXT,
        #         log TEXT,
        #         FOREIGN KEY (parent_session_id) REFERENCES experiments (session_id),
        #         UNIQUE (parent_session_id, name)
        #     )
        # """)
        # print("   ‚úÖ Experiments table created")
        
        # # Recreate llm_calls table with BYTEA output (THE CRITICAL FIX!)
        # cursor.execute("""
        #     CREATE TABLE llm_calls (
        #         session_id TEXT,
        #         node_id TEXT,
        #         input BYTEA,
        #         input_hash TEXT,
        #         input_overwrite BYTEA,
        #         output BYTEA,
        #         color TEXT,
        #         label TEXT,
        #         api_type TEXT,
        #         timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        #         PRIMARY KEY (session_id, node_id),
        #         FOREIGN KEY (session_id) REFERENCES experiments (session_id)
        #     )
        # """)
        # print("   ‚úÖ LLM calls table created with BYTEA output column")
        
        # # Recreate attachments table
        # cursor.execute("""
        #     CREATE TABLE attachments (
        #         file_id TEXT PRIMARY KEY,
        #         session_id TEXT,
        #         line_no INTEGER,
        #         content_hash TEXT,
        #         file_path TEXT,
        #         taint TEXT,
        #         FOREIGN KEY (session_id) REFERENCES experiments (session_id)
        #     )
        # """)
        # print("   ‚úÖ Attachments table created")
        
        # # Create indexes
        # cursor.execute("CREATE INDEX attachments_content_hash_idx ON attachments(content_hash)")
        # cursor.execute("CREATE INDEX original_input_lookup ON llm_calls(session_id, input_hash)")
        # cursor.execute("CREATE INDEX experiments_timestamp_idx ON experiments(timestamp DESC)")
        # print("   ‚úÖ Indexes created")
        
        # # Verify the schema fix
        # cursor.execute("""
        #     SELECT data_type 
        #     FROM information_schema.columns 
        #     WHERE table_name = 'llm_calls' AND column_name = 'output'
        # """)
        # output_type = cursor.fetchone()[0]
        
        print(f"\nüéâ SUCCESS! Schema completely rebuilt")
        print(f"   ‚úÖ Output column is now: {output_type}")
        print(f"   ‚úÖ All tables recreated cleanly")
        print(f"   ‚úÖ All locks cleared")
        
        if output_type == 'bytea':
            print(f"\n‚ú® The fix is complete! Now:")
            print(f"   1. Your original script should work without errors")
            print(f"   2. Revert the cache_manager.py change (use dill.loads directly)")
            print(f"   3. The web app should show clear data")
        else:
            print(f"   ‚ö†Ô∏è  Output column type is unexpected: {output_type}")
            
        fresh_conn.close()
        
    except Exception as e:
        print(f"‚ùå Emergency reset failed: {e}")
        print(f"   Error type: {type(e).__name__}")
        
        if "timeout" in str(e).lower():
            print("   ‚Üí Connection still timing out. Database may be overloaded.")
        elif "permission" in str(e).lower():
            print("   ‚Üí Permission denied. Check database user privileges.")
        else:
            print(f"   ‚Üí Unexpected error: {e}")
            
else:
    print("‚ùå No DATABASE_URL found")

üö® Creating fresh connection and performing emergency reset...
1Ô∏è‚É£ Establishing fresh connection...
   ‚úÖ Fresh connection established

2Ô∏è‚É£ Killing hanging sessions...
   ‚úÖ Terminated 2 hanging sessions

3Ô∏è‚É£ Force dropping tables...
   ‚úÖ All tables dropped

4Ô∏è‚É£ Recreating tables with BYTEA schema...
   ‚úÖ Experiments table created
   ‚úÖ LLM calls table created with BYTEA output column
   ‚úÖ Attachments table created
   ‚úÖ Indexes created

üéâ SUCCESS! Schema completely rebuilt
   ‚úÖ Output column is now: bytea
   ‚úÖ All tables recreated cleanly
   ‚úÖ All locks cleared

‚ú® The fix is complete! Now:
   1. Your original script should work without errors
   2. Revert the cache_manager.py change (use dill.loads directly)
   3. The web app should show clear data


In [None]:
# Verify the schema change was successful
final_schema = DB.query_all(
    """
    SELECT column_name, data_type, is_nullable
    FROM information_schema.columns 
    WHERE table_name = 'llm_calls' 
    ORDER BY ordinal_position
    """
)

print("Final llm_calls table schema:")
for col in final_schema:
    status = ""
    if col['column_name'] == 'output':
        if col['data_type'] == 'bytea':
            status = " ‚úÖ"
        else:
            status = " ‚ùå"
    print(f"  {col['column_name']}: {col['data_type']} ({'NULL' if col['is_nullable'] == 'YES' else 'NOT NULL'}){status}")

# Test that we can insert and retrieve binary data
print("\nTesting binary data storage...")
try:
    import dill
    test_data = {"test": "data", "number": 42}
    test_pickle = dill.dumps(test_data)
    
    # This should work now without error
    print(f"‚úÖ Successfully created pickle data: {len(test_pickle)} bytes")
    print(f"‚úÖ Can load pickle back: {dill.loads(test_pickle)}")
    print("Migration appears successful!")
    
except Exception as e:
    print(f"‚ùå Error with pickle test: {e}")

2025-11-17 02:34:14,429 - ACO - DEBUG - Database schema initialized
2025-11-17 02:34:14,430 - ACO - INFO - Initialized PostgreSQL connection to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com


Final llm_calls table schema:
  session_id: text (NOT NULL)
  node_id: text (NOT NULL)
  input: bytea (NULL)
  input_hash: text (NULL)
  input_overwrite: bytea (NULL)
  output: bytea (NULL) ‚úÖ
  color: text (NULL)
  label: text (NULL)
  api_type: text (NULL)
  timestamp: timestamp without time zone (NULL)

Testing binary data storage...
‚úÖ Successfully created pickle data: 41 bytes
‚úÖ Can load pickle back: {'test': 'data', 'number': 42}
Migration appears successful!


In [None]:
def clear_experiments_table():
    """Clear all records from the experiments table with cascading delete"""
    try:
        # First delete from dependent tables (in order of dependencies)
        print("üóëÔ∏è  Starting cascaded delete...")
        
        # 1. Delete from llm_calls (depends on experiments)
        DB.execute("DELETE FROM llm_calls")
        print("   ‚úÖ Cleared llm_calls table")
        
        # 2. Delete from attachments (depends on experiments)
        DB.execute("DELETE FROM attachments")
        print("   ‚úÖ Cleared attachments table")
        
        # 3. Finally delete from experiments (parent table)
        DB.execute("DELETE FROM experiments")
        print("   ‚úÖ Cleared experiments table")
        
        # Verify all tables are empty
        exp_count = DB.query_one("SELECT COUNT(*) as count FROM experiments")
        llm_count = DB.query_one("SELECT COUNT(*) as count FROM llm_calls")
        att_count = DB.query_one("SELECT COUNT(*) as count FROM attachments")
        
        print(f"\nüìä Final counts:")
        print(f"   experiments: {exp_count['count']} records")
        print(f"   llm_calls: {llm_count['count']} records")
        print(f"   attachments: {att_count['count']} records")
        
        print("\n‚ú® Successfully cleared all tables!")
        
    except Exception as e:
        print(f"‚ùå Failed to clear tables: {e}")
        print("   Make sure to handle foreign key constraints")
        
# Call the function to clear all tables
# Uncomment the line below to actually run it:
# clear_experiments_table()