# AWS Postgres RDS Debug Notebook

This notebook allows to run queries manually against the AWS postgres DB.

# Prepare

In [1]:
import sys
import psycopg2
import psycopg2.extras
from urllib.parse import urlparse
import pandas as pd
from datetime import datetime
import os

# Import the database module
from aco.server.database_manager import DB

# Ensure we're using the remote PostgreSQL database
try:
    DB.switch_mode("remote")
except Exception as e:
    print(f"‚ùå Failed to switch to remote mode: {e}")

2025-12-18 08:44:06,445 - ACO - INFO - DatabaseManager initialized with backend: local
2025-12-18 08:44:06,446 - ACO - INFO - Switched to remote PostgreSQL database


## Check Database Connection

(You don't really need to run this. I just had to debug something once and wanted to keep it around)

In [2]:
# EMERGENCY NETWORK DIAGNOSIS - Run this first!
print("üö® Basic network connectivity test...")

import socket
import subprocess
from urllib.parse import urlparse
from aco.common.constants import REMOTE_DATABASE_URL

if REMOTE_DATABASE_URL:
    parsed = urlparse(REMOTE_DATABASE_URL)
    host = parsed.hostname
    port = parsed.port or 5432
    
    print(f"Target: {host}:{port}")
    
    # Test 1: DNS resolution (should be instant)
    print(f"\n1Ô∏è‚É£ Testing DNS resolution for {host}...")
    try:
        import socket
        ip = socket.gethostbyname(host)
        print(f"‚úÖ DNS resolved to: {ip}")
    except Exception as e:
        print(f"‚ùå DNS resolution failed: {e}")
        print("‚Üí This means the hostname is invalid or DNS is broken")
    
    # Test 2: Ping test (basic connectivity)
    print(f"\n2Ô∏è‚É£ Testing ping to {host}...")
    try:
        result = subprocess.run(['ping', '-c', '2', host], 
                              capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            print("‚úÖ Ping successful")
            # Extract timing info
            lines = result.stdout.split('\n')
            for line in lines:
                if 'time=' in line:
                    print(f"   {line.strip()}")
        else:
            print("‚ùå Ping failed")
            print(f"   Error: {result.stderr}")
    except subprocess.TimeoutExpired:
        print("‚ùå Ping timeout - host unreachable")
    except Exception as e:
        print(f"‚ùå Ping error: {e}")
    
    # Test 3: Telnet-style connection test (fastest way to test port)
    print(f"\n3Ô∏è‚É£ Testing port connectivity to {host}:{port}...")
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(5)  # Very short timeout
        result = sock.connect_ex((host, port))
        sock.close()
        
        if result == 0:
            print("‚úÖ Port is reachable")
        else:
            print(f"‚ùå Port connection failed (error {result})")
            print("   ‚Üí Database server may be down or port blocked")
    except Exception as e:
        print(f"‚ùå Socket error: {e}")
        
else:
    print("‚ùå No DATABASE_URL found")

# Test 4: General internet connectivity 
print(f"\n4Ô∏è‚É£ Testing general internet connectivity...")
try:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(3)
    result = sock.connect_ex(("8.8.8.8", 53))  # Google DNS
    sock.close()
    
    if result == 0:
        print("‚úÖ Internet connectivity OK")
    else:
        print("‚ùå No internet connectivity")
        print("   ‚Üí Check your network connection")
except Exception as e:
    print(f"‚ùå Internet test failed: {e}")

print(f"\nüîç If all tests pass but db.get_conn() hangs:")
print("   ‚Üí The PostgreSQL server is likely overloaded or in maintenance")
print("   ‚Üí Check AWS RDS console for instance status")
print("   ‚Üí Try connecting from a different network/location")

üö® Basic network connectivity test...
Target: workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com:5432

1Ô∏è‚É£ Testing DNS resolution for workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com...
‚úÖ DNS resolved to: 98.90.57.89

2Ô∏è‚É£ Testing ping to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com...
‚ùå Ping timeout - host unreachable

3Ô∏è‚É£ Testing port connectivity to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com:5432...
‚úÖ Port is reachable

4Ô∏è‚É£ Testing general internet connectivity...
‚úÖ Internet connectivity OK

üîç If all tests pass but db.get_conn() hangs:
   ‚Üí The PostgreSQL server is likely overloaded or in maintenance
   ‚Üí Check AWS RDS console for instance status
   ‚Üí Try connecting from a different network/location


## Show database Schema

In [3]:
# Show complete database schema information
print("üìä DATABASE SCHEMA OVERVIEW\n" + "="*50)

# Ensure we're using the remote PostgreSQL database
if DB.get_current_mode() != "remote":
    try:
        DB.switch_mode("remote")
    except Exception as e:
        print(f"‚ùå Failed to switch to remote mode: {e}")

# Only proceed if we're now using remote database
if DB.get_current_mode() == "remote":
    print("")
    
    # Get all tables in the database
    tables_query = """
        SELECT table_name 
        FROM information_schema.tables 
        WHERE table_schema = 'public' 
        ORDER BY table_name
    """
    tables = DB.query_all(tables_query)
    
    if not tables:
        print("‚ùå No tables found in the database")
    else:
        print(f"‚úÖ Found {len(tables)} tables in the database:\n")
        
        for table_info in tables:
            table_name = table_info['table_name']
            print(f"\nüìã Table: {table_name.upper()}")
            print("-" * 40)
            
            # Get columns for each table
            columns_query = """
                SELECT 
                    column_name,
                    data_type,
                    character_maximum_length,
                    is_nullable,
                    column_default
                FROM information_schema.columns 
                WHERE table_name = %s AND table_schema = 'public'
                ORDER BY ordinal_position
            """
            columns = DB.query_all(columns_query, (table_name,))
            
            # Display columns
            for col in columns:
                col_name = col['column_name']
                col_type = col['data_type']
                
                # Add length info if applicable
                if col['character_maximum_length']:
                    col_type += f"({col['character_maximum_length']})"
                
                # Add NULL/NOT NULL
                nullable = "NULL" if col['is_nullable'] == 'YES' else "NOT NULL"
                
                # Add default value if exists
                default = f" DEFAULT {col['column_default']}" if col['column_default'] else ""
                
                print(f"  ‚Ä¢ {col_name}: {col_type} {nullable}{default}")
            
            # Get primary key constraints
            pk_query = """
                SELECT column_name
                FROM information_schema.key_column_usage kcu
                JOIN information_schema.table_constraints tc 
                    ON kcu.constraint_name = tc.constraint_name
                WHERE tc.table_name = %s 
                    AND tc.constraint_type = 'PRIMARY KEY'
                    AND tc.table_schema = 'public'
                ORDER BY kcu.ordinal_position
            """
            pk_cols = DB.query_all(pk_query, (table_name,))
            
            if pk_cols:
                pk_names = [col['column_name'] for col in pk_cols]
                print(f"\n  üîë Primary Key: ({', '.join(pk_names)})")
            
            # Get foreign key constraints
            fk_query = """
                SELECT
                    kcu.column_name,
                    ccu.table_name AS foreign_table_name,
                    ccu.column_name AS foreign_column_name
                FROM information_schema.table_constraints AS tc
                JOIN information_schema.key_column_usage AS kcu
                    ON tc.constraint_name = kcu.constraint_name
                JOIN information_schema.constraint_column_usage AS ccu
                    ON ccu.constraint_name = tc.constraint_name
                WHERE tc.constraint_type = 'FOREIGN KEY' 
                    AND tc.table_name = %s
                    AND tc.table_schema = 'public'
            """
            fk_cols = DB.query_all(fk_query, (table_name,))
            
            if fk_cols:
                print("\n  üîó Foreign Keys:")
                for fk in fk_cols:
                    print(f"    ‚Ä¢ {fk['column_name']} ‚Üí {fk['foreign_table_name']}.{fk['foreign_column_name']}")
            
            # Get unique constraints
            unique_query = """
                SELECT column_name
                FROM information_schema.key_column_usage kcu
                JOIN information_schema.table_constraints tc 
                    ON kcu.constraint_name = tc.constraint_name
                WHERE tc.table_name = %s 
                    AND tc.constraint_type = 'UNIQUE'
                    AND tc.table_schema = 'public'
            """
            unique_cols = DB.query_all(unique_query, (table_name,))
            
            if unique_cols:
                unique_names = [col['column_name'] for col in unique_cols]
                print(f"\n  ‚≠ê Unique Constraints: {', '.join(unique_names)}")
            
            # Get indexes
            index_query = """
                SELECT 
                    indexname,
                    indexdef
                FROM pg_indexes
                WHERE tablename = %s
                    AND schemaname = 'public'
            """
            indexes = DB.query_all(index_query, (table_name,))
            
            if indexes:
                print("\n  üìç Indexes:")
                for idx in indexes:
                    # Skip primary key index (already shown above)
                    if not idx['indexname'].endswith('_pkey'):
                        print(f"    ‚Ä¢ {idx['indexname']}")
            
            # Get row count
            count_query = f"SELECT COUNT(*) as count FROM {table_name}"
            count_result = DB.query_all(count_query)
            row_count = count_result[0]['count'] if count_result else 0
            print(f"\n  üìà Row Count: {row_count:,}")
    
    print("\n" + "="*50)
    print("‚úÖ Schema overview complete")

2025-12-18 08:44:19,933 - ACO - DEBUG - Loaded PostgreSQL backend module


üìä DATABASE SCHEMA OVERVIEW



2025-12-18 08:44:21,443 - ACO - DEBUG - Database schema initialized
2025-12-18 08:44:21,444 - ACO - INFO - Initialized PostgreSQL connection pool to workflow-postgres.cm14iy6021bi.us-east-1.rds.amazonaws.com


‚úÖ Found 5 tables in the database:


üìã Table: ATTACHMENTS
----------------------------------------
  ‚Ä¢ file_id: text NOT NULL
  ‚Ä¢ session_id: text NULL
  ‚Ä¢ line_no: integer NULL
  ‚Ä¢ content_hash: text NULL
  ‚Ä¢ file_path: text NULL
  ‚Ä¢ taint: text NULL

  üîë Primary Key: (file_id)

  üîó Foreign Keys:
    ‚Ä¢ session_id ‚Üí experiments.session_id

  üìç Indexes:
    ‚Ä¢ attachments_content_hash_idx

  üìà Row Count: 0

üìã Table: EXPERIMENTS
----------------------------------------
  ‚Ä¢ session_id: text NOT NULL
  ‚Ä¢ parent_session_id: text NULL
  ‚Ä¢ graph_topology: text NULL
  ‚Ä¢ color_preview: text NULL
  ‚Ä¢ timestamp: timestamp without time zone NULL DEFAULT CURRENT_TIMESTAMP
  ‚Ä¢ cwd: text NULL
  ‚Ä¢ command: text NULL
  ‚Ä¢ environment: text NULL
  ‚Ä¢ code_hash: text NULL
  ‚Ä¢ name: text NULL
  ‚Ä¢ success: text NULL
  ‚Ä¢ notes: text NULL
  ‚Ä¢ log: text NULL
  ‚Ä¢ user_id: integer NULL

  üîë Primary Key: (session_id)

  üîó Foreign Keys:
    ‚Ä¢ p

## List Table Entries

### experiments table

In [4]:
# Get all experiments
experiments = DB.query_all(
    "SELECT session_id, name, timestamp, user_id FROM experiments ORDER BY timestamp DESC LIMIT 20"
)

if experiments:
    df_experiments = pd.DataFrame(experiments)
    print(f"Found {len(experiments)} experiments:")
    display(df_experiments)
else:
    print("No experiments found in database")

Found 11 experiments:


Unnamed: 0,0,1,2,3
0,7c1be626-b240-4d98-a01b-9839ace7b764,Workflow run,2025-12-17 09:54:58.549801,1
1,ecdac4d7-494c-4c1e-b528-6fab02c3c596,Workflow run,2025-12-17 09:28:17.729013,1
2,f0ddb337-23cd-49f5-b223-ef1ad4f0270d,Workflow run,2025-12-17 09:27:51.201459,1
3,1940b1dc-467e-4b57-83f2-0ec393e7e35b,Workflow run,2025-12-17 09:22:42.774126,1
4,dce65316-1de7-4edb-9017-53e0860cd08f,Workflow run,2025-12-17 09:18:01.955206,1
5,2732e15a-feb8-48b4-a9e6-c431bd9da837,Workflow run,2025-12-17 09:00:18.196953,1
6,5a72102b-246c-4344-9e05-f860ae1d2ce1,Workflow run,2025-12-17 08:51:49.451242,1
7,a166a1f9-0197-4c98-83e3-9cd1940bcb1c,Workflow run,2025-12-17 08:47:23.533764,1
8,6b39116e-3b78-4178-83ad-f1889b22c8f4,Workflow run,2025-12-17 08:47:18.867580,1
9,926c41f5-cf65-4cfc-8597-16064f3e21cc,Workflow run,2025-12-17 08:45:29.110291,1


### llm_calls table

In [5]:
# Get recent LLM calls
llm_calls = DB.query_all(
    "SELECT session_id, node_id, api_type, timestamp FROM llm_calls ORDER BY timestamp DESC LIMIT 20"
)

if llm_calls:
    df_llm = pd.DataFrame(llm_calls)
    print(f"Found {len(llm_calls)} recent LLM calls:")
    display(df_llm)
else:
    print("No LLM calls found")

Found 20 recent LLM calls:


Unnamed: 0,0,1,2,3
0,7c1be626-b240-4d98-a01b-9839ace7b764,177dc116-0be1-4922-8bc6-14a52bffeb15,requests.Session.send,2025-12-17 08:55:08.166440
1,7c1be626-b240-4d98-a01b-9839ace7b764,b971a711-c794-436c-aabe-3304bfc51b8b,requests.Session.send,2025-12-17 08:55:07.309117
2,7c1be626-b240-4d98-a01b-9839ace7b764,890fc9de-326a-4441-99b5-5973a1095384,requests.Session.send,2025-12-17 08:55:06.361325
3,7c1be626-b240-4d98-a01b-9839ace7b764,fc464a1b-0cb5-4a5d-a6ea-9f2dcb87e0d5,requests.Session.send,2025-12-17 08:55:05.439934
4,ecdac4d7-494c-4c1e-b528-6fab02c3c596,c17fed98-6ee1-43a8-bd5e-dc7a5fafec9d,requests.Session.send,2025-12-17 08:28:26.637652
5,ecdac4d7-494c-4c1e-b528-6fab02c3c596,282a6a18-bd53-4bd5-b3fc-f4e2e19f3d61,requests.Session.send,2025-12-17 08:28:25.736232
6,ecdac4d7-494c-4c1e-b528-6fab02c3c596,4e58ca44-115c-481e-a795-79358a423d12,requests.Session.send,2025-12-17 08:28:24.827035
7,ecdac4d7-494c-4c1e-b528-6fab02c3c596,edb3f65b-cea0-4001-b2a9-2df72728f2d4,requests.Session.send,2025-12-17 08:28:23.904281
8,f0ddb337-23cd-49f5-b223-ef1ad4f0270d,780cdce7-a007-44d7-af8f-0b3a47f68b54,requests.Session.send,2025-12-17 08:28:00.397816
9,f0ddb337-23cd-49f5-b223-ef1ad4f0270d,f62d84d3-1ad7-4a6f-bfd0-f5e074aa3eab,requests.Session.send,2025-12-17 08:27:59.502928


## users table

In [6]:
# Get all users
users = DB.query_all(
    "SELECT id, email, name, created_at, google_id FROM users"
)

if users:
    df_users = pd.DataFrame(users)
    print(f"Found {len(users)} users:")
    display(users)
else:
    print("No users found in database")

Found 1 users:


[[1,
  'julian.buechel@gmail.com',
  'Julian B√ºchel',
  datetime.datetime(2025, 12, 17, 7, 36, 9, 767864),
  '103315367651959535575']]

# DANGER ZONE: Drop tables and reset schema

In [7]:
# ---> Comment below out to run schema change. <---
# NOTE: Dropping a table deletes all its data and is irreversible.
from aco.server.database_manager import DB

print("üîß RESETTING DATABASE SCHEMA TO MATCH postgres.py\n" + "="*50)

try:
    # Ensure we're using the remote PostgreSQL database
    if DB.get_current_mode() != "remote":
        DB.switch_mode("remote")
        print("1Ô∏è‚É£ Switched to remote PostgreSQL database")
    else:
        print("1Ô∏è‚É£ Already using remote PostgreSQL database")
    
    # Drop all existing tables in correct order (respecting foreign keys)
    print("\n2Ô∏è‚É£ Dropping existing tables...")
    DB.execute("DROP TABLE IF EXISTS attachments CASCADE")
    print("   ‚úÖ Dropped attachments table")
    DB.execute("DROP TABLE IF EXISTS llm_calls CASCADE")
    print("   ‚úÖ Dropped llm_calls table")
    DB.execute("DROP TABLE IF EXISTS experiments CASCADE")
    print("   ‚úÖ Dropped experiments table")
    DB.execute("DROP TABLE IF EXISTS users CASCADE")
    print("   ‚úÖ Dropped users table")
    
    # Create users table first (referenced by experiments)
    print("\n3Ô∏è‚É£ Creating tables with schema from postgres.py...")
    DB.execute(
        """
        CREATE TABLE IF NOT EXISTS users (
            id SERIAL PRIMARY KEY,
            google_id VARCHAR(255) NOT NULL UNIQUE,
            email VARCHAR(255) NOT NULL UNIQUE,
            name VARCHAR(255) NOT NULL,
            picture TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """
    )
    print("   ‚úÖ Created users table")
    
    # Create experiments table
    DB.execute(
        """
        CREATE TABLE IF NOT EXISTS experiments (
            session_id TEXT PRIMARY KEY,
            parent_session_id TEXT,
            graph_topology TEXT,
            color_preview TEXT,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            cwd TEXT,
            command TEXT,
            environment TEXT,
            code_hash TEXT,
            name TEXT,
            success TEXT CHECK (success IN ('', 'Satisfactory', 'Failed')),
            notes TEXT,
            log TEXT,
            user_id INTEGER,
            FOREIGN KEY (parent_session_id) REFERENCES experiments (session_id),
            FOREIGN KEY (user_id) REFERENCES users (id),
            UNIQUE (parent_session_id, name)
        )
        """
    )
    print("   ‚úÖ Created experiments table")
    
    # Create llm_calls table
    DB.execute(
        """
        CREATE TABLE IF NOT EXISTS llm_calls (
            session_id TEXT,
            node_id TEXT,
            input TEXT,
            input_hash TEXT,
            input_overwrite TEXT,
            output TEXT,
            color TEXT,
            label TEXT,
            api_type TEXT,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY (session_id, node_id),
            FOREIGN KEY (session_id) REFERENCES experiments (session_id)
        )
        """
    )
    print("   ‚úÖ Created llm_calls table")
    
    # Create attachments table
    DB.execute(
        """
        CREATE TABLE IF NOT EXISTS attachments (
            file_id TEXT PRIMARY KEY,
            session_id TEXT,
            line_no INTEGER,
            content_hash TEXT,
            file_path TEXT,
            taint TEXT,
            FOREIGN KEY (session_id) REFERENCES experiments (session_id)
        )
        """
    )
    print("   ‚úÖ Created attachments table")
    
    # Create indexes (exactly as in postgres.py)
    print("\n4Ô∏è‚É£ Creating indexes...")
    DB.execute(
        """
        CREATE INDEX IF NOT EXISTS attachments_content_hash_idx ON attachments(content_hash)
        """
    )
    print("   ‚úÖ Created attachments_content_hash_idx")
    
    DB.execute(
        """
        CREATE INDEX IF NOT EXISTS original_input_lookup ON llm_calls(session_id, input_hash)
        """
    )
    print("   ‚úÖ Created original_input_lookup index")
    
    DB.execute(
        """
        CREATE INDEX IF NOT EXISTS experiments_timestamp_idx ON experiments(timestamp DESC)
        """
    )
    print("   ‚úÖ Created experiments_timestamp_idx")
    
    # Verify the schema
    print("\n5Ô∏è‚É£ Verifying schema...")
    
    # Check llm_calls output column type
    output_col = DB.query_one("""
        SELECT column_name, data_type 
        FROM information_schema.columns 
        WHERE table_name = 'llm_calls' AND column_name = 'output'
    """)
    
    # Check users table exists
    users_count = DB.query_one("""
        SELECT COUNT(*) as count FROM information_schema.tables 
        WHERE table_schema = 'public' AND table_name = 'users'
    """)
    if users_count and users_count['count'] > 0:
        print(f"   ‚úÖ Users table exists")
    
    # Get table count
    table_count_result = DB.query_one("""
        SELECT COUNT(*) as count FROM information_schema.tables 
        WHERE table_schema = 'public'
    """)
    table_count = table_count_result['count'] if table_count_result else 0
    
    print(f"\n‚úÖ SCHEMA RESET COMPLETE!")
    print(f"   ‚Ä¢ {table_count} tables created")
    print(f"   ‚Ä¢ All indexes created")
    print(f"   ‚Ä¢ Schema now matches postgres.py exactly")
    
except Exception as e:
    print(f"\n‚ùå Schema reset failed: {e}")
    print(f"   Error type: {type(e).__name__}")
    import traceback
    traceback.print_exc()

üîß RESETTING DATABASE SCHEMA TO MATCH postgres.py
1Ô∏è‚É£ Already using remote PostgreSQL database

2Ô∏è‚É£ Dropping existing tables...
   ‚úÖ Dropped attachments table
   ‚úÖ Dropped llm_calls table
   ‚úÖ Dropped experiments table
   ‚úÖ Dropped users table

3Ô∏è‚É£ Creating tables with schema from postgres.py...
   ‚úÖ Created users table
   ‚úÖ Created experiments table
   ‚úÖ Created llm_calls table
   ‚úÖ Created attachments table

4Ô∏è‚É£ Creating indexes...
   ‚úÖ Created attachments_content_hash_idx
   ‚úÖ Created original_input_lookup index
   ‚úÖ Created experiments_timestamp_idx

5Ô∏è‚É£ Verifying schema...
   ‚úÖ Users table exists

‚úÖ SCHEMA RESET COMPLETE!
   ‚Ä¢ 5 tables created
   ‚Ä¢ All indexes created
   ‚Ä¢ Schema now matches postgres.py exactly
