# Database Administration

This notebook provides database management utilities:
- Initialize database
- Test connection
- View database statistics
- Cleanup utilities

In [1]:
import sys
from system import database, ux

## Database Connection Test

First, let us do a database connectivity test. 

In [2]:
if database.test_connection():
    ux.success("Database connection successful")
    
    # Show connection details
    from system.config import DB_CONFIG
    
    data = [
        ['Server', DB_CONFIG['host']],
        ['Database', DB_CONFIG['database']],
        ['Port', DB_CONFIG['port']],
        ['User', DB_CONFIG['user']]
    ]
    ux.table(data, headers=['Property', 'Value'])
    
    # Get database version
    version = database.execute_scalar("SELECT version()")
    print(f"\nPostgreSQL Version:")
    print(version[:100] + "...")
    
else:
    ux.error("Database connection failed")
    print("\nTroubleshooting:")
    print("1. Check if PostgreSQL container is running: docker ps")
    print("2. Verify environment variables in .env file")
    print("3. Try initializing the database (next cell)")

Property,Value
Server,postgres
Database,irp_db
Port,5432
User,irp_user



PostgreSQL Version:
PostgreSQL 15.14 on x86_64-pc-linux-musl, compiled by gcc (Alpine 14.2.0) 14.2.0, 64-bit...


## User Input | Initialize Database Now?

In [3]:
initialize_db = ux.dropdown(["Do not initialize the database.", 
                       "Initialize anyway. !!! WARNING - THIS WILL CLEAR ALL METADATA"
                      ], 
                      "How would you like to proceed?")
if initialize_db is None:
    print("You have canceled the operation.")
    sys.exit()
d.warning(f"{_selection}")

How would you like to proceed?
Options:
1. Do not initialize the database.
Enter 1-2 or 'cancel' to stop


>  0


Please enter a number between 1 and 2


>  cancel


You have canceled the operation.


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [4]:
if initialize_db:
    ux.subheader("Initializing Database")
    success = database.init_database()
    ux.success("Initialization completed")
else:
    ux.info("Initialization skipped")

In [5]:
ux.header("Database Statistics")

# Table row counts
tables = ['irp_cycle', 'irp_stage', 'irp_step', 'irp_step_run', 'irp_batch', 'irp_job']
stats = []
has_error = False
for table in tables:
    count = database.execute_scalar(f"SELECT COUNT(*) FROM {table}")
    stats.append([table, count])
        
ux.table(stats, headers=['Table', 'Row Count'])

Table,Row Count
irp_cycle,1
irp_stage,1
irp_step,2
irp_step_run,2
irp_batch,0
irp_job,0


In [6]:
ux.header("Database Statistics")

# Active cycle info
active = database.get_active_cycle()
if active:
    print(f"\nActive Cycle: {active['cycle_name']}")
else:
    ux.warning("No active cycle")


Active Cycle: Q4_2025_Demo-Analysis


In [8]:
ux.header("Recent Step Runs")

query = """
SELECT
    c.cycle_name,
    CONCAT(sg.stage_num, '.', st.step_num) as step,
    st.step_name,
    sr.status,
    sr.started_ts,
    sr.completed_ts,
    EXTRACT(EPOCH FROM (sr.completed_ts - sr.started_ts)) as duration_seconds
FROM irp_step_run sr
INNER JOIN irp_step st ON sr.step_id = st.id
INNER JOIN irp_stage sg ON st.stage_id = sg.id
INNER JOIN irp_cycle c ON sg.cycle_id = c.id
ORDER BY sr.started_ts DESC
LIMIT 10
"""

df = database.execute_query(query)
if not df.empty:
    # Format duration
    df['duration'] = df['duration_seconds'].apply(ux.format_duration)
    df = df.drop('duration_seconds', axis=1)
    
    ux.dataframe(df, max_rows=10)
else:
    ux.info("No step runs found")

# Cell 6: Cleanup Utilities

ux.subheader("Cleanup Utilities")
print("Uncomment the operations you want to perform:\n")

print("# Clear failed step runs")
print("# rows = db.execute_command(\"DELETE FROM irp_step_run WHERE status = 'failed'\")")
print("# print(f'Deleted {rows} failed step runs')")
print()

print("# Clear all step runs but keep steps")
print("# rows = db.execute_command(\"DELETE FROM irp_step_run\")")
print("# print(f'Deleted {rows} step runs')")
print()

print("# Remove archived cycles older than 30 days")
print("# query = \"\"\"DELETE FROM irp_cycle ")
print("#           WHERE status = 'archived' ")
print("#           AND archived_ts < NOW() - INTERVAL '30 days'\"\"\"")
print("# rows = db.execute_command(query)")
print("# print(f'Deleted {rows} old archived cycles')")

# Cell 7: Query Builder

ux.subheader("Custom Query")
print("Run custom SQL queries:\n")

query = "SELECT * FROM irp_cycle LIMIT 5"
print(f"Query: {query}\n")

try:
    df = database.execute_query(query)
    if not df.empty:
        ux.dataframe(df)
    else:
        ux.info("Query returned no results")
except Exception as e:
    ux.error(f"Query failed: {str(e)}")

cycle_name,step,step_name,status,started_ts,completed_ts,duration
Q4_2025_Demo-Analysis,1.1,Initialize,completed,2025-10-10 17:46:09.218716+00:00,2025-10-10 17:46:13.581283+00:00,4.4s
Q4_2025_Demo-Analysis,1.1,Initialize,running,2025-10-10 17:45:36.772930+00:00,NaT,nanh


Uncomment the operations you want to perform:

# Clear failed step runs
# rows = db.execute_command("DELETE FROM irp_step_run WHERE status = 'failed'")
# print(f'Deleted {rows} failed step runs')

# Clear all step runs but keep steps
# rows = db.execute_command("DELETE FROM irp_step_run")
# print(f'Deleted {rows} step runs')

# Remove archived cycles older than 30 days
# query = """DELETE FROM irp_cycle 
#           WHERE status = 'archived' 
#           AND archived_ts < NOW() - INTERVAL '30 days'"""
# rows = db.execute_command(query)
# print(f'Deleted {rows} old archived cycles')


Run custom SQL queries:

Query: SELECT * FROM irp_cycle LIMIT 5



id,cycle_name,status,created_ts,archived_ts,created_by,metadata
1,Q4_2025_Demo-Analysis,active,2025-10-10 17:44:24.341276+00:00,,notebook_user,


---