# Step 02: Analysis Summary

This notebook provides an overview of the analysis batch jobs submitted in Step 01.

**Tasks:**
- Retrieve Analysis batch from Step 01
- Fetch job status from Moody's API
- Display summary of completed, failed, and in-progress jobs
- Show detailed job information including locations modeled

## 1) Setup

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from datetime import datetime

from helpers.notebook_setup import initialize_notebook_context
from helpers import ux
from helpers.batch import read_batch, get_batch_jobs
from helpers.database import execute_query
from helpers.irp_integration import IRPClient
from helpers.constants import BatchType, JobStatus

In [None]:
# Set up the notebook context and the step tracker for this notebook
# (allow_rerun=True is passed to the initializer).
context, step = initialize_notebook_context(
    'Step_02_Analysis_Summary.ipynb',
    allow_rerun=True,
)

# Show which cycle / stage / step this run belongs to
ux.header("Analysis Batch Summary")
for label, value in (
    ("Cycle", context.cycle_name),
    ("Stage", context.stage_name),
    ("Step", context.step_name),
):
    ux.info(f"{label}: {value}")
ux.success(f"Step tracking initialized for '{context.step_name}'")

## 2) Retrieve Analysis Batch

In [None]:
# Retrieve Analysis batch from Step 01 output
ux.subheader("Retrieve Analysis Batch")

# Find the most recently completed run of stage 4 / step 1 for the current
# cycle; the batch ID is read from that run's output_data.
query = """
    SELECT sr.id, sr.step_id, sr.run_num, sr.output_data, sr.completed_ts
    FROM irp_step_run sr
    INNER JOIN irp_step s ON sr.step_id = s.id
    INNER JOIN irp_stage sg ON s.stage_id = sg.id
    INNER JOIN irp_cycle c ON sg.cycle_id = c.id
    WHERE c.cycle_name = %s
      AND sg.stage_num = 4
      AND s.step_num = 1
      AND sr.status = 'COMPLETED'
    ORDER BY sr.completed_ts DESC
    LIMIT 1
"""

result = execute_query(query, (context.cycle_name,))

if result.empty:
    raise ValueError("Analysis execution step not found - please complete Step 01 first")

output_data = result.iloc[0]['output_data']

# A NULL (or otherwise non-mapping) output_data has no .get(); treat it as
# "no batch ID" so the user gets the explicit ValueError below rather than an
# AttributeError.
if hasattr(output_data, 'get'):
    analysis_batch_id = output_data.get('batch_id')
else:
    analysis_batch_id = None

if not analysis_batch_id:
    raise ValueError("Analysis batch ID not found in Step 01 output")

ux.success(f"Retrieved Analysis batch: ID={analysis_batch_id}")
step.log(f"Retrieved Analysis batch: ID={analysis_batch_id}")

## 3) Batch Status Overview

In [None]:
# Display batch status and summary
ux.subheader("Batch Status")

# Read batch details
batch = read_batch(analysis_batch_id)

batch_info = [
    ["Batch ID", batch['id']],
    ["Batch Type", batch['batch_type']],
    ["Status", batch['status']],
    ["Created", batch['created_ts'].strftime('%Y-%m-%d %H:%M:%S')],
]
ux.table(batch_info, headers=["Property", "Value"])

# Get all jobs in batch
jobs = get_batch_jobs(analysis_batch_id)
total_jobs = len(jobs)

# Count jobs by status
status_counts = {}
for job in jobs:
    status = job['status']
    status_counts[status] = status_counts.get(status, 0) + 1

# Display status summary
ux.info(f"\nTotal Jobs: {total_jobs}")
status_rows = [[status, count] for status, count in sorted(status_counts.items())]
if status_rows:
    ux.table(status_rows, headers=["Status", "Count"])

# Key metrics. Anything that is not finished, failed or errored is counted as
# still in progress.
finished_count = status_counts.get(JobStatus.FINISHED, 0)
failed_count = status_counts.get(JobStatus.FAILED, 0)
error_count = status_counts.get(JobStatus.ERROR, 0)
in_progress = total_jobs - finished_count - failed_count - error_count

if total_jobs == 0:
    # Without this guard an empty batch satisfies finished_count == total_jobs
    # and would be reported as a complete success.
    ux.warning("\nNo jobs found in this batch")
elif finished_count == total_jobs:
    ux.success(f"\nAll {total_jobs} jobs completed successfully!")
elif failed_count > 0 or error_count > 0:
    ux.warning(f"\n{failed_count + error_count} job(s) failed or errored")
elif in_progress > 0:
    ux.info(f"\n{in_progress} job(s) still in progress")

# Errored jobs are folded into the "failed" figure so the logged counts add up
# to the total.
step.log(f"Batch status: {batch['status']}, Jobs: {finished_count} finished, {failed_count + error_count} failed, {in_progress} in progress")

## 4) Fetch Job Details from Moody's

In [None]:
# Fetch detailed job information from Moody's API
ux.subheader("Fetching Job Details from Moody's")

irp_client = IRPClient()

# Loop-invariant query text, defined once rather than on every iteration
config_query = "SELECT job_configuration_data FROM irp_job_configuration WHERE id = %s"

job_details = []
failed_to_fetch = []
skipped_without_workflow = 0

for job in jobs:
    workflow_id = job.get('moodys_workflow_id')
    if not workflow_id:
        # Nothing to look up for a job that has no workflow ID
        skipped_without_workflow += 1
        continue

    # Get job configuration from database
    config_result = execute_query(config_query, (job['job_configuration_id'],))
    if config_result.empty:
        job_config = {}
    else:
        # A NULL configuration becomes {} so later cells can call .get() on it
        job_config = config_result.iloc[0]['job_configuration_data'] or {}

    try:
        # Fetch job details from Moody's API
        moodys_job = irp_client.analysis.get_analysis_job(int(workflow_id))
    except Exception as e:
        # Best effort: record the failure and keep going with the other jobs
        failed_to_fetch.append({'job_id': job['id'], 'workflow_id': workflow_id, 'error': str(e)})
    else:
        job_details.append({
            'job_id': job['id'],
            'workflow_id': workflow_id,
            'local_status': job['status'],
            'moodys_data': moodys_job,
            'job_config': job_config,
        })

ux.success(f"Fetched details for {len(job_details)} job(s)")
if skipped_without_workflow:
    ux.info(f"Skipped {skipped_without_workflow} job(s) with no Moody's workflow ID")
if failed_to_fetch:
    ux.warning(f"Failed to fetch {len(failed_to_fetch)} job(s)")
    # Show the collected error messages instead of only their count
    ux.table(
        [[f['job_id'], f['workflow_id'], f['error']] for f in failed_to_fetch],
        headers=["Job ID (Local)", "Workflow ID", "Error"],
    )

step.log(f"Fetched {len(job_details)} job details from Moody's API")

## 5) Analysis Job Summary

In [None]:
# Build summary table with job results
ux.subheader("Analysis Job Results")

summary_rows = []

for detail in job_details:
    moodys_data = detail['moodys_data']
    # Fall back to {} when the stored configuration is missing or None
    job_config = detail.get('job_config') or {}

    # Extract from Moody's API response (top-level fields)
    job_name = moodys_data.get('name', 'N/A')
    status = moodys_data.get('status', 'N/A')
    progress = moodys_data.get('progress', 0)

    # Get configuration details from our database
    portfolio = job_config.get('Portfolio', 'N/A')
    database = job_config.get('Database', 'N/A')
    analysis_profile = job_config.get('Analysis Profile', 'N/A')

    # Calculate duration if available
    start_time = moodys_data.get('startedAt')
    end_time = moodys_data.get('endedAt')
    duration = 'N/A'
    if start_time and end_time:
        try:
            # fromisoformat() on older Python versions rejects a trailing 'Z',
            # so it is rewritten as an explicit UTC offset first.
            start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
            end_dt = datetime.fromisoformat(end_time.replace('Z', '+00:00'))
            duration_mins = (end_dt - start_dt).total_seconds() / 60
            duration = f"{duration_mins:.1f} min"
        except (AttributeError, TypeError, ValueError):
            # Non-string or unparseable timestamps, or a naive/aware mix:
            # leave the duration as 'N/A'.
            pass

    # Truncate long profile names for display. The slice is taken on the
    # string form so a non-string value cannot raise a TypeError.
    profile_text = str(analysis_profile)
    if len(profile_text) > 30:
        profile_display = profile_text[:30] + '...'
    else:
        profile_display = analysis_profile

    summary_rows.append({
        'Analysis Name': job_name,
        'Status': status,
        'Progress': f"{progress}%",
        'Portfolio': portfolio,
        'Database': database,
        'Analysis Profile': profile_display,
        'Duration': duration,
    })

if summary_rows:
    summary_df = pd.DataFrame(summary_rows)

    # Display with formatting
    pd.set_option('display.max_colwidth', 40)
    pd.set_option('display.width', None)
    print(summary_df.to_string(index=False))
else:
    ux.warning("No job details available to display")

## 6) Successful Jobs - Detailed View

In [None]:
# Display detailed information for successful jobs
ux.subheader("Successful Analysis Jobs")

# A job counts as successful when Moody's reports status 'FINISHED'
successful_jobs = [d for d in job_details if d['moodys_data'].get('status') == 'FINISHED']

if not successful_jobs:
    ux.info("No successfully completed jobs yet")
else:
    ux.success(f"{len(successful_jobs)} job(s) completed successfully\n")

    for detail in successful_jobs:
        moodys_data = detail['moodys_data']
        # "or {}" also covers keys that are present but hold None
        job_config = detail.get('job_config') or {}
        details_obj = moodys_data.get('details') or {}

        job_name = moodys_data.get('name', 'N/A')
        ux.info(f"Analysis: {job_name}")

        # Calculate duration
        start_time = moodys_data.get('startedAt')
        end_time = moodys_data.get('endedAt')
        duration = 'N/A'
        if start_time and end_time:
            try:
                # Rewrite a trailing 'Z' as an explicit UTC offset before parsing
                start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
                end_dt = datetime.fromisoformat(end_time.replace('Z', '+00:00'))
                duration_mins = (end_dt - start_dt).total_seconds() / 60
                duration = f"{duration_mins:.1f} min"
            except (AttributeError, TypeError, ValueError):
                # Non-string or unparseable timestamps, or a naive/aware mix:
                # leave the duration as 'N/A'.
                pass

        detail_rows = [
            ["Workflow ID", detail['workflow_id']],
            ["Job ID (Local)", detail['job_id']],
            ["Status", moodys_data.get('status', 'N/A')],
            ["Progress", f"{moodys_data.get('progress', 0)}%"],
            ["Duration", duration],
            ["Started At", moodys_data.get('startedAt', 'N/A')],
            ["Ended At", moodys_data.get('endedAt', 'N/A')],
            ["", ""],  # blank row separating run info from configuration
            ["Portfolio", job_config.get('Portfolio', 'N/A')],
            ["Database (EDM)", job_config.get('Database', 'N/A')],
            ["Analysis Profile", job_config.get('Analysis Profile', 'N/A')],
            ["Output Profile", job_config.get('Output Profile', 'N/A')],
            ["Event Rate", job_config.get('Event Rate', 'N/A')],
        ]

        # Add treaty info if present (configuration keys 1 through 5)
        for i in range(1, 6):
            treaty = job_config.get(f'Reinsurance Treaty {i}')
            if treaty:
                detail_rows.append([f"Reinsurance Treaty {i}", treaty])

        ux.table(detail_rows, headers=["Property", "Value"])

        # Show details summary if available
        if details_obj.get('summary'):
            ux.info(f"Summary: {details_obj['summary']}")

        print()

## 7) Failed Jobs - Error Details

In [None]:
# Display detailed information for failed jobs
ux.subheader("Failed Analysis Jobs")

failed_jobs = [d for d in job_details if d['moodys_data'].get('status') in ['FAILED', 'CANCELLED', 'ERROR']]

# Task statuses that are NOT treated as failures; defined once, outside the loop
non_failed_task_statuses = ['FINISHED', 'QUEUED', 'PENDING', 'RUNNING']

if not failed_jobs:
    ux.success("No failed jobs!")
else:
    ux.error(f"{len(failed_jobs)} job(s) failed\n")

    for detail in failed_jobs:
        moodys_data = detail['moodys_data']
        # "or {}" also covers keys that are present but hold None
        job_config = detail.get('job_config') or {}

        job_name = moodys_data.get('name', 'N/A')
        status = moodys_data.get('status', 'N/A')

        ux.warning(f"Analysis: {job_name}")

        error_rows = [
            ["Workflow ID", detail['workflow_id']],
            ["Job ID (Local)", detail['job_id']],
            ["Status", status],
            ["Progress", f"{moodys_data.get('progress', 0)}%"],
            ["Portfolio", job_config.get('Portfolio', 'N/A')],
            ["Database (EDM)", job_config.get('Database', 'N/A')],
        ]
        ux.table(error_rows, headers=["Property", "Value"])

        # Check tasks for errors. The "or" fallbacks keep this report working
        # when the response carries explicit nulls for tasks/output/errors.
        tasks = moodys_data.get('tasks') or []
        failed_tasks = [t for t in tasks if t.get('status') not in non_failed_task_statuses]

        if failed_tasks:
            ux.info("Failed Tasks:")
            for task in failed_tasks:
                task_output = task.get('output') or {}
                errors = task_output.get('errors') or []
                print(f"  - Task {task.get('taskId')}: {task.get('name')} - {task.get('status')}")
                for error in errors:
                    # Print a non-dict error entry as-is instead of crashing on .get()
                    if isinstance(error, dict):
                        message = error.get('message', 'Unknown error')
                    else:
                        message = error
                    print(f"      Error: {message}")

        # Show details summary if available (may contain error info)
        details_obj = moodys_data.get('details') or {}
        if details_obj.get('summary'):
            ux.info(f"Details: {details_obj['summary']}")

        print()

## 8) In-Progress Jobs

In [None]:
# List the jobs whose Moody's status is one of the statuses below
ux.subheader("In-Progress Analysis Jobs")

in_progress_statuses = ['SUBMITTED', 'PENDING', 'QUEUED', 'RUNNING']
in_progress_jobs = list(filter(
    lambda entry: entry['moodys_data'].get('status') in in_progress_statuses,
    job_details,
))

if in_progress_jobs:
    ux.info(f"{len(in_progress_jobs)} job(s) still running\n")

    # One table row per job: name, status, progress, workflow ID
    progress_rows = [
        [
            entry['moodys_data'].get('name', 'N/A'),
            entry['moodys_data'].get('status', 'N/A'),
            f"{entry['moodys_data'].get('progress', 0)}%",
            entry['workflow_id'],
        ]
        for entry in in_progress_jobs
    ]

    ux.table(progress_rows, headers=["Analysis Name", "Status", "Progress", "Workflow ID"])

    ux.info("\nRe-run this notebook to check for updates.")
else:
    ux.success("No jobs currently in progress")

## 9) Complete Step

In [None]:
# Complete step execution
ux.header("Step Completion")

try:
    # Prepare output summary
    output_data = {
        'batch_id': analysis_batch_id,
        'batch_status': batch['status'],
        'total_jobs': total_jobs,
        'jobs_finished': finished_count,
        'jobs_failed': failed_count + error_count,
        'jobs_in_progress': in_progress,
        'jobs_fetched': len(job_details),
    }

    # Complete the step
    step.complete(output_data)
except Exception as e:
    ux.error(f"Step completion failed: {str(e)}")
    step.fail(str(e))
    raise
else:
    # The summary display sits outside the try body so that a display error
    # cannot call step.fail() on a step that has already been completed.
    ux.success("\n" + "="*60)
    ux.success("ANALYSIS SUMMARY COMPLETED")
    ux.success("="*60)

    # Final summary
    ux.info(f"\nBatch Status: {batch['status']}")
    ux.info(f"Total Jobs: {total_jobs}")

    jobs_failed_total = failed_count + error_count
    if finished_count > 0:
        ux.success(f"  Completed: {finished_count}")
    if jobs_failed_total > 0:
        ux.error(f"  Failed: {jobs_failed_total}")
    if in_progress > 0:
        ux.warning(f"  In Progress: {in_progress}")

    if total_jobs == 0:
        # An empty batch must not be reported as "all analyses complete"
        ux.warning("\nNo jobs found in this batch. Verify that Step 01 submitted jobs.")
    elif finished_count == total_jobs:
        ux.info("\nAll analyses complete! Proceed to Stage 05 (Grouping) when ready.")
    elif in_progress > 0:
        ux.info("\nSome jobs still in progress. Re-run this notebook to check for updates.")
    else:
        ux.warning("\nSome jobs failed. Review errors above and consider resubmitting.")