# Step 01: Group Analysis Results (Analysis-only Groups)

This notebook submits analysis grouping jobs to Moody's Risk Modeler for **analysis-only groups**.

Analysis-only groups contain ONLY analysis names (no references to other groups).

**Tasks:**
- Retrieve Grouping batch from Stage_01/Step_03
- Review grouping job configurations
- Submit grouping jobs to Moody's API
- Record the submission results and mark this step as completed or failed (job progress is monitored after this notebook)

**Note:** If you have groups that contain OTHER groups (rollup groups), those are handled separately in Step_02_Group_Rollup.ipynb.

## 1) Setup

In [None]:
%load_ext autoreload
%autoreload 2

from helpers.notebook_setup import initialize_notebook_context
from helpers import ux
from helpers.batch import get_batch_jobs, read_batch, activate_batch, update_batch_step
from helpers.database import execute_query
from helpers.irp_integration import IRPClient
from helpers.constants import BatchType, JobStatus

# Flags to track state (notebook-level, shared by the cells below)
# validation_failed: set to True by the validation cell when pre-requisite
#   errors are found; the planning, submission and completion cells check it.
validation_failed = False
# validation_errors: initialised empty here. NOTE(review): no other cell shown
#   in this notebook reads or writes it - confirm whether it is still needed.
validation_errors = []
existing_groups = set()  # Set of group names that already exist (replaced by the validation cell's result)

In [None]:
# Bind this notebook to its workflow context and obtain the step tracker
context, step = initialize_notebook_context('Step_01_Group_Analysis_Results.ipynb')

# Show where in the workflow this notebook is running
ux.header("Group Analysis Results Batch (Analysis-only Groups)")
for label, value in (
    ("Cycle", context.cycle_name),
    ("Stage", context.stage_name),
    ("Step", context.step_name),
):
    ux.info(f"{label}: {value}")
ux.success(f"Step tracking initialized for '{context.step_name}'")

## 2) Retrieve Grouping Batch

In [None]:
# Retrieve Grouping batch from Stage_01/Step_03
#
# Looks up the most recent COMPLETED run of stage 1 / step 3 for the current
# cycle and reads the Grouping batch ID out of that run's output_data.
# Raises ValueError when no such run exists or when it recorded no Grouping batch.
ux.subheader("Retrieve Grouping Batch")

query = """
    SELECT sr.id, sr.step_id, sr.run_num, sr.output_data, sr.completed_ts
    FROM irp_step_run sr
    INNER JOIN irp_step s ON sr.step_id = s.id
    INNER JOIN irp_stage sg ON s.stage_id = sg.id
    INNER JOIN irp_cycle c ON sg.cycle_id = c.id
    WHERE c.cycle_name = %s
      AND sg.stage_num = 1
      AND s.step_num = 3
      AND sr.status = 'COMPLETED'
    ORDER BY sr.completed_ts DESC
    LIMIT 1
"""

result = execute_query(query, (context.cycle_name,))

if result.empty:
    raise ValueError("Batch creation step not found - please complete Stage_01/Step_03 first")

# A completed run can still carry a NULL output_data (or a null 'batches'
# entry). Normalise both to empty dicts so the descriptive ValueError below is
# raised instead of an AttributeError/TypeError on None.
output_data = result.iloc[0]['output_data']
if output_data is None:
    output_data = {}
batches = output_data.get('batches') or {}

if BatchType.GROUPING not in batches:
    raise ValueError(f"Grouping batch not found. Available: {list(batches.keys())}")

grouping_batch_id = int(batches[BatchType.GROUPING])
ux.success(f"Retrieved Grouping batch: ID={grouping_batch_id}")
step.log(f"Retrieved Grouping batch: ID={grouping_batch_id}")

# Call EntityValidator directly so this notebook receives the raw message list
# together with the list of groups that already exist
from helpers.entity_validator import EntityValidator

ux.subheader("Validate Batch")

# Collect the stored configuration of every job in the batch
jobs = get_batch_jobs(grouping_batch_id)
config_query = "SELECT job_configuration_data FROM irp_job_configuration WHERE id = %s"
groupings = []
for batch_job in jobs:
    config_rows = execute_query(config_query, (batch_job['job_configuration_id'],))
    if config_rows.empty:
        continue  # no configuration row found for this job - nothing to validate
    groupings.append(config_rows.iloc[0]['job_configuration_data'])

# validate_grouping_batch returns (messages, existing_groups_list)
all_messages, existing_groups_list = EntityValidator().validate_grouping_batch(groupings)
existing_groups = set(existing_groups_list)  # set membership is used by the planning cell

# Split the messages:
# - "WARN-" prefixed messages are warnings
# - ENT-GROUP-001 means "groups already exist", which is recoverable
# - everything else is a blocking pre-requisite error
warnings = [msg for msg in all_messages if msg.startswith("WARN-")]
prereq_errors = [
    msg for msg in all_messages
    if not (msg.startswith("WARN-") or "ENT-GROUP-001" in msg)
]

if prereq_errors:
    validation_failed = True
    ux.error("Pre-requisite validation failed:")
    for blocking in prereq_errors:
        ux.error(f"  {blocking}")
    step.log(f"Pre-requisite validation failed: {len(prereq_errors)} error(s)")
else:
    # Validation passed; the wording depends on whether some groups already exist
    if existing_groups:
        ux.warning(f"Found {len(existing_groups)} existing group(s) - will prompt for action")
    for note in warnings:
        ux.warning(f"  {note}")
    if existing_groups:
        ux.success("Pre-requisite validation passed")
    else:
        ux.success("Batch validation passed")

## 3) Build Submission Plan

Determine the action for each job based on its status and whether its group already exists.

In [None]:
# Build the submission plan from the validation results.
#
# For every job the question is whether its group still has to be created:
# - group missing  -> create it (submit or resubmit, depending on job status)
# - group present  -> nothing required, but the user may opt to delete and re-run
# - job in flight  -> leave it alone

# Buckets filled by the classification loop below
jobs_missing_group = []   # group name not in existing_groups
jobs_with_group = []      # group name found in existing_groups
jobs_in_progress = []     # job status is one of IN_PROGRESS_STATUSES

IN_PROGRESS_STATUSES = {JobStatus.SUBMITTED, JobStatus.QUEUED, JobStatus.PENDING, JobStatus.RUNNING}

irp_client = IRPClient()


def _show_preview(entries, emit, limit=10):
    """Print at most `limit` plan entries through `emit`, then a count of the rest."""
    for entry in entries[:limit]:
        emit(f"  - {entry['group_name']} ({entry['items_count']} analyses, job: {entry['status']})")
    if len(entries) > limit:
        emit(f"  ... and {len(entries) - limit} more")


def _confirmed(prompt):
    """Ask a y/n question via input(); only 'y' or 'yes' (any case) counts as yes."""
    return input(prompt).strip().lower() in ('y', 'yes')


if validation_failed:
    ux.warning("Skipping submission plan due to validation failure")
else:
    ux.subheader("Build Submission Plan")

    config_sql = "SELECT job_configuration_data FROM irp_job_configuration WHERE id = %s"
    for job in jobs:
        rows = execute_query(config_sql, (job['job_configuration_id'],))
        config = {} if rows.empty else rows.iloc[0]['job_configuration_data']

        name = config.get('Group_Name', '')
        n_items = len(config.get('items', []))
        already_there = name in existing_groups

        entry = {
            'job_id': job['id'],
            'status': job['status'],
            'group_name': name,
            'items_count': n_items,
        }

        # A job that is still running wins over the group-existence check
        if job['status'] in IN_PROGRESS_STATUSES:
            bucket = jobs_in_progress
        elif already_there:
            bucket = jobs_with_group
        else:
            bucket = jobs_missing_group
        bucket.append(entry)

    # Summary: only non-empty buckets are reported
    ux.info(f"Total jobs: {len(jobs)}")
    for emit, label, bucket in (
        (ux.warning, "Missing Moody's group", jobs_missing_group),
        (ux.success, "Moody's group exists", jobs_with_group),
        (ux.info, "In progress", jobs_in_progress),
    ):
        if bucket:
            emit(f"  {label}: {len(bucket)}")

# A fresh run means every job is INITIATED and its group is missing,
# so the plan can be built without asking the user anything
is_fresh_run = (
    not validation_failed
    and bool(jobs_missing_group)
    and not jobs_with_group
    and not jobs_in_progress
    and all(entry['status'] == JobStatus.INITIATED for entry in jobs_missing_group)
)

# Outcome of the plan
jobs_to_create = []      # groups that will be created
jobs_to_delete = []      # groups that will be deleted and then re-created

if is_fresh_run:
    ux.success("\nFresh run - all groups will be created.")
    jobs_to_create = jobs_missing_group

elif not validation_failed:
    # Mixed state: let the user decide per category

    if jobs_missing_group:
        ux.info("\n" + "-" * 50)
        ux.warning(f"Missing Moody's groups ({len(jobs_missing_group)}):")
        _show_preview(jobs_missing_group, ux.warning)

        if _confirmed(f"\nCreate these {len(jobs_missing_group)} missing group(s)? (y/n): "):
            jobs_to_create = jobs_missing_group
            ux.success("Will create missing groups in Moody's.")
        else:
            ux.info("Skipping missing groups.")

    if jobs_with_group:
        ux.info("\n" + "-" * 50)
        ux.success(f"Existing groups ({len(jobs_with_group)}):")
        _show_preview(jobs_with_group, ux.success)

        if _confirmed(f"\nDelete and re-create these {len(jobs_with_group)} existing group(s)? (y/n): "):
            jobs_to_delete = jobs_with_group
            ux.warning("Will delete and re-create existing groups.")
        else:
            ux.success("Keeping existing groups.")

    # Explain why nothing is going to be submitted, where a reason is known
    if not (jobs_to_create or jobs_to_delete):
        if jobs_in_progress:
            ux.info("\nNo action needed - jobs are in progress.")
        elif jobs_with_group and not jobs_missing_group:
            ux.success("\nAll groups already exist.")

# When validation failed, both plan lists stay empty and nothing is prompted

step.log(f"Plan: create={len(jobs_to_create)}, delete_and_recreate={len(jobs_to_delete)}, in_progress={len(jobs_in_progress)}")

## 4) Submit Grouping Batch to Moody's

In [None]:
# Carry out the plan: delete the groups chosen for replacement, then submit jobs.
# Leaves `result`, `deletion_errors`, `submission_results` and `failed_count`
# for the completion cell.
from helpers.job import resubmit_job, submit_job, delete_groups_for_jobs

ux.subheader("Submit Groups to Moody's")

deletion_errors = []
submission_results = []
failed_count = 0

if validation_failed:
    ux.warning("Skipping submission due to validation failure")
    result = {'submitted_jobs': 0, 'batch_status': 'INITIATED', 'jobs': []}

elif not (jobs_to_create or jobs_to_delete):
    ux.info("No jobs to submit.")
    # Nothing changes, so report whatever status the batch currently has
    result = {
        'submitted_jobs': 0,
        'batch_status': read_batch(grouping_batch_id)['status'],
        'jobs': [],
    }

else:
    # Phase 1: remove the groups that are going to be re-created
    if jobs_to_delete:
        ux.info(f"\nDeleting {len(jobs_to_delete)} existing group(s)...")

        # The plan entries are dicts carrying a 'group_name' key
        deletion_errors = delete_groups_for_jobs(jobs_to_delete, irp_client)

        if not deletion_errors:
            ux.success(f"  Deleted {len(jobs_to_delete)} group(s)")
        else:
            ux.error(f"  Failed to delete {len(deletion_errors)} group(s):")
            for problem in deletion_errors:
                ux.error(f"    - {problem}")

    # Phase 2: submit, unless a deletion failed
    if not deletion_errors:
        all_jobs_to_process = jobs_to_create + jobs_to_delete
        ux.info(f"\nSubmitting {len(all_jobs_to_process)} job(s)...")

        for entry in all_jobs_to_process:
            job_id = entry['job_id']
            is_fresh = entry['status'] == JobStatus.INITIATED

            try:
                if is_fresh:
                    # Never submitted before - submit the job as it is
                    submit_job(job_id, BatchType.GROUPING, irp_client)
                    outcome = {'job_id': job_id, 'action': 'submitted'}
                    verb = "Submitted"
                else:
                    # Any other status - resubmit_job returns the ID of the new job
                    # NOTE(review): argument order differs from submit_job above
                    # (client before batch type) - confirm against helpers.job.
                    new_job_id = resubmit_job(job_id, irp_client, BatchType.GROUPING)
                    outcome = {'job_id': new_job_id, 'action': 'resubmitted', 'original_job_id': job_id}
                    verb = "Resubmitted"
                submission_results.append(outcome)
                ux.info(f"  {verb}: {entry['group_name']}")
            except Exception as e:
                # Best effort: record the failure and continue with the remaining jobs
                failed_count += 1
                submission_results.append({'job_id': job_id, 'error': str(e)})
                ux.error(f"  Failed: {entry['group_name']} - {e}")

        success_count = len(all_jobs_to_process) - failed_count
        any_submitted = success_count > 0

        if any_submitted:
            # At least one job went out: activate the batch and attach it to
            # this step (needed for step chaining)
            activate_batch(grouping_batch_id)
            update_batch_step(grouping_batch_id, step.step_id)
            ux.success(f"Batch {grouping_batch_id} activated")

        if any_submitted:
            reported_status = 'ACTIVE'
        else:
            reported_status = 'INITIATED'

        result = {
            'submitted_jobs': success_count,
            'batch_status': reported_status,
            'jobs': submission_results
        }

        ux.info(f"\nSubmission complete: {success_count} succeeded, {failed_count} failed")
    else:
        ux.error("\nCannot proceed with submission due to deletion failures.")
        result = {'submitted_jobs': 0, 'batch_status': 'INITIATED', 'jobs': []}

## 5) Complete Step Execution

In [None]:
# Close out the step: store the output on success, otherwise mark the run FAILED
ux.header("Step Completion")

# Output passed to step.complete() on the success path
output_data = {
    'batch_id': grouping_batch_id,
    'batch_type': BatchType.GROUPING,
    'batch_status': result['batch_status'],
    'submitted_jobs': result['submitted_jobs'],
    'failed_jobs': failed_count,
    'deletion_errors': len(deletion_errors or []),
}


def _fail_step(title, error_message):
    """Mark the current step run as FAILED with `error_message` and print a banner."""
    from helpers.step import update_step_run
    from helpers.constants import StepStatus

    update_step_run(step.run_id, StepStatus.FAILED, error_message=error_message)

    rule = "=" * 60
    ux.error("\n" + rule)
    ux.error(title)
    ux.error(rule)


if validation_failed:
    # Pre-requisite validation blocked the whole notebook
    _fail_step("VALIDATION FAILED", "\n".join(prereq_errors))
    for blocking in prereq_errors:
        ux.error(f"  {blocking}")

elif deletion_errors:
    # Existing groups could not be removed, so nothing was submitted
    _fail_step(
        "GROUP DELETION FAILED",
        f"{len(deletion_errors)} group deletion(s) failed:\n" + "\n".join(deletion_errors),
    )
    ux.info(f"\nBatch ID: {grouping_batch_id}")
    ux.error(f"Failed deletions: {len(deletion_errors)}")
    ux.info("\nPlease manually delete these groups in Moody's Risk Modeler,")
    ux.info("then re-run this notebook.")

elif failed_count > 0:
    # One or more jobs could not be submitted
    failed_job_errors = [
        f"Job {attempt['job_id']}: {attempt['error']}"
        for attempt in submission_results
        if 'error' in attempt
    ]
    _fail_step(
        "BATCH SUBMISSION FAILED",
        f"{failed_count} job(s) failed to submit:\n" + "\n".join(failed_job_errors),
    )
    ux.info(f"\nBatch ID: {grouping_batch_id}")
    ux.info(f"Submitted: {result['submitted_jobs']} job(s)")
    ux.error(f"Failed: {failed_count} job(s)")
    for line in failed_job_errors:
        ux.error(f"  {line}")

else:
    # No failure of any kind
    step.complete(output_data)

    rule = "=" * 60
    ux.success("\n" + rule)
    headline = (
        "STEP COMPLETED - NO SUBMISSION NEEDED"
        if result['submitted_jobs'] == 0
        else "GROUPING BATCH SUBMITTED SUCCESSFULLY"
    )
    ux.success(headline)
    ux.success(rule)
    ux.info(f"\nBatch ID: {grouping_batch_id}")
    ux.info(f"Submitted: {result['submitted_jobs']} job(s)")
    ux.info(f"Batch status: {result['batch_status']}")
    ux.info("\nNext: Monitor job progress or proceed to next step")