# Step 01: Execute Analysis

This notebook submits analysis jobs to Moody's Risk Modeler.

**Tasks:**
- Retrieve Analysis batch from Stage_01/Step_03
- Review analysis job configurations
- Submit analysis jobs to Moody's API
- Record submission results and the step's final status (job progress itself is monitored afterwards)

## 1) Setup

In [None]:
%load_ext autoreload
%autoreload 2

from helpers.notebook_setup import initialize_notebook_context
from helpers import ux
from helpers.batch import get_batch_jobs, read_batch, activate_batch
from helpers.database import execute_query
from helpers.irp_integration import IRPClient
from helpers.constants import BatchType, JobStatus

# Notebook-wide state shared by the cells below.
# existing_analyses holds "EDM/AnalysisName" keys for analyses that already exist.
existing_analyses = set()
validation_errors = []
validation_failed = False

In [None]:
# Bind this notebook to its place in the workflow. `context` carries the
# cycle/stage/step names; `step` is the tracking handle later cells use for
# logging and for completing the step.
NOTEBOOK_FILE = 'Step_01_Execute_Analysis.ipynb'
context, step = initialize_notebook_context(NOTEBOOK_FILE)

# Show where in the workflow we are running.
ux.header("Execute Analysis Batch")
for _label, _value in (
    ("Cycle", context.cycle_name),
    ("Stage", context.stage_name),
    ("Step", context.step_name),
):
    ux.info(f"{_label}: {_value}")
ux.success(f"Step tracking initialized for '{context.step_name}'")

## 2) Retrieve Analysis Batch

In [None]:
# Retrieve Analysis batch from Stage_01/Step_03
#
# Looks up the most recent COMPLETED run of stage 1 / step 3 for the current
# cycle and reads the Analysis batch ID out of that run's output_data.
# Raises ValueError when the step run, its output data, or the Analysis batch
# entry cannot be found.
ux.subheader("Retrieve Analysis Batch")

query = """
    SELECT sr.id, sr.step_id, sr.run_num, sr.output_data, sr.completed_ts
    FROM irp_step_run sr
    INNER JOIN irp_step s ON sr.step_id = s.id
    INNER JOIN irp_stage sg ON s.stage_id = sg.id
    INNER JOIN irp_cycle c ON sg.cycle_id = c.id
    WHERE c.cycle_name = %s
      AND sg.stage_num = 1
      AND s.step_num = 3
      AND sr.status = 'COMPLETED'
    ORDER BY sr.completed_ts DESC
    LIMIT 1
"""

result = execute_query(query, (context.cycle_name,))

if result.empty:
    raise ValueError("Batch creation step not found - please complete Stage_01/Step_03 first")

output_data = result.iloc[0]['output_data']

# A completed run can still carry a NULL (or otherwise non-dict) output_data;
# fail with actionable guidance instead of an AttributeError on `.get`.
if not isinstance(output_data, dict):
    raise ValueError(
        "Batch creation step has no usable output data - please re-run Stage_01/Step_03"
    )

# `or {}` also covers a 'batches' key that is present but null.
batches = output_data.get('batches') or {}

if BatchType.ANALYSIS not in batches:
    raise ValueError(f"Analysis batch not found. Available: {list(batches.keys())}")

analysis_batch_id = int(batches[BatchType.ANALYSIS])
ux.success(f"Retrieved Analysis batch: ID={analysis_batch_id}")
step.log(f"Retrieved Analysis batch: ID={analysis_batch_id}")

# Validate batch using EntityValidator directly to get structured output
#
# Sets `validation_failed` when any pre-requisite error is found and fills
# `existing_analyses` with the analyses the validator reports as already
# existing. Later cells read both.
from helpers.entity_validator import EntityValidator

ux.subheader("Validate Batch")

# Get job configurations for validation
jobs = get_batch_jobs(analysis_batch_id)
analyses = []
missing_config_job_ids = []  # jobs whose configuration row could not be found
config_query = "SELECT job_configuration_data FROM irp_job_configuration WHERE id = %s"
for job in jobs:
    config_result = execute_query(config_query, (job['job_configuration_id'],))
    if config_result.empty:
        missing_config_job_ids.append(job['id'])
        continue
    analyses.append(config_result.iloc[0]['job_configuration_data'])

# Do not drop unvalidated jobs silently: they still appear in the submission
# plan, so the operator needs to know they bypassed validation.
if missing_config_job_ids:
    ux.warning(
        f"{len(missing_config_job_ids)} job(s) have no configuration and were "
        f"not validated: {missing_config_job_ids}"
    )
    step.log(f"Jobs without configuration (not validated): {missing_config_job_ids}")

# Run validation - returns (errors, existing_analyses_list)
validator = EntityValidator()
all_errors, existing_analyses_list = validator.validate_analysis_batch(analyses)
existing_analyses = set(existing_analyses_list)  # Convert to set for easy lookup

# Separate pre-requisite errors from "analyses exist" errors
# ENT-ANALYSIS-001 is the code for "analyses already exist" - this is recoverable
prereq_errors = [e for e in all_errors if "ENT-ANALYSIS-001" not in e]

if prereq_errors:
    validation_failed = True
    ux.error("Pre-requisite validation failed:")
    for error in prereq_errors:
        ux.error(f"  {error}")
    step.log(f"Pre-requisite validation failed: {len(prereq_errors)} error(s)")
elif existing_analyses:
    ux.warning(f"Found {len(existing_analyses)} existing analysis(es) - will prompt for action")
    ux.success("Pre-requisite validation passed")
else:
    ux.success("Batch validation passed")

## 3) Build Submission Plan

Determine the action for each job based on its status and whether its analysis already exists in Moody's.

In [None]:
# Build the submission plan.
#
# Every batch job lands in exactly one bucket:
#   - jobs_in_progress      : job status is one of IN_PROGRESS_STATUSES -> skip
#   - jobs_with_analysis    : its "EDM/AnalysisName" key is in existing_analyses
#   - jobs_missing_analysis : everything else -> analysis still has to be created
# The in-progress check wins over the "analysis exists" check.

jobs_missing_analysis = []
jobs_with_analysis = []
jobs_in_progress = []

IN_PROGRESS_STATUSES = {JobStatus.SUBMITTED, JobStatus.QUEUED, JobStatus.PENDING, JobStatus.RUNNING}

irp_client = IRPClient()

if not validation_failed:
    ux.subheader("Build Submission Plan")

    config_query = "SELECT job_configuration_data FROM irp_job_configuration WHERE id = %s"
    for job in jobs:
        config_result = execute_query(config_query, (job['job_configuration_id'],))
        if config_result.empty:
            # No configuration row: fall back to empty EDM / analysis name.
            config = {}
        else:
            config = config_result.iloc[0]['job_configuration_data']

        edm = config.get('Database', '')
        analysis_name = config.get('Analysis Name', '')
        analysis_key = f"{edm}/{analysis_name}"

        job_info = dict(
            job_id=job['id'],
            status=job['status'],
            edm=edm,
            analysis_name=analysis_name,
            analysis_key=analysis_key,
        )

        # Pick the bucket first, then append once.
        if job['status'] in IN_PROGRESS_STATUSES:
            bucket = jobs_in_progress
        elif analysis_key in existing_analyses:
            bucket = jobs_with_analysis
        else:
            bucket = jobs_missing_analysis
        bucket.append(job_info)

    # Summary: only non-empty buckets are reported.
    ux.info(f"Total jobs: {len(jobs)}")
    for _emit, _label, _bucket in (
        (ux.warning, "Missing Moody's analysis", jobs_missing_analysis),
        (ux.success, "Moody's analysis exists", jobs_with_analysis),
        (ux.info, "In progress", jobs_in_progress),
    ):
        if _bucket:
            _emit(f"  {_label}: {len(_bucket)}")
else:
    ux.warning("Skipping submission plan due to validation failure")

# Decide which jobs to act on.
#
# A "fresh run" (validation passed, at least one missing analysis, nothing
# existing, nothing in progress, every job still INITIATED) needs no prompts.
# Otherwise the user is asked separately about missing and existing analyses.


def _confirm(prompt):
    """Return True when the user answers 'y' or 'yes' (case-insensitive)."""
    return input(prompt).strip().lower() in ['y', 'yes']


is_fresh_run = bool(
    not validation_failed
    and jobs_missing_analysis
    and not jobs_with_analysis
    and not jobs_in_progress
    and all(entry['status'] == JobStatus.INITIATED for entry in jobs_missing_analysis)
)

jobs_to_create = []  # analyses that will be created
jobs_to_delete = []  # existing analyses that will be deleted and re-run

# On validation failure both lists stay empty and nothing is prompted.
if not validation_failed:
    if is_fresh_run:
        ux.success("\nFresh run - all analyses will be created.")
        jobs_to_create = jobs_missing_analysis
    else:
        separator = "\n" + "-" * 50

        # Missing analyses: ask whether to create them.
        if jobs_missing_analysis:
            ux.info(separator)
            ux.warning(f"Missing Moody's analyses ({len(jobs_missing_analysis)}):")
            for entry in jobs_missing_analysis:
                ux.warning(f"  - {entry['analysis_name']} (job: {entry['status']})")

            if _confirm(f"\nCreate these {len(jobs_missing_analysis)} missing analysis(es)? (y/n): "):
                jobs_to_create = jobs_missing_analysis
                ux.success("Will create missing analyses in Moody's.")
            else:
                ux.info("Skipping missing analyses.")

        # Existing analyses: ask whether to delete and re-run them.
        if jobs_with_analysis:
            ux.info(separator)
            ux.success(f"Existing analyses ({len(jobs_with_analysis)}):")
            for entry in jobs_with_analysis:
                ux.success(f"  - {entry['analysis_name']} (job: {entry['status']})")

            if _confirm(f"\nDelete and re-run these {len(jobs_with_analysis)} existing analysis(es)? (y/n): "):
                jobs_to_delete = jobs_with_analysis
                ux.warning("Will delete and re-run existing analyses.")
            else:
                ux.success("Keeping existing analyses.")

        # Nothing selected: explain why when the reason is known.
        if not (jobs_to_create or jobs_to_delete):
            if jobs_in_progress:
                ux.info("\nNo action needed - jobs are in progress.")
            elif jobs_with_analysis and not jobs_missing_analysis:
                ux.success("\nAll analyses already exist.")

step.log(f"Plan: create={len(jobs_to_create)}, delete_and_rerun={len(jobs_to_delete)}, in_progress={len(jobs_in_progress)}")

## 4) Submit Analysis Batch to Moody's

In [None]:
# Carry out the submission plan.
#
# Order of operations: delete the analyses chosen for re-run, stop if any
# deletion failed, otherwise submit every selected job. The outcome is left
# in `result`, `submission_results`, `failed_count` and `deletion_errors`
# for the completion cell.
from helpers.job import resubmit_job, submit_job, delete_analyses_for_jobs

ux.subheader("Submit Analyses to Moody's")

deletion_errors = []
submission_results = []
failed_count = 0

nothing_selected = not (jobs_to_create or jobs_to_delete)

if validation_failed:
    ux.warning("Skipping submission due to validation failure")
    result = {'submitted_jobs': 0, 'batch_status': 'INITIATED', 'jobs': []}

elif nothing_selected:
    ux.info("No jobs to submit.")
    batch = read_batch(analysis_batch_id)
    result = {'submitted_jobs': 0, 'batch_status': batch['status'], 'jobs': []}

else:
    # Phase 1: remove the existing analyses that are to be re-run.
    if jobs_to_delete:
        ux.info(f"\nDeleting {len(jobs_to_delete)} existing analysis(es)...")

        job_ids_to_delete = [entry['job_id'] for entry in jobs_to_delete]
        deletion_errors = delete_analyses_for_jobs(job_ids_to_delete, existing_analyses, irp_client)

        if not deletion_errors:
            ux.success(f"  Deleted {len(jobs_to_delete)} analysis(es)")
        else:
            ux.error(f"  Failed to delete {len(deletion_errors)} analysis(es):")
            for err in deletion_errors:
                ux.error(f"    - {err}")

    # Phase 2: submit, but only when every deletion succeeded.
    if deletion_errors:
        ux.error("\nCannot proceed with submission due to deletion failures.")
        result = {'submitted_jobs': 0, 'batch_status': 'INITIATED', 'jobs': []}
    else:
        all_jobs_to_process = jobs_to_create + jobs_to_delete
        ux.info(f"\nSubmitting {len(all_jobs_to_process)} job(s)...")

        for job_info in all_jobs_to_process:
            job_id = job_info['job_id']
            status = job_info['status']

            # A failure on one job is recorded and the loop moves on.
            try:
                if status != JobStatus.INITIATED:
                    # Job has run before: resubmitting creates and submits a new job.
                    new_job_id = resubmit_job(job_id, irp_client, BatchType.ANALYSIS)
                    submission_results.append(
                        {'job_id': new_job_id, 'action': 'resubmitted', 'original_job_id': job_id}
                    )
                    ux.info(f"  Resubmitted: {job_info['analysis_name']}")
                else:
                    # Never submitted before: submit as-is.
                    submit_job(job_id, BatchType.ANALYSIS, irp_client)
                    submission_results.append({'job_id': job_id, 'action': 'submitted'})
                    ux.info(f"  Submitted: {job_info['analysis_name']}")
            except Exception as e:
                failed_count += 1
                submission_results.append({'job_id': job_id, 'error': str(e)})
                ux.error(f"  Failed: {job_info['analysis_name']} - {e}")

        success_count = len(all_jobs_to_process) - failed_count

        # The batch becomes ACTIVE as soon as at least one job went through.
        if success_count > 0:
            activate_batch(analysis_batch_id)
            ux.success(f"Batch {analysis_batch_id} activated")
            reported_status = 'ACTIVE'
        else:
            reported_status = 'INITIATED'

        result = {
            'submitted_jobs': success_count,
            'batch_status': reported_status,
            'jobs': submission_results,
        }

        ux.info(f"\nSubmission complete: {success_count} succeeded, {failed_count} failed")

## 5) Complete Step Execution

In [None]:
# Complete step execution
#
# Marks the step run FAILED (validation, deletion or submission failure, in
# that priority order) or completes it with `output_data` on success.
ux.header("Step Completion")


def _fail_step(message):
    """Mark the current step run as FAILED with *message* as its error text."""
    from helpers.step import update_step_run
    from helpers.constants import StepStatus

    update_step_run(step.run_id, StepStatus.FAILED, error_message=message)


def _banner(emit, title):
    """Print *title* between two 60-character rules using the *emit* function."""
    emit("\n" + "="*60)
    emit(title)
    emit("="*60)


# Prepare output data
output_data = {
    'batch_id': analysis_batch_id,
    'batch_type': BatchType.ANALYSIS,
    'batch_status': result['batch_status'],
    'submitted_jobs': result['submitted_jobs'],
    'failed_jobs': failed_count,
    'deletion_errors': len(deletion_errors) if deletion_errors else 0
}

if validation_failed:
    # Handle validation failure
    # str() keeps the join from raising if an error is not a plain string.
    _fail_step("\n".join(str(error) for error in prereq_errors))

    _banner(ux.error, "VALIDATION FAILED")
    for error in prereq_errors:
        ux.error(f"  {error}")

elif deletion_errors:
    # Handle deletion failure
    # The deletion helper's return type is not visible here; stringify each
    # entry so a non-string error object cannot abort the cell with a
    # TypeError before the step is marked FAILED.
    deletion_messages = [str(err) for err in deletion_errors]
    _fail_step(
        f"{len(deletion_errors)} analysis deletion(s) failed:\n" + "\n".join(deletion_messages)
    )

    _banner(ux.error, "ANALYSIS DELETION FAILED")
    ux.info(f"\nBatch ID: {analysis_batch_id}")
    ux.error(f"Failed deletions: {len(deletion_errors)}")
    ux.info("\nPlease manually delete these analyses in Moody's Risk Modeler,")
    ux.info("then re-run this notebook.")

elif failed_count > 0:
    # Handle submission failures
    failed_job_errors = [f"Job {j['job_id']}: {j['error']}" for j in submission_results if 'error' in j]
    _fail_step(f"{failed_count} job(s) failed to submit:\n" + "\n".join(failed_job_errors))

    _banner(ux.error, "BATCH SUBMISSION FAILED")
    ux.info(f"\nBatch ID: {analysis_batch_id}")
    ux.info(f"Submitted: {result['submitted_jobs']} job(s)")
    ux.error(f"Failed: {failed_count} job(s)")
    for error in failed_job_errors:
        ux.error(f"  {error}")

else:
    # Success
    step.complete(output_data)

    if result['submitted_jobs'] == 0:
        _banner(ux.success, "STEP COMPLETED - NO SUBMISSION NEEDED")
    else:
        _banner(ux.success, "ANALYSIS BATCH SUBMITTED SUCCESSFULLY")
    ux.info(f"\nBatch ID: {analysis_batch_id}")
    ux.info(f"Submitted: {result['submitted_jobs']} job(s)")
    ux.info(f"Batch status: {result['batch_status']}")
    ux.info("\nNext: Monitor job progress or proceed to next stage")