# Step 01: Submit Create EDM Batch

This notebook retrieves the EDM Creation batch, submits its jobs to Moody's Risk Modeler, and optionally monitors them until they finish.

### Setup

In [None]:
%load_ext autoreload
%autoreload 2

# Import required modules
import sys
import os
from pathlib import Path

# Determine the notebook's actual directory.
# The kernel may start inside the cycle tree (a path containing 'Active_')
# or somewhere unrelated such as the user's home directory.
# NOTE(review): the file/stage names used below ('Step_01_Import_Base_Data.ipynb',
# 'Stage_03_Data_Import') do not match this notebook's title
# ("Submit Create EDM Batch") - confirm they really point at THIS notebook.
cwd = Path.cwd()

if 'Active_' in str(cwd):
    # Working directory is set correctly, construct path to THIS notebook
    notebook_path = cwd / 'Step_01_Import_Base_Data.ipynb'
else:
    # Working directory is not set correctly (e.g., /home/jovyan):
    # fall back to searching ~/workspace/workflows for an active cycle.
    home = Path.home()
    workspace = home / 'workspace'

    if not workspace.exists():
        raise RuntimeError("Workspace directory not found")

    workflows = workspace / 'workflows'
    # glob() yields matches in arbitrary order and also matches plain files,
    # so keep directories only and sort them: the "first" Active_ directory
    # is then the same one on every run.
    active_dirs = sorted(
        candidate for candidate in workflows.glob('Active_*') if candidate.is_dir()
    )

    if not active_dirs:
        raise RuntimeError("No Active_ cycle directory found in workspace/workflows/")

    # Use the first Active_ directory found and point to THIS notebook
    notebook_path = (
        active_dirs[0] / 'notebooks' / 'Stage_03_Data_Import' / 'Step_01_Import_Base_Data.ipynb'
    )

print(f"Notebook path: {notebook_path}")

# Add workspace to path so the project-local `helpers` package can be imported.
# Four levels above the notebook file; in the fallback layout above that is
# notebook -> stage dir -> 'notebooks' -> Active_* cycle dir -> 'workflows'.
workspace_path = notebook_path.parent.parent.parent.parent
if str(workspace_path) not in sys.path:
    sys.path.insert(0, str(workspace_path))

from helpers import ux
from helpers.context import WorkContext
from helpers.batch import submit_batch, get_batch_jobs, read_batch
from helpers.job import track_job_status
from helpers.database import execute_query
from helpers.step import Step, get_last_step_run
from helpers.irp_integration import IRPClient

In [None]:
# Initialize work context
# NOTE(review): the header text below says "Base Data Import" while the
# notebook title is "Submit Create EDM Batch" - confirm which is intended.
ux.header("Base Data Import")

# Build the context from the explicit notebook path resolved during setup
context = WorkContext(notebook_path=str(notebook_path))

# Echo where this notebook sits in the workflow
for label, attribute in (
    ("Cycle", "cycle_name"),
    ("Stage", "stage_name"),
    ("Step", "step_name"),
):
    ux.info(f"{label}: {getattr(context, attribute)}")

# Initialize step execution tracking
step = Step(context)

if step.executed:
    # The step already ran once: show what is known about it, then let the
    # user decide whether to run it again.
    ux.warning("⚠ This step has already been executed")
    ux.info(f"Message: {step.status_message}")

    last_run = get_last_step_run(step.step_id)
    if last_run:
        ux.info(f"Last run: #{last_run['run_num']}")
        ux.info(f"Status: {last_run['status']}")
        completed_ts = last_run['completed_ts']
        if completed_ts:
            ux.info(f"Completed: {completed_ts.strftime('%Y-%m-%d %H:%M:%S')}")

    if not ux.yes_no("Do you want to re-run this step?"):
        # Declined: stop the cell here so nothing below is executed
        ux.info("Step execution skipped")
        raise SystemExit("Step already completed")

    ux.info("Re-running step...")
    step.start(force=True)

ux.success(f"✓ Step tracking initialized for '{context.step_name}'")

### Retrieve EDM Batch

In [None]:
# Retrieve EDM Creation batch from Stage_01/Step_03
ux.subheader("Retrieve EDM Batch")

# Query for the most recently completed Stage_01/Step_03 step run of this cycle
query = """
    SELECT sr.id, sr.step_id, sr.run_num, sr.output_data, sr.completed_ts
    FROM irp_step_run sr
    INNER JOIN irp_step s ON sr.step_id = s.id
    INNER JOIN irp_stage sg ON s.stage_id = sg.id
    INNER JOIN irp_cycle c ON sg.cycle_id = c.id
    WHERE c.cycle_name = %s
      AND sg.stage_num = 1
      AND s.step_num = 3
      AND sr.status = 'COMPLETED'
    ORDER BY sr.completed_ts DESC
    LIMIT 1
"""

result = execute_query(query, (context.cycle_name,))

if result.empty:
    raise ValueError("Batch creation step not found - please complete Stage_01/Step_03 first")

output_data = result.iloc[0]['output_data']

# A completed run may still carry a NULL (or otherwise non-dict) output_data;
# report that as the same kind of error as a missing batch instead of
# crashing with AttributeError on `.get`.
if not isinstance(output_data, dict):
    raise ValueError(
        "Stage_01/Step_03 run has no usable output data - please re-run Stage_01/Step_03"
    )

# `or {}` also covers a 'batches' key that is present but null
batches = output_data.get('batches') or {}

if 'EDM Creation' not in batches:
    raise ValueError(f"EDM Creation batch not found. Available: {list(batches.keys())}")

edm_batch_id = int(batches['EDM Creation'])

ux.success(f"✓ Retrieved EDM Creation batch: ID={edm_batch_id}")
step.log(f"Retrieved EDM Creation batch: ID={edm_batch_id}")

### Review Create EDM Batch Configuration

In [None]:
# Verify batch status and display job information
ux.subheader("Verify Batch Status")

# Load the batch record and show its key properties as a two-column table
batch = read_batch(edm_batch_id)

batch_info = [
    [label, batch[key]]
    for label, key in (
        ("Batch ID", 'id'),
        ("Batch Type", 'batch_type'),
        ("Status", 'status'),
    )
]
batch_info.append(["Created", batch['created_ts'].strftime('%Y-%m-%d %H:%M:%S')])
ux.table(batch_info, headers=["Property", "Value"])

# Fetch the jobs that belong to this batch
jobs = get_batch_jobs(edm_batch_id)
job_count = len(jobs)

ux.info(f"\nTotal jobs: {job_count}")

# Preview the configuration of up to five jobs
if job_count > 0:
    ux.info("\nSample EDM Configurations (first 5):")
    config_query = "SELECT job_configuration_data FROM irp_job_configuration WHERE id = %s"
    for position, job in enumerate(jobs[:5], start=1):
        config_result = execute_query(config_query, (job['job_configuration_id'],))
        if config_result.empty:
            # No configuration row for this job: nothing to display
            continue
        config = config_result.iloc[0]['job_configuration_data']
        ux.info(f"  {position}. {config.get('Database', 'N/A')}")

step.log(f"Verified batch: {job_count} jobs ready for submission")

### Submit Create EDM Batch to Moody's

In [None]:
# Submit batch to Moody's API
ux.subheader("Submit Batch to Moody's")

# Explain what the submission will do before asking for confirmation
for line in (
    "",
    "Submission Process:",
    "  • Each job will lookup/create an exposure set in Moody's",
    "  • Database server 'databridge-1' will be used for all EDMs",
    "  • Jobs will transition to SUBMITTED status",
    "  • Batch will transition to ACTIVE status",
):
    ux.info(line)
# The estimate shown is simply two seconds per job
ux.info(f"\nEstimated time: ~{job_count * 2} seconds")
ux.info("")

# Confirm submission; declining stops the cell and is recorded in the step log
proceed = ux.yes_no("Submit batch to Moody's API?")

if not proceed:
    step.log("User cancelled batch submission")
    raise SystemExit("User cancelled batch submission")

# Submit
ux.info("\nSubmitting batch...")

result = submit_batch(edm_batch_id, IRPClient())

# Display results
ux.success("\n✓ Batch submission completed")
ux.info(f"  Submitted: {result['submitted_jobs']} jobs")
ux.info(f"  Status: {result['batch_status']}")

# Per-job results that carry an 'error' key are the ones that failed to submit
failed_jobs = [job_result for job_result in result['jobs'] if 'error' in job_result]
failed_count = len(failed_jobs)
if failed_count > 0:
    ux.warning(f"\n⚠ {failed_count} job(s) failed to submit")
    for job_result in failed_jobs:
        ux.error(f"  Job {job_result['job_id']}: {job_result['error']}")

step.log(f"Batch submitted: {result['submitted_jobs']} jobs, {failed_count} failed")

### Monitor Job Progress (Optional)

In [None]:
# Optional: Wait for EDM jobs to complete
ux.subheader("Monitor Job Progress (Optional)")

ux.info("")
ux.info("You can choose to:")
ux.info("  • Wait here for jobs to complete (may take hours)")
ux.info("  • Continue with workflow and monitor separately")
ux.info("")

monitor = ux.yes_no("Monitor EDM job progress until complete?")

if monitor:
    import time
    from helpers.batch import recon_batch

    ux.info("\nPolling Moody's API for job status...")
    ux.info("This may take several minutes to hours depending on job complexity")
    ux.info("")

    interval = 30  # Poll every 30 seconds
    timeout = 7200  # 2 hour timeout
    # Statuses after which a job is no longer expected to change
    terminal_statuses = ('FINISHED', 'FAILED', 'CANCELLED')
    # Monotonic clock: elapsed time is unaffected by wall-clock adjustments
    start_time = time.monotonic()
    poll_count = 0
    timed_out = False

    while True:
        poll_count += 1

        # Track all jobs; a single non-terminal (or untrackable) job keeps
        # the loop going.
        all_complete = True
        status_counts = {}

        for job in jobs:
            try:
                new_status = track_job_status(job['id'])
                status_counts[new_status] = status_counts.get(new_status, 0) + 1

                if new_status not in terminal_statuses:
                    all_complete = False
            except Exception as e:
                # Best effort: a tracking failure is reported and the job is
                # retried on the next poll rather than aborting the monitor.
                ux.warning(f"Failed to track job {job['id']}: {e}")
                all_complete = False

        # Display progress
        elapsed = time.monotonic() - start_time
        ux.info(f"Poll #{poll_count} ({int(elapsed)}s): {status_counts}")

        if all_complete:
            ux.success("\nAll jobs completed!")
            break

        # Check timeout
        if elapsed > timeout:
            timed_out = True
            ux.warning("\nTimeout reached - jobs still running")
            ux.info("Use '_Tools/Batch Management/Monitor Active Jobs.ipynb' to continue tracking")
            break

        time.sleep(interval)

    # Reconcile batch (done after a timeout as well, so the stored batch
    # status reflects whatever the jobs have reached so far)
    ux.info("\nReconciling batch status...")
    final_status = recon_batch(edm_batch_id)
    ux.info(f"Final batch status: {final_status}")

    if final_status == 'COMPLETED':
        ux.success("✓ Batch completed successfully!")
    elif final_status == 'FAILED':
        ux.error("✗ Batch failed - check job errors")
    else:
        ux.info(f"Batch status: {final_status}")

    # Do not claim completion in the step log when monitoring stopped on timeout
    if timed_out:
        step.log(f"Monitoring timed out after {timeout}s: batch status {final_status}")
    else:
        step.log(f"Monitored batch to completion: {final_status}")

else:
    ux.info("\nSkipping inline monitoring")
    ux.info("Use '_Tools/Batch Management/Monitor Active Jobs.ipynb' to track progress")
    ux.info("Or schedule automated monitoring (see Batch Management/README.md)")
    step.log("User skipped inline monitoring")