# Step 03: Create Batches

This notebook creates batches and jobs from the loaded configuration.

**Tasks:**
- Verify configuration is loaded and valid
- Identify batch types to create based on configuration data
- Preview databases and job configurations
- Create batches (EDM Creation, etc.)
- Display batch and job summaries

In [None]:
%load_ext autoreload
%autoreload 2

from helpers.notebook_setup import initialize_notebook_context
from helpers import ux
from helpers.configuration import read_configuration, get_base_portfolios, classify_groupings
from helpers.batch import create_batch
from helpers.database import execute_query
from helpers.constants import BatchType

## 1) Setup

In [None]:
# Register this notebook run as a workflow step. allow_rerun=True is passed so the
# cell can be executed again; the exact rerun semantics live in helpers.notebook_setup.
context, step = initialize_notebook_context(
    'Step_03_Create_Batches.ipynb',
    allow_rerun=True,
)

# Show which cycle / stage / step this run belongs to
ux.header("Batch Creation")
for context_line in (
    f"Cycle: {context.cycle_name}",
    f"Stage: {context.stage_name}",
    f"Step: {context.step_name}",
):
    ux.info(context_line)
ux.success(f"✓ Step tracking initialized for '{context.step_name}'")

## 2) Verify Configuration

In [None]:
# Verify that the current cycle has a usable configuration before any batch is created.
# Defines for later cells: cycle_id, config_id, config_status, config_created.
ux.header("Configuration Verification")

try:
    # Look up the cycle row by name
    cycle_result = execute_query(
        "SELECT id FROM irp_cycle WHERE cycle_name = %s",
        (context.cycle_name,)
    )

    if cycle_result.empty:
        raise ValueError(f"Cycle not found: {context.cycle_name}")

    cycle_id = int(cycle_result.iloc[0]['id'])  # Convert numpy.int64 to Python int

    # Most recently created configuration for this cycle
    config_result = execute_query(
        "SELECT id, status, created_ts FROM irp_configuration WHERE cycle_id = %s ORDER BY created_ts DESC LIMIT 1",
        (cycle_id,)
    )

    if config_result.empty:
        # Tell the user which prerequisite is missing. The step failure itself is
        # recorded exactly once, by the except handler at the bottom of this cell.
        ux.error("✗ No configuration found for this cycle")
        ux.info("Please complete Step 02: Validate Configuration File first")
        raise ValueError("No configuration found for cycle")

    latest_config = config_result.iloc[0]
    config_id = int(latest_config['id'])  # Convert numpy.int64 to Python int
    config_status = latest_config['status']
    config_created = latest_config['created_ts']

    # Only VALID or ACTIVE configurations may be used to create batches
    if config_status not in ['VALID', 'ACTIVE']:
        ux.error(f"✗ Configuration status is '{config_status}' (expected VALID or ACTIVE)")
        raise ValueError(f"Configuration must be VALID or ACTIVE, found: {config_status}")

    # Display configuration summary
    config_info = [
        ["Configuration ID", config_id],
        ["Status", config_status],
        ["Created", config_created.strftime('%Y-%m-%d %H:%M:%S')]
    ]
    ux.table(config_info, headers=["Property", "Value"])
    ux.success("✓ Configuration verified")

    step.log(f"Configuration verified: ID={config_id}, Status={config_status}")

except Exception as e:
    # Single place where the step is marked failed, so each failure is recorded once.
    # The exception is re-raised so notebook execution stops at this cell.
    ux.error(f"✗ Configuration verification failed: {str(e)}")
    step.fail(f"Configuration verification failed: {str(e)}")
    raise

## 3) Identify Batch Types to Create

In [None]:
# Analyze the configuration to determine which batch types are needed.
# Builds two parallel lists:
#   batch_types_info      - rows of [batch type, job count, description] for display
#   batch_types_to_create - the batch types themselves, in creation order
# Also defines the sheet-level variables that the preview cells below read
# (databases, edm_version, base_portfolios, geocode_version, analyses,
# treaty_edm_combinations, groupings, analysis_only_groups, rollup_groups, ...).
ux.header("Batch Type Identification")

try:
    # Read configuration data
    config_data = read_configuration(config_id)

    # Extract configuration_data JSONB field
    configuration_data = config_data.get('configuration_data', {})
    metadata = configuration_data.get('Metadata', {})

    # Identify batch types based on configuration content
    batch_types_info = []
    batch_types_to_create = []

    # Check for EDM Creation (Databases sheet)
    databases = configuration_data.get('Databases', [])
    if databases:
        batch_types_info.append([BatchType.EDM_CREATION, len(databases), "One job per database"])
        batch_types_to_create.append(BatchType.EDM_CREATION)

    # Check for EDM DB Upgrade (Databases sheet + EDM Data Version in Metadata)
    edm_version = metadata.get('EDM Data Version')
    if databases and edm_version:
        # Normalise to str so a numeric version (e.g. 22 or 22.0) does not break
        # the string handling here or in the EDM DB Upgrade preview cell.
        edm_version = str(edm_version)
        # Target version is the part before the first '.' (whole value if no '.')
        target_version = edm_version.split('.')[0]
        batch_types_info.append([BatchType.EDM_DB_UPGRADE, len(databases), f"One job per database (upgrade to v{target_version})"])
        batch_types_to_create.append(BatchType.EDM_DB_UPGRADE)

    # Portfolio-driven batches only apply to base portfolios; derive them once so
    # every check below (and every preview cell) sees the same list.
    portfolios = configuration_data.get('Portfolios', [])
    base_portfolios = get_base_portfolios(portfolios) if portfolios else []

    # Check for Base Portfolio Creation and MRI Import (Portfolios sheet)
    # NOTE(review): these two batches are added whenever the Portfolios sheet has rows,
    # even if none of them is a base portfolio (job count 0), whereas GeoHaz and
    # Portfolio Mapping below require at least one base portfolio - confirm this is intended.
    if portfolios:
        # Add Portfolio Creation
        batch_types_info.append([BatchType.PORTFOLIO_CREATION, len(base_portfolios), "One job per portfolio"])
        batch_types_to_create.append(BatchType.PORTFOLIO_CREATION)
        # Add MRI Import
        batch_types_info.append([BatchType.MRI_IMPORT, len(base_portfolios), "One job per portfolio"])
        batch_types_to_create.append(BatchType.MRI_IMPORT)

    # Check for GeoHaz (Base Portfolios + Geocode Version in Metadata)
    geocode_version = metadata.get('Geocode Version')
    if base_portfolios and geocode_version:
        batch_types_info.append([BatchType.GEOHAZ, len(base_portfolios), f"One job per base portfolio (geocode v{geocode_version})"])
        batch_types_to_create.append(BatchType.GEOHAZ)

    # Check for Portfolio Mapping (Base Portfolios only)
    if base_portfolios:
        batch_types_info.append([BatchType.PORTFOLIO_MAPPING, len(base_portfolios), "One job per base portfolio (SQL mapping)"])
        batch_types_to_create.append(BatchType.PORTFOLIO_MAPPING)

    # Check for Reinsurance Treaties.
    # The batch is created whenever the Analysis Table has rows; the Reinsurance
    # Treaties sheet may be empty. With 0 jobs the batch is still created so the
    # workflow chain stays intact.
    treaties = configuration_data.get('Reinsurance Treaties', [])
    analyses = configuration_data.get('Analysis Table', [])
    treaty_edm_combinations = set()  # Always defined: the treaties preview cell reads it

    if analyses:
        # Treaty names defined on the Reinsurance Treaties sheet (empty set if none)
        valid_treaty_names = {t.get('Treaty Name') for t in treaties if t.get('Treaty Name')} if treaties else set()

        # Collect unique (treaty, EDM) pairs referenced from the Analysis Table
        treaty_columns = ['Reinsurance Treaty 1', 'Reinsurance Treaty 2', 'Reinsurance Treaty 3',
                          'Reinsurance Treaty 4', 'Reinsurance Treaty 5']

        for analysis in analyses:
            edm = analysis.get('Database')
            if not edm:
                continue  # An analysis without a database cannot contribute a pair
            for col in treaty_columns:
                treaty_name = analysis.get(col)
                # Ignore references to treaties that are not defined on the treaties sheet
                if treaty_name and treaty_name in valid_treaty_names:
                    treaty_edm_combinations.add((treaty_name, edm))

        job_count = len(treaty_edm_combinations)
        description = "One job per treaty-EDM combination" if job_count > 0 else "No treaties to create (empty batch for workflow continuity)"
        batch_types_info.append([BatchType.CREATE_REINSURANCE_TREATIES, job_count, description])
        batch_types_to_create.append(BatchType.CREATE_REINSURANCE_TREATIES)

    # Check for Analysis (Analysis Table sheet)
    if analyses:
        batch_types_info.append([BatchType.ANALYSIS, len(analyses), "One job per analysis"])
        batch_types_to_create.append(BatchType.ANALYSIS)

    # Check for Grouping (Groupings sheet) - split into analysis-only and rollup groups
    groupings = configuration_data.get('Groupings', [])
    if groupings:
        analysis_only_groups, rollup_groups = classify_groupings(configuration_data)

        # Add Grouping batch for analysis-only groups
        if analysis_only_groups:
            batch_types_info.append([BatchType.GROUPING, len(analysis_only_groups), "One job per analysis-only group"])
            batch_types_to_create.append(BatchType.GROUPING)

        # Add Grouping Rollup batch for groups containing other groups
        if rollup_groups:
            batch_types_info.append([BatchType.GROUPING_ROLLUP, len(rollup_groups), "One job per rollup group (groups of groups)"])
            batch_types_to_create.append(BatchType.GROUPING_ROLLUP)

    # Check for Export to RDM (requires Export RDM Name in Metadata, Analysis Table, and Groupings)
    rdm_name = metadata.get('Export RDM Name')
    if rdm_name and analyses and groupings:
        # Single job exports all analyses and groups
        analysis_count = len(analyses)
        group_count = len(groupings)
        batch_types_info.append([BatchType.EXPORT_TO_RDM, 1, f"One job to export {analysis_count} analyses + {group_count} groups to '{rdm_name}'"])
        batch_types_to_create.append(BatchType.EXPORT_TO_RDM)

    # Display identified batch types
    if batch_types_info:
        ux.info("Batch types identified from configuration:")
        ux.table(batch_types_info, headers=["Batch Type", "Job Count", "Description"])
        ux.success(f"✓ Found {len(batch_types_to_create)} batch type(s) to create")

        step.log(f"Identified {len(batch_types_to_create)} batch type(s): {', '.join(batch_types_to_create)}")
    else:
        # Nothing to create. The step failure is recorded once, by the handler below.
        ux.warning("⚠ No batch types identified from configuration")
        ux.info("Configuration may not contain required data sheets (Databases, Portfolios, etc.)")
        raise ValueError("No batch types identified in configuration")

except Exception as e:
    # Single place where the step is marked failed; re-raise to stop the notebook here
    ux.error(f"✗ Batch type identification failed: {str(e)}")
    step.fail(f"Batch type identification failed: {str(e)}")
    raise

## 4a) Preview: EDM Creation Batch

In [None]:
# Preview the databases that the EDM Creation batch will turn into jobs
ux.header("EDM Creation Batch Preview")

# Guard on the EDM Creation batch type itself: whether this preview applies depends
# on the Databases sheet, not on whether a Portfolio Creation batch is planned.
if BatchType.EDM_CREATION in batch_types_to_create:
    ux.info(f"This batch will create {len(databases)} job(s), one for each database:")
    ux.info("")

    # One single-column row per database
    database_rows = [[db.get('Database', 'N/A')] for db in databases]

    ux.table(database_rows, headers=["EDM Name"])

    ux.info("")
    ux.info("Each database will become one job with configuration containing:")
    ux.info("  - Metadata from configuration file")
    ux.info("  - Database-specific fields (Database, Version, EDM_Type, etc.)")

    step.log(f"Previewed EDM Creation batch: {len(databases)} databases")
else:
    ux.info("EDM Creation batch not needed (no databases in configuration)")

## 4b) Preview: Base Portfolio Creation Batch

In [None]:
# Preview the base portfolios that the Portfolio Creation batch will turn into jobs
ux.header("Base Portfolio Creation Batch Preview")

if BatchType.PORTFOLIO_CREATION not in batch_types_to_create:
    ux.info("Portfolio Creation batch not needed (no base portfolios in configuration)")
else:
    portfolio_total = len(base_portfolios)
    ux.info(f"This batch will create {portfolio_total} job(s), one for each base portfolio:")
    ux.info("")

    # One row per base portfolio: its Portfolio and Database fields
    portfolio_rows = [
        [entry.get('Portfolio', 'N/A'), entry.get('Database', 'N/A')]
        for entry in base_portfolios
    ]
    ux.table(portfolio_rows, headers=["Portfolio", "EDM"])

    ux.info("")
    for detail_line in (
        "Each portfolio will become one job with configuration containing:",
        "  - Metadata from configuration file",
        "  - Portfolio-specific fields (Portfolio Name, EDM, etc.)",
    ):
        ux.info(detail_line)

    step.log(f"Previewed Base Portfolio Creation batch: {portfolio_total} portfolios")

## 4c) Preview: MRI Import Batch

In [None]:
# Preview the base portfolios that the MRI Import batch will turn into jobs
ux.header("MRI Import Batch Preview")

if BatchType.MRI_IMPORT not in batch_types_to_create:
    ux.info("MRI Import batch not needed (no base portfolios in configuration)")
else:
    portfolio_total = len(base_portfolios)
    ux.info(f"This batch will create {portfolio_total} job(s), one for each base portfolio:")
    ux.info("")

    # One row per base portfolio: Portfolio, Database and Import File fields
    portfolio_rows = [
        [
            entry.get('Portfolio', 'N/A'),
            entry.get('Database', 'N/A'),
            entry.get('Import File', 'N/A'),
        ]
        for entry in base_portfolios
    ]
    ux.table(portfolio_rows, headers=["Portfolio", "EDM", "Import File"])

    ux.info("")
    for detail_line in (
        "Each portfolio will become one job with configuration containing:",
        "  - Metadata from configuration file",
        "  - Portfolio-specific fields (Portfolio Name, EDM, Import File, etc.)",
    ):
        ux.info(detail_line)

    step.log(f"Previewed MRI Import batch: {portfolio_total} portfolios")

## 4d) Preview: Create Reinsurance Treaties Batch

In [None]:
# Preview the treaty-EDM combinations that the Create Reinsurance Treaties batch
# will turn into jobs (the batch may legitimately contain 0 jobs)
ux.header("Create Reinsurance Treaties Batch Preview")

if BatchType.CREATE_REINSURANCE_TREATIES not in batch_types_to_create:
    ux.info("Create Reinsurance Treaties batch not needed (no Analysis Table in configuration)")
elif not treaty_edm_combinations:
    # Batch is planned but no analysis references a defined treaty
    ux.info("This batch will be created with 0 jobs.")
    ux.info("")
    for note_line in (
        "No reinsurance treaties are defined in the configuration, or no analyses",
        "reference any treaties. The batch will be created for workflow continuity",
        "and will immediately complete during submission.",
    ):
        ux.info(note_line)

    step.log("Previewed Create Reinsurance Treaties batch: 0 jobs (empty batch)")
else:
    combination_total = len(treaty_edm_combinations)
    ux.info(f"This batch will create {combination_total} job(s), one for each unique treaty-EDM combination:")
    ux.info("")

    # Sorted (treaty, EDM) pairs, one table row each
    treaty_rows = [list(pair) for pair in sorted(treaty_edm_combinations)]
    ux.table(treaty_rows, headers=["Treaty Name", "EDM"])

    ux.info("")
    for detail_line in (
        "Each treaty-EDM combination will become one job with configuration containing:",
        "  - Metadata from configuration file",
        "  - Database (EDM) where the treaty will be created",
        "  - Treaty-specific fields from Reinsurance Treaties sheet",
    ):
        ux.info(detail_line)

    step.log(f"Previewed Create Reinsurance Treaties batch: {combination_total} treaty-EDM combinations")

## 4e) Preview: EDM DB Upgrade Batch

In [None]:
# Preview the databases that the EDM DB Upgrade batch will turn into jobs
ux.header("EDM DB Upgrade Batch Preview")

if BatchType.EDM_DB_UPGRADE not in batch_types_to_create:
    ux.info("EDM DB Upgrade batch not needed (no databases or EDM Data Version not specified)")
else:
    # Target version is the part of the EDM Data Version before the first '.'
    target_version = edm_version.split('.')[0] if '.' in edm_version else edm_version
    database_total = len(databases)

    ux.info(f"This batch will create {database_total} job(s), one for each database:")
    ux.info(f"Target EDM Data Version: {target_version}")
    ux.info("")

    # One row per database, each paired with the shared target version
    database_rows = [
        [entry.get('Database', 'N/A'), target_version]
        for entry in databases
    ]
    ux.table(database_rows, headers=["EDM Name", "Target Version"])

    ux.info("")
    for detail_line in (
        "Each database will become one job with configuration containing:",
        "  - Metadata from configuration file",
        "  - Database-specific fields from Databases sheet",
        "  - target_edm_version: The version to upgrade to",
    ):
        ux.info(detail_line)

    step.log(f"Previewed EDM DB Upgrade batch: {database_total} databases to version {target_version}")

## 4f) Preview: GeoHaz Batch

In [None]:
# Preview the base portfolios that the GeoHaz batch will turn into jobs
ux.header("GeoHaz Batch Preview")

if BatchType.GEOHAZ not in batch_types_to_create:
    ux.info("GeoHaz batch not needed (no base portfolios or Geocode Version not specified)")
else:
    portfolio_total = len(base_portfolios)
    ux.info(f"This batch will create {portfolio_total} job(s), one for each base portfolio:")
    ux.info(f"Geocode Version: {geocode_version}")
    ux.info("")

    # One row per base portfolio, each paired with the shared geocode version
    portfolio_rows = [
        [
            entry.get('Portfolio', 'N/A'),
            entry.get('Database', 'N/A'),
            geocode_version,
        ]
        for entry in base_portfolios
    ]
    ux.table(portfolio_rows, headers=["Portfolio", "EDM", "Geocode Version"])

    ux.info("")
    for detail_line in (
        "Each portfolio will become one job with configuration containing:",
        "  - Metadata from configuration file",
        "  - Portfolio-specific fields from Portfolios sheet",
        "  - geocode_version: The geocode version to use",
    ):
        ux.info(detail_line)

    step.log(f"Previewed GeoHaz batch: {portfolio_total} base portfolios with geocode v{geocode_version}")

## 4g) Preview: Portfolio Mapping Batch

In [None]:
# Preview the base portfolios that the Portfolio Mapping batch will turn into jobs
ux.header("Portfolio Mapping Batch Preview")

if BatchType.PORTFOLIO_MAPPING not in batch_types_to_create:
    ux.info("Portfolio Mapping batch not needed (no base portfolios in configuration)")
else:
    portfolio_total = len(base_portfolios)
    ux.info(f"This batch will create {portfolio_total} job(s), one for each base portfolio:")
    ux.info("")

    # One row per base portfolio: Portfolio, Database and Import File fields
    portfolio_rows = [
        [
            entry.get('Portfolio', 'N/A'),
            entry.get('Database', 'N/A'),
            entry.get('Import File', 'N/A'),
        ]
        for entry in base_portfolios
    ]
    ux.table(portfolio_rows, headers=["Portfolio", "EDM", "Import File"])

    ux.info("")
    for detail_line in (
        "Each portfolio will become one job with configuration containing:",
        "  - Metadata from configuration file",
        "  - Portfolio-specific fields from Portfolios sheet",
        "  - SQL script: 2b_Query_To_Create_Sub_Portfolios_{Import File}_RMS_BackEnd.sql",
        "",
        "Note: Portfolio Mapping executes SQL scripts locally (not submitted to Moody's)",
    ):
        ux.info(detail_line)

    step.log(f"Previewed Portfolio Mapping batch: {portfolio_total} base portfolios")

## 4h) Preview: Analysis Batch

In [None]:
# Preview the analyses that the Analysis batch will turn into jobs
# (only the first 10 rows are tabulated; the remainder is summarised in one line)
ux.header("Analysis Batch Preview")

if BatchType.ANALYSIS in batch_types_to_create:
    ux.info(f"This batch will create {len(analyses)} job(s), one for each analysis:")
    ux.info("")

    # Display analysis table
    analysis_rows = []
    for analysis in analyses[:10]:  # Show first 10
        # Read the profile once. A missing or null value is shown as 'N/A'
        # instead of breaking len()/slicing below.
        profile = analysis.get('Analysis Profile', 'N/A')
        if profile is None:
            profile = 'N/A'
        # Truncate long profile names so the table stays readable
        if len(profile) > 30:
            profile = profile[:30] + '...'

        analysis_rows.append([
            analysis.get('Analysis Name', 'N/A'),
            analysis.get('Portfolio', 'N/A'),
            analysis.get('Database', 'N/A'),
            profile,
        ])

    ux.table(analysis_rows, headers=["Analysis Name", "Portfolio", "EDM", "Analysis Profile"])

    if len(analyses) > 10:
        ux.info(f"... and {len(analyses) - 10} more analysis job(s)")

    ux.info("")
    ux.info("Each analysis will become one job with configuration containing:")
    ux.info("  - Metadata from configuration file")
    ux.info("  - Analysis-specific fields (Name, Portfolio, Database, Profiles, Treaties, Tags)")

    step.log(f"Previewed Analysis batch: {len(analyses)} analyses")
else:
    ux.info("Analysis batch not needed (no analyses in configuration)")

## 4i) Preview: Grouping Batch (Analysis-only Groups)

In [None]:
# Preview the analysis-only groups that the Grouping batch will turn into jobs
# (only the first 10 groups are tabulated)
ux.header("Grouping Batch Preview (Analysis-only Groups)")

if BatchType.GROUPING not in batch_types_to_create:
    ux.info("Grouping batch not needed (no analysis-only groups in configuration)")
else:
    group_total = len(analysis_only_groups)
    ux.info(f"This batch will create {group_total} job(s), one for each analysis-only group:")
    ux.info("These groups contain ONLY analysis names (no group references).")
    ux.info("")

    grouping_rows = []
    for group in analysis_only_groups[:10]:
        members = group.get('items', [])
        # Show up to three member names, then summarise how many were left out
        members_preview = ', '.join(members[:3])
        hidden_count = len(members) - 3
        if hidden_count > 0:
            members_preview += f', ... (+{hidden_count} more)'

        grouping_rows.append([group.get('Group_Name', 'N/A'), len(members), members_preview])

    ux.table(grouping_rows, headers=["Group Name", "# Analyses", "Analyses (Preview)"])

    if group_total > 10:
        ux.info(f"... and {group_total - 10} more grouping job(s)")

    ux.info("")
    for detail_line in (
        "Each grouping will become one job with configuration containing:",
        "  - Metadata from configuration file",
        "  - Group_Name: Name of the group",
        "  - items: List of analysis names to group together",
    ):
        ux.info(detail_line)

    step.log(f"Previewed Grouping batch: {group_total} analysis-only groups")

## 4j) Preview: Grouping Rollup Batch (Groups of Groups)

In [None]:
# Preview the rollup groups (groups that reference other groups) that the
# Grouping Rollup batch will turn into jobs (only the first 10 are tabulated)
ux.header("Grouping Rollup Batch Preview (Groups of Groups)")

if BatchType.GROUPING_ROLLUP not in batch_types_to_create:
    ux.info("Grouping Rollup batch not needed (no groups of groups in configuration)")
else:
    rollup_total = len(rollup_groups)
    ux.info(f"This batch will create {rollup_total} job(s), one for each rollup group:")
    ux.info("These groups contain references to OTHER GROUPS (not just analyses).")
    ux.warning("⚠ IMPORTANT: Grouping Rollup jobs can only run AFTER the Grouping batch completes.")
    ux.info("")

    # Every defined group name; an item found here is a group reference,
    # anything else is counted as an analysis reference
    group_names_set = {g.get('Group_Name') for g in groupings if g.get('Group_Name')}

    grouping_rows = []
    for rollup in rollup_groups[:10]:
        members = rollup.get('items', [])
        group_ref_count = sum(1 for member in members if member in group_names_set)
        analysis_ref_count = len(members) - group_ref_count

        # First three items, with group references wrapped in [brackets]
        shown = [
            f"[{member}]" if member in group_names_set else member
            for member in members[:3]
        ]
        members_preview = ', '.join(shown)
        if len(members) > 3:
            members_preview += f', ... (+{len(members) - 3} more)'

        grouping_rows.append([
            rollup.get('Group_Name', 'N/A'),
            group_ref_count,
            analysis_ref_count,
            members_preview,
        ])

    ux.table(grouping_rows, headers=["Group Name", "# Group Refs", "# Analysis Refs", "Items (Preview, [groups] marked)"])

    if rollup_total > 10:
        ux.info(f"... and {rollup_total - 10} more rollup job(s)")

    ux.info("")
    for detail_line in (
        "Each rollup group will become one job with configuration containing:",
        "  - Metadata from configuration file",
        "  - Group_Name: Name of the rollup group",
        "  - items: List of group names AND/OR analysis names to include",
    ):
        ux.info(detail_line)

    step.log(f"Previewed Grouping Rollup batch: {rollup_total} rollup groups")

## 4k) Preview: Export to RDM Batch

In [None]:
# Preview the single Export to RDM job: which analyses and groups it will export
ux.header("Export to RDM Batch Preview")

if BatchType.EXPORT_TO_RDM not in batch_types_to_create:
    ux.info("Export to RDM batch not needed (Export RDM Name not specified or no analyses/groups)")
else:
    rdm_name = metadata.get('Export RDM Name')
    # Only entries that actually carry a name are counted and listed
    analysis_names = [name for name in (a.get('Analysis Name') for a in analyses) if name]
    group_names = [name for name in (g.get('Group_Name') for g in groupings) if name]
    analysis_total = len(analysis_names)
    group_total = len(group_names)

    ux.info("This batch will create 1 job to export all analyses and groups to RDM.")
    ux.info(f"Target RDM: {rdm_name}")
    ux.info("Server: databridge-1")
    ux.info("")

    # Display summary
    summary_rows = [
        ["Analyses to export", analysis_total],
        ["Groups to export", group_total],
        ["Total items", analysis_total + group_total],
    ]
    ux.table(summary_rows, headers=["Item Type", "Count"])

    # List the first 10 names of each kind, then summarise the rest
    for heading, names in (
        ("Analyses to export (first 10):", analysis_names),
        ("Groups to export (first 10):", group_names),
    ):
        ux.info("")
        ux.info(heading)
        for name in names[:10]:
            ux.info(f"  - {name}")
        if len(names) > 10:
            ux.info(f"  ... and {len(names) - 10} more")

    ux.info("")
    for detail_line in (
        "Job configuration will contain:",
        "  - Metadata from configuration file",
        "  - rdm_name: Target RDM database name",
        "  - server_name: databridge-1",
        "  - analysis_names: Combined list of all analysis and group names",
    ):
        ux.info(detail_line)
    ux.warning("⚠ IMPORTANT: Export to RDM can only run AFTER all Grouping batches complete.")

    step.log(f"Previewed Export to RDM batch: {analysis_total} analyses + {group_total} groups to '{rdm_name}'")

## 5) Create Batches

In [None]:
# Create one batch per identified batch type, after an explicit user confirmation.
# Defines for later cells: created_batches, a dict mapping batch type -> batch ID (int).
ux.header("Batch Creation")

# Confirm with user
batch_summary = ", ".join(batch_types_to_create)
ux.info(f"Ready to create batches: {batch_summary}")
proceed = ux.yes_no("Create these batches?")

if not proceed:
    ux.info("Batch creation cancelled by user")
    step.log("User cancelled batch creation")
    # Stop execution of this cell without marking the step as failed
    raise SystemExit("User cancelled batch creation")

# Create batches
created_batches = {}

try:
    for batch_type in batch_types_to_create:
        ux.subheader(f"Creating batch: {batch_type}")

        # Create batch (this will create jobs atomically).
        # Convert the returned ID to a plain int once, so the same value is stored
        # and passed as the SQL parameter below (avoids numpy integer types).
        batch_id = int(create_batch(
            batch_type=batch_type,
            configuration_id=config_id,
            step_id=step.step_id
        ))

        created_batches[batch_type] = batch_id

        # Get job count for this batch
        job_count_result = execute_query(
            "SELECT COUNT(*) as count FROM irp_job WHERE batch_id = %s",
            (batch_id,)
        )
        job_count = int(job_count_result.iloc[0]['count'])

        ux.success(f"✓ Batch created: ID={batch_id}")
        ux.info(f"  Jobs created: {job_count}")

        step.log(f"Created batch '{batch_type}': ID={batch_id}, Jobs={job_count}")

    ux.success(f"\n✓ All batches created successfully ({len(created_batches)} total)")

except Exception as e:
    # Batches created before the failure remain recorded in created_batches
    ux.error(f"✗ Batch creation failed: {str(e)}")
    step.fail(f"Batch creation failed: {str(e)}")
    raise

## 6) Display Batch Summary

In [None]:
# Show one summary row per batch created above, plus overall totals.
# Display only: any error here is logged as a warning and does not fail the step.
ux.header("Batch Summary")

try:
    batch_ids = list(created_batches.values())

    if batch_ids:
        # One %s placeholder per batch ID for the parameterized IN (...) clause
        placeholders = ', '.join('%s' for _ in batch_ids)
        batch_query = f"""
            SELECT 
                b.id,
                b.batch_type,
                b.status,
                b.created_ts,
                COUNT(j.id) as job_count
            FROM irp_batch b
            LEFT JOIN irp_job j ON b.id = j.batch_id
            WHERE b.id IN ({placeholders})
            GROUP BY b.id, b.batch_type, b.status, b.created_ts
            ORDER BY b.created_ts
        """

        batch_results = execute_query(batch_query, tuple(batch_ids))

        # Table rows in query order; job counts are converted to plain ints
        batch_rows = [
            [
                record['batch_type'],
                record['id'],
                record['status'],
                int(record['job_count']),
                record['created_ts'].strftime('%Y-%m-%d %H:%M:%S'),
            ]
            for _, record in batch_results.iterrows()
        ]
        total_jobs = sum(row[3] for row in batch_rows)

        ux.table(batch_rows, headers=["Batch Type", "Batch ID", "Status", "Jobs", "Created"])

        batch_total = len(batch_ids)
        ux.info(f"\nTotal batches: {batch_total}")
        ux.info(f"Total jobs: {total_jobs}")

        step.log(f"Batch summary: {batch_total} batches, {total_jobs} total jobs")

except Exception as e:
    ux.error(f"✗ Failed to display batch summary: {str(e)}")
    # Don't fail step, this is just display
    step.log(f"Warning: Failed to display batch summary: {str(e)}", level="WARNING")

## 7) Preview Job Configurations

In [None]:
# Preview job configurations for created batches
ux.header("Job Configuration Preview")

try:
    # Preview EDM Creation jobs
    if 'EDM Creation' in created_batches:
        edm_batch_id = created_batches['EDM Creation']
        
        ux.subheader("EDM Creation Jobs (first 5)")
        
        # Get job configurations
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        
        job_configs = execute_query(job_config_query, (edm_batch_id,))
        
        if not job_configs.empty:
            # Display job configuration details
            job_rows = []
            for _, job_config in job_configs.iterrows():
                config_data = job_config['job_configuration_data']
                job_rows.append([
                    job_config['job_id'],
                    config_data.get('Database', 'N/A'),
                    job_config['status']
                ])
            
            ux.table(job_rows, headers=["Job ID", "Database", "Status"])
            
            ux.info("\nEach job configuration contains:")
            ux.info("  - Full metadata from configuration file")
            ux.info("  - Database-specific fields from Databases sheet")
            ux.info("  - Additional fields: Description, Connection details, etc.")
        else:
            ux.warning("No job configurations found")
    
    # EDM DB Upgrade: show a sample of the jobs that belong to this batch
    if 'EDM DB Upgrade' in created_batches:
        upgrade_batch_id = created_batches['EDM DB Upgrade']

        ux.subheader("EDM DB Upgrade Jobs (first 5)")

        # At most five job configurations, each joined to its irp_job row
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        job_configs = execute_query(job_config_query, (upgrade_batch_id,))

        if job_configs.empty:
            ux.warning("No job configurations found")
        else:
            # One table row per job: id, database, target version, status
            job_rows = []
            for _, record in job_configs.iterrows():
                payload = record['job_configuration_data']
                row = [record['job_id']]
                row.extend(payload.get(field, 'N/A') for field in ('Database', 'target_edm_version'))
                row.append(record['status'])
                job_rows.append(row)

            ux.table(job_rows, headers=["Job ID", "Database", "Target Version", "Status"])

            # Explanatory notes printed under the table
            for note in (
                "\nEach job configuration contains:",
                "  - Full metadata from configuration file",
                "  - Database-specific fields from Databases sheet",
                "  - target_edm_version: The version to upgrade to",
            ):
                ux.info(note)
    
    # Portfolio Creation: show a sample of the jobs that belong to this batch
    if 'Portfolio Creation' in created_batches:
        portfolio_batch_id = created_batches['Portfolio Creation']

        ux.subheader("Portfolio Creation Jobs (first 5)")

        # At most five job configurations, each joined to its irp_job row
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        job_configs = execute_query(job_config_query, (portfolio_batch_id,))

        if job_configs.empty:
            ux.warning("No job configurations found")
        else:
            # One table row per job: id, portfolio, database (shown as "EDM"), status
            job_rows = []
            for _, record in job_configs.iterrows():
                payload = record['job_configuration_data']
                row = [record['job_id']]
                row.extend(payload.get(field, 'N/A') for field in ('Portfolio', 'Database'))
                row.append(record['status'])
                job_rows.append(row)

            ux.table(job_rows, headers=["Job ID", "Portfolio", "EDM", "Status"])

            # Explanatory notes printed under the table
            for note in (
                "\nEach job configuration contains:",
                "  - Full metadata from configuration file",
                "  - Portfolio-specific fields from Portfolios sheet",
                "  - Additional fields: Portfolio Name, Database, Base Portfolio flag, etc.",
            ):
                ux.info(note)
    
    # MRI Import: show a sample of the jobs that belong to this batch
    if 'MRI Import' in created_batches:
        mri_batch_id = created_batches['MRI Import']

        ux.subheader("MRI Import Jobs (first 5)")

        # At most five job configurations, each joined to its irp_job row
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        job_configs = execute_query(job_config_query, (mri_batch_id,))

        if job_configs.empty:
            ux.warning("No job configurations found")
        else:
            # Configuration keys shown between the job id and status columns
            displayed_fields = (
                'Portfolio',
                'Database',
                'accounts_import_file',
                'locations_import_file',
            )

            job_rows = []
            for _, record in job_configs.iterrows():
                payload = record['job_configuration_data']
                row = [record['job_id']]
                row.extend(payload.get(field, 'N/A') for field in displayed_fields)
                row.append(record['status'])
                job_rows.append(row)

            ux.table(job_rows, headers=["Job ID", "Portfolio", "EDM", "Accounts File", "Locations File", "Status"])

            # Explanatory notes printed under the table
            for note in (
                "\nEach job configuration contains:",
                "  - Full metadata from configuration file",
                "  - Portfolio-specific fields from Portfolios sheet",
                "  - CSV import filenames: accounts_import_file, locations_import_file",
                "  - Additional fields: Portfolio Name, Database, Import File, etc.",
            ):
                ux.info(note)
    
    # GeoHaz: show a sample of the jobs that belong to this batch
    if 'GeoHaz' in created_batches:
        geohaz_batch_id = created_batches['GeoHaz']

        ux.subheader("GeoHaz Jobs (first 5)")

        # At most five job configurations, each joined to its irp_job row
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        job_configs = execute_query(job_config_query, (geohaz_batch_id,))

        if job_configs.empty:
            ux.warning("No job configurations found")
        else:
            # One table row per job: id, portfolio, database, geocode version, status
            job_rows = []
            for _, record in job_configs.iterrows():
                payload = record['job_configuration_data']
                row = [record['job_id']]
                row.extend(
                    payload.get(field, 'N/A')
                    for field in ('Portfolio', 'Database', 'geocode_version')
                )
                row.append(record['status'])
                job_rows.append(row)

            ux.table(job_rows, headers=["Job ID", "Portfolio", "EDM", "Geocode Version", "Status"])

            # Explanatory notes printed under the table
            for note in (
                "\nEach job configuration contains:",
                "  - Full metadata from configuration file",
                "  - Portfolio-specific fields from Portfolios sheet",
                "  - geocode_version: The geocode version to use",
            ):
                ux.info(note)
    
    # Create Reinsurance Treaties: show a sample of the jobs that belong to this batch
    if 'Create Reinsurance Treaties' in created_batches:
        treaty_batch_id = created_batches['Create Reinsurance Treaties']

        ux.subheader("Create Reinsurance Treaties Jobs (first 5)")

        # At most five job configurations, each joined to its irp_job row
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        job_configs = execute_query(job_config_query, (treaty_batch_id,))

        if job_configs.empty:
            ux.warning("No job configurations found")
        else:
            # One table row per job: id, treaty name, database (shown as "EDM"), status
            job_rows = []
            for _, record in job_configs.iterrows():
                payload = record['job_configuration_data']
                row = [record['job_id']]
                row.extend(payload.get(field, 'N/A') for field in ('Treaty Name', 'Database'))
                row.append(record['status'])
                job_rows.append(row)

            ux.table(job_rows, headers=["Job ID", "Treaty Name", "EDM", "Status"])

            # Explanatory notes printed under the table
            for note in (
                "\nEach job configuration contains:",
                "  - Full metadata from configuration file",
                "  - Database (EDM) where the treaty will be created",
                "  - Treaty-specific fields from Reinsurance Treaties sheet",
            ):
                ux.info(note)
    
    # Preview Analysis jobs: list up to five jobs of the 'Analysis' batch with
    # their analysis name, portfolio, database and (truncated) analysis profile.
    if 'Analysis' in created_batches:
        analysis_batch_id = created_batches['Analysis']
        
        ux.subheader("Analysis Jobs (first 5)")
        
        # Get job configurations (at most five), each joined to its irp_job row
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        
        job_configs = execute_query(job_config_query, (analysis_batch_id,))
        
        if not job_configs.empty:
            # Display job configuration details
            job_rows = []
            for _, job_config in job_configs.iterrows():
                config_data = job_config['job_configuration_data']

                # Look the profile up once and normalise it before measuring it.
                # dict.get() only applies its default when the key is MISSING, so a
                # stored null (or any non-string value) would otherwise reach len()
                # and raise TypeError; the enclosing try/except would then skip
                # every preview that follows this one.
                analysis_profile = config_data.get('Analysis Profile', 'N/A')
                if analysis_profile is None:
                    analysis_profile = 'N/A'
                else:
                    analysis_profile = str(analysis_profile)

                # Keep the table narrow: show at most 25 characters of the profile
                if len(analysis_profile) > 25:
                    analysis_profile = analysis_profile[:25] + '...'

                job_rows.append([
                    job_config['job_id'],
                    config_data.get('Analysis Name', 'N/A'),
                    config_data.get('Portfolio', 'N/A'),
                    config_data.get('Database', 'N/A'),
                    analysis_profile,
                    job_config['status']
                ])
            
            ux.table(job_rows, headers=["Job ID", "Analysis Name", "Portfolio", "EDM", "Analysis Profile", "Status"])
            
            ux.info("\nEach job configuration contains:")
            ux.info("  - Full metadata from configuration file")
            ux.info("  - Analysis-specific fields from Analysis Table sheet")
            ux.info("  - Analysis Profile, Output Profile, Event Rate, Treaties, Tags")
        else:
            ux.warning("No job configurations found")

    # Grouping (analysis-only): show a sample of the jobs that belong to this batch
    if 'Grouping' in created_batches:
        grouping_batch_id = created_batches['Grouping']

        ux.subheader("Grouping Jobs - Analysis-only (first 5)")

        # At most five job configurations, each joined to its irp_job row
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        job_configs = execute_query(job_config_query, (grouping_batch_id,))

        if job_configs.empty:
            ux.warning("No job configurations found")
        else:
            job_rows = []
            for _, record in job_configs.iterrows():
                payload = record['job_configuration_data']
                members = payload.get('items', [])
                member_total = len(members)

                # Preview the first two members, then summarise how many are left
                preview = ', '.join(members[:2])
                hidden = member_total - 2
                if hidden > 0:
                    preview += f', ... (+{hidden} more)'

                job_rows.append([
                    record['job_id'],
                    payload.get('Group_Name', 'N/A'),
                    member_total,
                    preview,
                    record['status'],
                ])

            ux.table(job_rows, headers=["Job ID", "Group Name", "# Analyses", "Analyses (Preview)", "Status"])

            # Explanatory notes printed under the table
            for note in (
                "\nEach job configuration contains:",
                "  - Full metadata from configuration file",
                "  - Group_Name: Name of the group",
                "  - items: List of analysis names to group together",
            ):
                ux.info(note)

    # Grouping Rollup (groups of groups): show a sample of the jobs in this batch
    if 'Grouping Rollup' in created_batches:
        rollup_batch_id = created_batches['Grouping Rollup']

        ux.subheader("Grouping Rollup Jobs - Groups of Groups (first 5)")

        # At most five job configurations, each joined to its irp_job row
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        job_configs = execute_query(job_config_query, (rollup_batch_id,))

        if job_configs.empty:
            ux.warning("No job configurations found")
        else:
            job_rows = []
            for _, record in job_configs.iterrows():
                payload = record['job_configuration_data']
                members = payload.get('items', [])
                member_total = len(members)

                # Preview the first two members, then summarise how many are left
                preview = ', '.join(members[:2])
                hidden = member_total - 2
                if hidden > 0:
                    preview += f', ... (+{hidden} more)'

                job_rows.append([
                    record['job_id'],
                    payload.get('Group_Name', 'N/A'),
                    member_total,
                    preview,
                    record['status'],
                ])

            ux.table(job_rows, headers=["Job ID", "Group Name", "# Items", "Items (Preview)", "Status"])

            # Explanatory notes printed under the table, followed by an ordering reminder
            for note in (
                "\nEach job configuration contains:",
                "  - Full metadata from configuration file",
                "  - Group_Name: Name of the rollup group",
                "  - items: List of group names AND/OR analysis names",
            ):
                ux.info(note)
            ux.warning("⚠ These jobs require Grouping batch to complete first")

    # Export to RDM: show the jobs that belong to this batch (query returns at most five)
    if 'Export to RDM' in created_batches:
        export_batch_id = created_batches['Export to RDM']

        ux.subheader("Export to RDM Jobs")

        # Job configurations joined to their irp_job rows
        job_config_query = """
            SELECT 
                jc.id,
                jc.job_configuration_data,
                j.id as job_id,
                j.status
            FROM irp_job_configuration jc
            INNER JOIN irp_job j ON jc.id = j.job_configuration_id
            WHERE jc.batch_id = %s
            LIMIT 5
        """
        job_configs = execute_query(job_config_query, (export_batch_id,))

        if job_configs.empty:
            ux.warning("No job configurations found")
        else:
            job_rows = []
            for _, record in job_configs.iterrows():
                payload = record['job_configuration_data']

                # "Total Items" is the length of the stored analysis_names list;
                # the two counts are read as stored, defaulting to 0
                exported_names = payload.get('analysis_names', [])
                job_rows.append([
                    record['job_id'],
                    payload.get('rdm_name', 'N/A'),
                    payload.get('server_name', 'N/A'),
                    payload.get('analysis_count', 0),
                    payload.get('group_count', 0),
                    len(exported_names),
                    record['status'],
                ])

            ux.table(job_rows, headers=["Job ID", "RDM Name", "Server", "# Analyses", "# Groups", "Total Items", "Status"])

            # Explanatory notes printed under the table, followed by an ordering reminder
            for note in (
                "\nJob configuration contains:",
                "  - Full metadata from configuration file",
                "  - rdm_name: Target RDM database name",
                "  - server_name: Database server (databridge-1)",
                "  - analysis_names: Combined list of all analysis and group names to export",
            ):
                ux.info(note)
            ux.warning("⚠ This job requires all Grouping batches to complete first")
    
    step.log("Job configuration preview displayed")
    
except Exception as e:
    ux.error(f"✗ Failed to preview job configurations: {str(e)}")
    # Don't fail step, this is just display
    step.log(f"Warning: Failed to preview jobs: {str(e)}", level="WARNING")

## 8) Complete Step Execution

In [None]:
# Record the step's output, mark the step complete, and report the outcome
ux.header("Step Completion")

try:
    banner = "=" * 60

    # Payload handed to step.complete()
    output_data = {
        'configuration_id': config_id,
        'batches': created_batches,  # {batch_type: batch_id}
        'batch_types_created': batch_types_to_create,
        'total_job_count': total_jobs,
    }
    step.complete(output_data)

    # Success banner
    ux.success("\n" + banner)
    ux.success("✓ BATCHES CREATED SUCCESSFULLY")
    ux.success(banner)

    # Summary and pointer to the next stage
    ux.info(f"\nCreated {len(created_batches)} batch(es) with {total_jobs} total job(s)")
    ux.info("Batches are in INITIATED status and ready for submission")
    ux.info("\nNext: Stage 02 will handle batch submission and job monitoring")

except Exception as failure:
    # Report the failure, mark the step failed, then re-raise to the notebook
    reason = str(failure)
    ux.error(f"✗ Step completion failed: {reason}")
    step.fail(reason)
    raise