# Step 02: Verify RDM Export

This notebook verifies that all expected analyses and groups were successfully exported to the RDM.

**Tasks:**
- Retrieve expected analyses and groups from configuration
- Query the RDM database to get actual exported items
- Compare expected vs actual and report discrepancies

**Prerequisites:**
- Stage_06/Step_01 must be complete (RDM export job finished)
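- RDM export batch must have status COMPLETED
- Stage_01/Step_03 must be complete (this notebook reads the Export to RDM batch ID from that step's output)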

## 1) Setup

In [None]:
%load_ext autoreload
%autoreload 2

from helpers.notebook_setup import initialize_notebook_context
from helpers import ux
from helpers.batch import read_batch, get_batch_jobs
from helpers.database import execute_query
from helpers.sqlserver import execute_query_from_file
from helpers.irp_integration import IRPClient
from helpers.constants import BatchType, BatchStatus

In [None]:
# Register this notebook with step tracking (reruns are allowed) and obtain
# the workflow context used by every later cell.
context, step = initialize_notebook_context(
    'Step_02_Verify_RDM_Export.ipynb',
    allow_rerun=True,
)

# Show where in the workflow this run sits
ux.header("Verify RDM Export")
for label, attr in (("Cycle", "cycle_name"), ("Stage", "stage_name"), ("Step", "step_name")):
    ux.info(f"{label}: {getattr(context, attr)}")
ux.success(f"Step tracking initialized for '{context.step_name}'")

## 2) Retrieve Export Batch and Verify Completion

In [None]:
# Retrieve the Export to RDM batch ID recorded in the output of Stage_01/Step_03
ux.subheader("Retrieve Export to RDM Batch")

# Most recent COMPLETED run of stage 1 / step 3 for the current cycle
query = """
    SELECT sr.id, sr.step_id, sr.run_num, sr.output_data, sr.completed_ts
    FROM irp_step_run sr
    INNER JOIN irp_step s ON sr.step_id = s.id
    INNER JOIN irp_stage sg ON s.stage_id = sg.id
    INNER JOIN irp_cycle c ON sg.cycle_id = c.id
    WHERE c.cycle_name = %s
      AND sg.stage_num = 1
      AND s.step_num = 3
      AND sr.status = 'COMPLETED'
    ORDER BY sr.completed_ts DESC
    LIMIT 1
"""

result = execute_query(query, (context.cycle_name,))

if result.empty:
    raise ValueError("Batch creation step not found - please complete Stage_01/Step_03 first")

output_data = result.iloc[0]['output_data']

# Guard against a missing (NULL) or non-mapping output_data so the failure is
# reported clearly instead of surfacing as an AttributeError on `.get`.
if not hasattr(output_data, 'get'):
    raise ValueError(
        "Stage_01/Step_03 step run has no usable output_data "
        f"(got {type(output_data).__name__}) - cannot locate the Export to RDM batch"
    )

# 'batches' may be absent or null; both mean no batches were recorded
batches = output_data.get('batches') or {}

if BatchType.EXPORT_TO_RDM not in batches:
    raise ValueError(f"Export to RDM batch not found. Available: {list(batches.keys())}")

export_batch_id = int(batches[BatchType.EXPORT_TO_RDM])

ux.success(f"Retrieved Export to RDM batch: ID={export_batch_id}")
step.log(f"Retrieved Export to RDM batch: ID={export_batch_id}")

In [None]:
# Confirm the export batch has finished before verifying its contents
ux.subheader("Verify Batch Status")

batch = read_batch(export_batch_id)
batch_status = batch['status']

# Property/value rows for the batch summary table
batch_info = [
    [prop, value]
    for prop, value in (
        ("Batch ID", batch['id']),
        ("Batch Type", batch['batch_type']),
        ("Status", batch_status),
        ("Created", batch['created_ts'].strftime('%Y-%m-%d %H:%M:%S')),
    )
]
ux.table(batch_info, headers=["Property", "Value"])

if batch_status == BatchStatus.COMPLETED:
    ux.success("Batch status is COMPLETED - proceeding with verification")
else:
    # Stop here: verifying against an unfinished export would be meaningless
    ux.warning(f"\nBatch is not yet COMPLETED (status: {batch_status})")
    ux.info("Please wait for the RDM export batch to complete before running this verification.")
    raise ValueError(f"Batch status must be COMPLETED, got: {batch_status}")

## 3) Get Expected Analyses and Groups from Configuration

In [None]:
# Collect the expected item names and counts from the configuration of every
# job in the export batch.
ux.subheader("Expected Items from Configuration")

jobs = get_batch_jobs(export_batch_id)

# Loop-invariant query text, built once
config_query = "SELECT job_configuration_data FROM irp_job_configuration WHERE id = %s"

expected_names = set()
rdm_name = None
rdm_names_seen = set()  # every distinct RDM name referenced by the jobs
total_analysis_count = 0
total_group_count = 0

for job in jobs:
    config_result = execute_query(config_query, (job['job_configuration_id'],))

    if config_result.empty:
        # Surface the gap instead of silently dropping this job's expected items
        ux.warning(
            f"No configuration found for job configuration ID "
            f"{job['job_configuration_id']} - its items are not included in the expected set"
        )
        continue

    config = config_result.iloc[0]['job_configuration_data']

    # Use the first non-empty RDM name; remember all of them to detect mismatches
    job_rdm_name = config.get('rdm_name')
    if job_rdm_name:
        rdm_names_seen.add(job_rdm_name)
        if rdm_name is None:
            rdm_name = job_rdm_name

    # `or` fallbacks tolerate keys that are present but null
    expected_names.update(config.get('analysis_names') or [])
    total_analysis_count += config.get('analysis_count') or 0
    total_group_count += config.get('group_count') or 0

# Without an RDM name the later database lookup cannot work; fail here with context
if not rdm_name:
    raise ValueError(
        f"No RDM name found in the job configurations of batch {export_batch_id} "
        f"({len(jobs)} job(s) checked)"
    )

# All jobs are expected to target the same RDM; flag it when they do not
if len(rdm_names_seen) > 1:
    ux.warning(
        f"Jobs reference multiple RDM names {sorted(map(str, rdm_names_seen))}; "
        f"verifying against '{rdm_name}'"
    )

ux.info(f"RDM Name: {rdm_name}")
ux.info(f"Total expected items: {len(expected_names)}")
ux.info(f"  - Analyses: {total_analysis_count}")
ux.info(f"  - Groups: {total_group_count}")

step.log(f"Expected {len(expected_names)} items in RDM '{rdm_name}'")

## 4) Query RDM Database for Actual Exports

In [None]:
# Fetch the items actually present in the RDM database
ux.subheader("Query RDM Database")

# Resolve the configured RDM name to its full database name via the IRP client
irp_client = IRPClient()
rdm_full_name = irp_client.rdm.get_rdm_database_full_name(rdm_name)

ux.info(f"RDM Name (from config): {rdm_name}")
ux.info(f"RDM Full Name (from Moody's): {rdm_full_name}")

try:
    # Run the summary SQL file against the full database name
    rdm_results = execute_query_from_file(
        'data_export/rdm_analysis_summary.sql',
        params={'RDM_NAME': rdm_full_name},
        connection='DATABRIDGE',
    )

    got_results = bool(rdm_results) and len(rdm_results) > 0
    if not got_results:
        rdm_df = None
        ux.warning("No data returned from RDM query")
    else:
        # Only the first returned result set is used
        rdm_df = rdm_results[0]
        ux.success(f"Retrieved {len(rdm_df)} items from RDM")

except Exception as e:
    # Report the failure in the notebook output, then let it propagate
    ux.error(f"Failed to query RDM: {str(e)}")
    raise

In [None]:
# Split the RDM summary rows into analyses and groups
ux.subheader("RDM Contents Summary")

if rdm_df is None or rdm_df.empty:
    # Nothing came back: compare against empty sets downstream
    actual_names, actual_analyses, actual_groups = set(), set(), set()
    ux.warning("No items found in RDM")
else:
    # Every name present in the RDM
    actual_names = set(rdm_df['NAME'].tolist())

    # ISGROUP == 0 marks an analysis row, ISGROUP == 1 a group row
    group_flag = rdm_df['ISGROUP']
    actual_analyses = set(rdm_df.loc[group_flag == 0, 'NAME'].tolist())
    actual_groups = set(rdm_df.loc[group_flag == 1, 'NAME'].tolist())

    ux.info(f"Total items in RDM: {len(actual_names)}")
    ux.info(f"  - Analyses: {len(actual_analyses)}")
    ux.info(f"  - Groups: {len(actual_groups)}")

    step.log(f"Found {len(actual_names)} items in RDM ({len(actual_analyses)} analyses, {len(actual_groups)} groups)")

## 5) Compare Expected vs Actual

In [None]:
# Set-based comparison of configured names against names found in the RDM
ux.subheader("Comparison Results")

missing_from_rdm = expected_names.difference(actual_names)  # expected, not exported
extra_in_rdm = actual_names.difference(expected_names)      # exported, not expected
matched = expected_names.intersection(actual_names)

# One row per metric with the size of the corresponding set
comparison_summary = [
    [metric, len(items)]
    for metric, items in (
        ("Expected items", expected_names),
        ("Actual items in RDM", actual_names),
        ("Matched", matched),
        ("Missing from RDM", missing_from_rdm),
        ("Extra in RDM (unexpected)", extra_in_rdm),
    )
]
ux.table(comparison_summary, headers=["Metric", "Count"])

# Any missing or extra item counts as a discrepancy
has_discrepancies = bool(missing_from_rdm) or bool(extra_in_rdm)

if not has_discrepancies:
    ux.success("\nAll expected items found in RDM - no discrepancies!")
else:
    ux.warning("\nDiscrepancies detected!")

In [None]:
# List items that were configured for export but are absent from the RDM
if not missing_from_rdm:
    ux.success("No missing items - all expected items are in the RDM")
else:
    ux.subheader("Missing from RDM")
    ux.error(f"The following {len(missing_from_rdm)} item(s) were expected but NOT found in the RDM:")

    for missing_name in sorted(missing_from_rdm):
        ux.error(f"  - {missing_name}")

    # Troubleshooting hints shown beneath the list
    for hint in (
        "\nPossible causes:",
        "  - Export job failed for these items",
        "  - Analysis/group name mismatch between config and Moody's",
        "  - Items were deleted from Moody's before export",
    ):
        ux.info(hint)

In [None]:
# List items present in the RDM that no job configuration asked for
if not extra_in_rdm:
    ux.success("No extra items - RDM contains only expected items")
else:
    ux.subheader("Extra Items in RDM")
    ux.warning(f"The following {len(extra_in_rdm)} item(s) are in the RDM but were NOT in the configuration:")

    for extra_name in sorted(extra_in_rdm):
        ux.warning(f"  - {extra_name}")

    # Troubleshooting hints shown beneath the list
    for hint in (
        "\nPossible causes:",
        "  - RDM was appended to from a previous export",
        "  - Additional items were manually exported",
        "  - Configuration was modified after batch creation",
    ):
        ux.info(hint)

## 6) Complete Step Execution

In [None]:
# Record the verification outcome on the step, notify Teams, and print a
# final summary banner.
ux.header("Step Completion")

# Payload stored with the completed step run
output_data = {
    'rdm_name': rdm_name,
    'rdm_full_name': rdm_full_name,
    'expected_count': len(expected_names),
    'actual_count': len(actual_names),
    'matched_count': len(matched),
    'missing_count': len(missing_from_rdm),
    'extra_count': len(extra_in_rdm),
    'missing_items': sorted(list(missing_from_rdm)),
    'extra_items': sorted(list(extra_in_rdm)),
    'has_discrepancies': has_discrepancies,
    'actual_analyses': len(actual_analyses),
    'actual_groups': len(actual_groups)
}

# Complete the step
step.complete(output_data)

# Send Teams notification for milestone completion
import os
from helpers.teams_notification import TeamsNotificationClient
from helpers.database import get_current_schema
teams = TeamsNotificationClient()

# Build action buttons with notebook link and dashboard; each button is only
# added when its base URL environment variable is set.
actions = []
base_url = os.environ.get('TEAMS_DEFAULT_JUPYTERLAB_URL', '')
if base_url:
    notebook_path = str(context.notebook_path)
    if 'workflows' in notebook_path:
        # Keep only the part of the path after the last 'workflows' segment
        rel_path = notebook_path.split('workflows')[-1].lstrip('/\\')
        notebook_url = f"{base_url.rstrip('/')}/lab/tree/workspace/workflows/{rel_path}"
        actions.append({"title": "Open Notebook", "url": notebook_url})

dashboard_url = os.environ.get('TEAMS_DEFAULT_DASHBOARD_URL', '')
if dashboard_url:
    schema = get_current_schema()
    cycle_dashboard_url = f"{dashboard_url.rstrip('/')}/{schema}/cycle/{context.cycle_name}"
    actions.append({"title": "View Cycle Dashboard", "url": cycle_dashboard_url})

# Build summary message
summary_parts = [
    f"**RDM:** {rdm_full_name}",
    f"**Expected:** {len(expected_names)} | **Actual:** {len(actual_names)} | **Matched:** {len(matched)}",
]
# Non-empty missing/extra sets are exactly the discrepancy cases
if missing_from_rdm:
    summary_parts.append(f"**Missing:** {len(missing_from_rdm)} item(s)")
if extra_in_rdm:
    summary_parts.append(f"**Extra:** {len(extra_in_rdm)} item(s)")

# Header shared by the warning and success notifications
message_header = (
    f"**Cycle:** {context.cycle_name}\n"
    f"**Stage:** {context.stage_name}\n"
    f"**Step:** {context.step_name}\n\n"
)
summary_text = "\n".join(summary_parts)

# Send warning if there are discrepancies, otherwise success
if has_discrepancies:
    teams.send_warning(
        title=f"[{context.cycle_name}] RDM Export Verification - Discrepancies Found",
        message=message_header + summary_text,
        actions=actions if actions else None
    )
else:
    teams.send_success(
        title=f"[{context.cycle_name}] RDM Export Verification Completed",
        # \u2713 is the check mark; written as an escape because the literal
        # had been corrupted into mojibake by an encoding round-trip.
        message=message_header + summary_text + "\n\n\u2713 All expected items found in RDM",
        actions=actions if actions else None
    )

# Final banner in the notebook output
ux.success("\n" + "="*60)
if has_discrepancies:
    ux.warning("RDM EXPORT VERIFICATION COMPLETED WITH DISCREPANCIES")
else:
    ux.success("RDM EXPORT VERIFICATION COMPLETED SUCCESSFULLY")
ux.success("="*60)

ux.info(f"\nRDM: {rdm_full_name}")
ux.info(f"Expected: {len(expected_names)} | Actual: {len(actual_names)} | Matched: {len(matched)}")

if missing_from_rdm:
    ux.error(f"Missing: {len(missing_from_rdm)} item(s)")
if extra_in_rdm:
    ux.warning(f"Extra: {len(extra_in_rdm)} item(s)")