# Step 03: Grouping Summary

This notebook provides a summary of all grouping jobs submitted in this stage.

**Tasks:**
- Retrieve all grouping batch results
- Display summary statistics
- Show job completion status
- Record step completion and send a Teams notification

## 1) Setup

In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys
from pathlib import Path
from urllib.parse import quote

from helpers.notebook_setup import initialize_notebook_context
from helpers import ux
from helpers.batch import read_batch, get_batch_jobs
from helpers.database import execute_query, get_current_schema
from helpers.constants import BatchType, BatchStatus, JobStatus
from helpers.teams_notification import TeamsNotificationClient

In [None]:
# Resolve the notebook context and the step tracker for this notebook file.
context, step = initialize_notebook_context('Step_03_Grouping_Summary.ipynb')

# Echo the resolved cycle/stage/step so the reader can confirm what is being summarized.
ux.header("Grouping Summary")
for label, value in (
    ("Cycle", context.cycle_name),
    ("Stage", context.stage_name),
    ("Step", context.step_name),
):
    ux.info(f"{label}: {value}")
ux.success(f"Step tracking initialized for '{context.step_name}'")

## 2) Retrieve Batch Information

In [None]:
# Look up the most recent COMPLETED run of Stage_01/Step_03 for this cycle.
# The 'batches' mapping in its output_data is read below as batch type -> batch ID.
ux.subheader("Retrieve Batch Information")

query = """
    SELECT sr.id, sr.step_id, sr.run_num, sr.output_data, sr.completed_ts
    FROM irp_step_run sr
    INNER JOIN irp_step s ON sr.step_id = s.id
    INNER JOIN irp_stage sg ON s.stage_id = sg.id
    INNER JOIN irp_cycle c ON sg.cycle_id = c.id
    WHERE c.cycle_name = %s
      AND sg.stage_num = 1
      AND s.step_num = 3
      AND sr.status = 'COMPLETED'
    ORDER BY sr.completed_ts DESC
    LIMIT 1
"""

result = execute_query(query, (context.cycle_name,))

if result.empty:
    raise ValueError("Batch creation step not found - please complete Stage_01/Step_03 first")

# The stored output can be NULL (or not a mapping at all) when the step recorded
# nothing; fall back to "no batches" instead of failing on .get().
# Named distinctly from the `output_data` dict that the completion cell builds.
batch_step_output = result.iloc[0]['output_data']
if not isinstance(batch_step_output, dict):
    ux.warning("Stage_01/Step_03 recorded no usable output data - assuming no batches were created")
    batch_step_output = {}

# 'batches' may be absent or explicitly null; both mean there is nothing to summarize.
batches = batch_step_output.get('batches') or {}

# Collect the IDs of the grouping-related batches that were actually created.
# Types that are missing (or stored as null) are skipped rather than passed to int().
grouping_batch_ids = [
    int(batches[batch_type])
    for batch_type in (BatchType.GROUPING, BatchType.GROUPING_ROLLUP)
    if batches.get(batch_type) is not None
]

ux.info(f"Found {len(grouping_batch_ids)} grouping batch(es)")
step.log(f"Retrieved {len(grouping_batch_ids)} grouping batch IDs")

## 3) Grouping Summary

In [None]:
# Tally job outcomes across every grouping batch and collect one display row per job.
# The total_* counters and all_jobs are reset here, so re-running this cell is safe.
ux.subheader("Grouping Jobs Summary")

total_jobs = 0
total_finished = 0
total_failed = 0
total_other = 0
all_jobs = []

# Loop-invariant lookup used to label each job with its group name and item count.
config_query = "SELECT job_configuration_data FROM irp_job_configuration WHERE id = %s"

for batch_id in grouping_batch_ids:
    # The batch record itself is not used below. The call is kept so that any
    # failure looking up the batch still surfaces at this point.
    # NOTE(review): drop this call if read_batch has no validating side effect.
    read_batch(batch_id)

    for job in get_batch_jobs(batch_id):
        # Skipped jobs are excluded from every count and from the detail table.
        if job['skipped']:
            continue

        total_jobs += 1
        status = job['status']
        if status == JobStatus.FINISHED:
            total_finished += 1
        elif status == JobStatus.FAILED:
            total_failed += 1
        else:
            # Any status other than FINISHED/FAILED is reported as "In Progress".
            total_other += 1

        # Get job config for display. A missing row, a NULL configuration, or a
        # null 'items' entry all degrade to the 'N/A' / 0 placeholders.
        config_result = execute_query(config_query, (job['job_configuration_id'],))
        config = None if config_result.empty else config_result.iloc[0]['job_configuration_data']
        if not isinstance(config, dict):
            config = {}

        all_jobs.append({
            'job_id': job['id'],
            'group_name': config.get('Group_Name', 'N/A'),
            'items_count': len(config.get('items') or []),
            'status': status
        })

# Display summary
summary_rows = [
    ["Total Jobs", total_jobs],
    ["Finished", total_finished],
    ["Failed", total_failed],
    ["In Progress", total_other]
]
ux.table(summary_rows, headers=["Metric", "Count"])

# Display job details
if all_jobs:
    ux.info("\nJob Details:")
    job_rows = [[j['job_id'], j['group_name'], j['items_count'], j['status']] for j in all_jobs]
    ux.table(job_rows, headers=["Job ID", "Group Name", "# Items", "Status"])

# Status message: failures take precedence, then unfinished work, then the empty case.
ux.info("")
if total_failed > 0:
    ux.warning(f"{total_failed} job(s) failed - review failed jobs above")
elif total_other > 0:
    ux.info(f"{total_other} job(s) still in progress")
elif total_jobs == 0:
    ux.info("No grouping jobs in this cycle")
else:
    ux.success(f"All {total_finished} grouping job(s) completed successfully")

## 4) Complete Step Execution

In [None]:
# Complete step execution
ux.header("Step Completion")

# Record the headline counts as this step's output.
# Named distinctly from the Stage_01/Step_03 output read in the retrieval cell.
step_output = {
    'total_jobs': total_jobs,
    'finished': total_finished,
    'failed': total_failed,
    'in_progress': total_other
}

# Complete the step
step.complete(step_output)

# Send Teams notification for milestone completion
teams = TeamsNotificationClient()

# Build action buttons with notebook link and dashboard.
# Each button is only added when its base URL is configured in the environment.
actions = []
base_url = os.environ.get('TEAMS_DEFAULT_JUPYTERLAB_URL', '')
if base_url:
    notebook_path = str(context.notebook_path)
    if 'workflows' in notebook_path:
        # Keep the part after the last 'workflows' component, normalise Windows
        # separators, and percent-encode it so spaces/special characters do not
        # break the link ('/' is left intact by quote's default `safe`).
        rel_path = notebook_path.split('workflows')[-1].replace('\\', '/').lstrip('/')
        notebook_url = f"{base_url.rstrip('/')}/lab/tree/workspace/workflows/{quote(rel_path)}"
        actions.append({"title": "Open Notebook", "url": notebook_url})

dashboard_url = os.environ.get('TEAMS_DEFAULT_DASHBOARD_URL', '')
if dashboard_url:
    schema = get_current_schema()
    # Schema and cycle name are single path segments, so encode '/' as well.
    cycle_dashboard_url = (
        f"{dashboard_url.rstrip('/')}/{quote(str(schema), safe='')}"
        f"/cycle/{quote(str(context.cycle_name), safe='')}"
    )
    actions.append({"title": "View Cycle Dashboard", "url": cycle_dashboard_url})

# Build summary message; failed / in-progress lines appear only when non-zero.
summary_parts = [
    f"**Total Groups:** {total_jobs}",
    f"**Completed:** {total_finished}",
]

if total_failed > 0:
    summary_parts.append(f"**Failed:** {total_failed}")
if total_other > 0:
    summary_parts.append(f"**In Progress:** {total_other}")

# The message body is the same for both outcomes; only the title and severity differ.
message = (
    f"**Cycle:** {context.cycle_name}\n"
    f"**Stage:** {context.stage_name}\n"
    f"**Step:** {context.step_name}\n\n"
    + "\n".join(summary_parts)
)

# Send warning if there are failures, otherwise success
if total_failed > 0:
    teams.send_warning(
        title=f"[{context.cycle_name}] Grouping Summary - Completed with Failures",
        message=message,
        actions=actions or None
    )
else:
    teams.send_success(
        title=f"[{context.cycle_name}] Grouping Summary Completed",
        message=message,
        actions=actions or None
    )

ux.success("\n" + "="*60)
ux.success("GROUPING SUMMARY COMPLETE")
ux.success("="*60)
ux.info(f"\nTotal groups created: {total_finished}")
if total_failed > 0:
    ux.warning(f"Failed jobs: {total_failed}")
ux.info("\nGrouping stage complete. Proceed to Export stage if needed.")