# Job Handler

> Functions for starting transcription jobs and handling SSE streaming

In [None]:
#| default_exp workflow.job_handler

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import asyncio
from typing import Dict, Any
from fasthtml.common import *

from cjm_fasthtml_workflow_transcription_single_file.core.config import SingleFileWorkflowConfig
from cjm_fasthtml_workflow_transcription_single_file.core.html_ids import SingleFileHtmlIds
from cjm_fasthtml_workflow_transcription_single_file.components.processor import transcription_in_progress
from cjm_fasthtml_workflow_transcription_single_file.components.results import transcription_results, transcription_error
from cjm_fasthtml_workflow_transcription_single_file.core.protocols import PluginRegistryProtocol
from cjm_fasthtml_workflow_transcription_single_file.storage.file_storage import ResultStorage

In [None]:
#| export
def get_job_session_info(
    job_id: str,  # Unique job identifier
    job,  # Job object from the manager
    plugin_registry: PluginRegistryProtocol,  # Plugin registry for getting plugin info
) -> tuple[Dict[str, Any], Dict[str, Any]]:  # Tuple of (file_info, plugin_info) dictionaries
    """Describe a job's source file and plugin as two plain dictionaries.

    Missing job attributes fall back to placeholders ("unknown" / ""), and a
    plugin the registry does not return is described by its id alone.
    """
    # Source file details are read straight off the job object
    source_name = getattr(job, "file_name", "unknown")
    source_path = getattr(job, "file_path", "")

    # Plugin details come from the registry, keyed by the job's plugin id
    plugin_id = getattr(job, "plugin_id", "unknown")
    registered = plugin_registry.get_plugin(plugin_id)
    if registered:
        display_title = registered.title
        streaming = registered.supports_streaming
    else:
        display_title = plugin_id
        streaming = False

    return (
        {"name": source_name, "path": source_path},
        {"id": plugin_id, "title": display_title, "supports_streaming": streaming},
    )

In [None]:
#| export
def _save_job_result_once(
    job_id: str,  # Job identifier
    job,  # Job object
    data: Dict[str, Any],  # Transcription data containing text and metadata
    plugin_registry: PluginRegistryProtocol,  # Plugin registry for getting plugin info
    result_storage: ResultStorage,  # Storage for saving transcription results
) -> None:
    """Save transcription result to disk, ensuring it's only saved once per job.
    
    Called from the SSE stream handler as a fallback. The primary save mechanism
    is the workflow's `_on_job_completed` callback called by TranscriptionJobManager.
    """
    # Skip if auto-save is disabled
    if not result_storage.should_auto_save():
        return

    # Check if job metadata indicates it's already been saved
    if hasattr(job, 'metadata') and job.metadata and job.metadata.get('saved_to_disk'):
        return

    try:
        # Get file and plugin info from job attributes and registry
        file_info, plugin_info = get_job_session_info(job_id, job, plugin_registry)

        result_storage.save(
            job_id=job_id,
            file_path=file_info.get("path", getattr(job, "file_path", "")),
            file_name=file_info.get("name", getattr(job, "file_name", "")),
            plugin_id=plugin_info.get("id", getattr(job, "plugin_id", "")),
            plugin_name=plugin_info.get("title", getattr(job, "plugin_id", "")),
            text=data.get('text', ''),
            metadata=data.get('metadata', {}),
            additional_info={}
        )

        # Mark as saved in job metadata
        if not job.metadata:
            job.metadata = {}
        job.metadata['saved_to_disk'] = True

    except Exception as e:
        print(f"Error saving transcription result for job {job_id}: {e}")

In [None]:
#| export
def _create_sse_swap_message(
    content,  # HTML content to wrap
    container_id: str,  # Target container ID for the swap
):  # Div with OOB swap attributes
    """Build a Div carrying `content` that HTMX swaps out-of-band into `container_id`."""
    # The wrapper takes the target's id and is flagged for an out-of-band swap
    oob_attrs = {"id": container_id, "hx_swap_oob": "true"}
    return Div(content, **oob_attrs)

## start_transcription_job

Starts a transcription job using the job manager and returns the in-progress UI.

In [None]:
#| export
async def start_transcription_job(
    state: Dict[str, Any],  # Workflow state containing plugin_id, file_path, file_name, etc.
    request,  # FastHTML request object
    config: SingleFileWorkflowConfig,  # Workflow configuration
    router,  # Workflow router for generating route URLs
    transcription_manager,  # Manager for starting transcription jobs
    plugin_registry: PluginRegistryProtocol,  # Plugin registry for getting plugin info
):  # transcription_in_progress component showing job status
    """Start the transcription job when the workflow completes.

    Invoked from StepFlow's `on_complete` handler once the user has confirmed
    and clicked "Start Transcription".
    """
    selected_plugin, source_path, source_name = (
        state.get(key) for key in ("plugin_id", "file_path", "file_name")
    )

    # Hand the job to the internal manager before building any UI
    started_job = await transcription_manager.start_transcription(
        plugin_id=selected_plugin,
        file_path=source_path,
        file_name=source_name,
    )

    # Look up display details for the plugin; fall back to the bare id
    registered = plugin_registry.get_plugin(selected_plugin)
    if registered:
        display_title = registered.title
        streaming = registered.supports_streaming
    else:
        display_title = selected_plugin
        streaming = False

    # Note: Workflow state is cleared by the workflow's on_complete handler
    # after this function returns, via state_store.clear_state()

    # Render the in-progress view for the newly started job
    return transcription_in_progress(
        job_id=started_job.id,
        plugin_info={
            "id": selected_plugin,
            "title": display_title,
            "supports_streaming": streaming,
        },
        file_info={
            "name": source_name,
            "path": source_path,
            "type": state.get("file_type", "unknown"),
            "size_str": state.get("file_size", "unknown"),
        },
        config=config,
        router=router,
    )

## create_job_stream_handler

Creates an async generator function that streams job status over SSE: heartbeats while the job is running, then a final UI update once it finishes.

In [None]:
#| export
def create_job_stream_handler(
    job_id: str,  # Unique job identifier
    request,  # FastHTML request object
    config: SingleFileWorkflowConfig,  # Workflow configuration
    router,  # Workflow router for generating route URLs
    stepflow_router: APIRouter,  # StepFlow router for generating stepflow URLs
    transcription_manager,  # Manager for getting job status
    plugin_registry: PluginRegistryProtocol,  # Plugin registry for getting plugin info
    result_storage: ResultStorage,  # Storage for saving transcription results
):  # Async generator for SSE streaming
    """Create an SSE stream generator for monitoring job completion.

    The returned `job_stream` async generator function polls the job every
    `config.sse_poll_interval` seconds. While the job is still running it yields
    an SSE comment as a heartbeat; once the job is completed, failed or cancelled
    it yields one out-of-band swap for `config.container_id` and stops.
    Exceptions raised inside the stream are printed and end the stream.
    """
    poll_interval = config.sse_poll_interval
    container_id = config.container_id
    # Build URL using router's .to() method for proper route generation
    stepflow_start_url = stepflow_router.start.to()
    terminal_states = ('completed', 'failed', 'cancelled')

    def render_finished_job(job):
        """Build the OOB swap element for a job that reached a terminal state."""
        result = transcription_manager.get_job_result(job_id)
        status = job.status

        if status == 'completed' and result and result.get('status') == 'success':
            # Tolerate an explicit `None` under 'data' as well as a missing key
            data = result.get('data') or {}
            file_info, plugin_info = get_job_session_info(job_id, job, plugin_registry)

            # Save result to disk (only once)
            _save_job_result_once(job_id, job, data, plugin_registry, result_storage)

            content = transcription_results(
                job_id=job_id,
                transcription_text=data.get('text', ''),
                metadata=data.get('metadata', {}),
                file_info=file_info,
                plugin_info=plugin_info,
                config=config,
                router=router,
                stepflow_router=stepflow_router,
            )
        elif status == 'cancelled':
            # We can't call stepflow_router.start directly, so send a script that
            # makes the client reload the start view into the container.
            # The literal's internal whitespace is sent to the client unchanged.
            content = Script(f"""
                                htmx.ajax('GET', '{stepflow_start_url}', {{
                                    target: '#{container_id}',
                                    swap: 'innerHTML'
                                }});
                            """)
        else:
            file_info, _ = get_job_session_info(job_id, job, plugin_registry)
            if status == 'failed':
                message = f"Transcription failed: {job.error}"
            else:
                # 'completed' without a successful result: show an error instead
                # of closing the stream silently with the in-progress view still up.
                message = "Transcription completed but no result was returned."
            content = transcription_error(
                message,
                file_info,
                config=config,
                stepflow_router=stepflow_router,
            )

        return _create_sse_swap_message(content, container_id)

    async def job_stream():
        try:
            # Check if job exists
            if not transcription_manager.get_job(job_id):
                yield sse_message(Div("Job not found"))
                return

            # Poll for completion
            while True:
                job = transcription_manager.get_job(job_id)
                if not job:
                    # Job disappeared while polling; end the stream
                    break

                if job.status in terminal_states:
                    yield sse_message(render_finished_job(job))
                    break

                # Heartbeat: an SSE comment line sent after each poll interval
                await asyncio.sleep(poll_interval)
                yield ": heartbeat\n\n"

        except Exception as e:
            # Best-effort stream: report the failure and let the stream end
            print(f"Error in job stream for {job_id}: {e}")
            import traceback
            traceback.print_exc()

    return job_stream

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()