# Job Handler

> Functions for starting transcription jobs and handling SSE streaming

In [None]:
#| default_exp workflow.job_handler

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import asyncio
from typing import Dict, Any
from fasthtml.common import *

from cjm_plugin_system.core.manager import PluginManager

from cjm_fasthtml_workflow_transcription_single_file.core.config import SingleFileWorkflowConfig
from cjm_fasthtml_workflow_transcription_single_file.core.html_ids import SingleFileHtmlIds
from cjm_fasthtml_workflow_transcription_single_file.core.job_tracker import TranscriptionJob, TranscriptionJobTracker
from cjm_fasthtml_workflow_transcription_single_file.components.processor import transcription_in_progress
from cjm_fasthtml_workflow_transcription_single_file.components.results import transcription_results, transcription_error
from cjm_fasthtml_workflow_transcription_single_file.core.protocols import PluginRegistryProtocol
from cjm_fasthtml_workflow_transcription_single_file.storage.file_storage import ResultStorage
from cjm_fasthtml_workflow_transcription_single_file.workflow.workflow import SingleFileTranscriptionWorkflow

In [None]:
#| export
def get_job_session_info(
    job_id: str,  # Unique job identifier (accepted for interface symmetry; not read here)
    job: TranscriptionJob,  # Job object from the tracker
    plugin_manager: PluginManager,  # Plugin manager for getting plugin info
) -> tuple[Dict[str, Any], Dict[str, Any]]:  # Tuple of (file_info, plugin_info) dictionaries
    """Build the display dictionaries describing a job's file and plugin.

    `file_info` carries the job's file name and path. `plugin_info` carries the
    plugin id, a display title, and a `supports_streaming` flag that is always
    `False` here. The title is the plugin metadata's `name` when the plugin
    manager returns metadata, otherwise the job's `plugin_name`.
    """
    file_info = dict(name=job.file_name, path=job.file_path)

    # Resolve a human-readable title, falling back to the raw plugin name
    meta = plugin_manager.get_plugin_meta(job.plugin_name)
    if meta:
        display_title = meta.name
    else:
        display_title = job.plugin_name

    plugin_info = dict(
        id=job.plugin_name,
        title=display_title,
        supports_streaming=False,  # Could check from manifest if needed
    )
    return file_info, plugin_info

In [None]:
#| export
def _save_job_result_once(
    job_id: str,  # Job identifier
    job: TranscriptionJob,  # Job object
    data: Dict[str, Any],  # Transcription data containing text and metadata
    plugin_manager: PluginManager,  # Plugin manager for getting plugin info
    result_storage: ResultStorage,  # Storage for saving transcription results
) -> None:
    """Persist a job's transcription result at most once.

    Does nothing when the storage reports auto-save as disabled or when the
    job's metadata already has a truthy `saved_to_disk` entry. After a
    successful save the flag is set on the job's metadata. Any exception
    raised while saving is printed and swallowed, so this never raises to
    the SSE stream that calls it.
    """
    # Guard: auto-save turned off
    if not result_storage.should_auto_save():
        return

    # Guard: this job was already flagged as written
    already_saved = job.metadata and job.metadata.get('saved_to_disk')
    if already_saved:
        return

    try:
        file_info, plugin_info = get_job_session_info(job_id, job, plugin_manager)

        # Assemble the storage arguments; job attributes act as fallbacks
        save_kwargs = dict(
            job_id=job_id,
            file_path=file_info.get("path", job.file_path),
            file_name=file_info.get("name", job.file_name),
            plugin_id=plugin_info.get("id", job.plugin_name),
            plugin_name=plugin_info.get("title", job.plugin_name),
            text=data.get('text', ''),
            metadata=data.get('metadata', {}),
            additional_info={},
        )
        result_storage.save(**save_kwargs)

        # Record the save so later calls for this job become no-ops
        if not job.metadata:
            job.metadata = {}
        job.metadata['saved_to_disk'] = True
    except Exception as e:
        print(f"Error saving transcription result for job {job_id}: {e}")
In [None]:
#| export
def _create_sse_swap_message(
    content,  # HTML content to wrap
    container_id: str,  # Target container ID for the swap
):  # Div with OOB swap attributes
    """Return `content` wrapped in a Div carrying `container_id` and `hx_swap_oob="true"`."""
    oob_attrs = {"id": container_id, "hx_swap_oob": "true"}
    return Div(content, **oob_attrs)

## start_transcription_job

Creates a job in the job tracker, launches the plugin execution as a background asyncio task, and returns the in-progress UI.

In [None]:
#| export
def _transcription_result_to_dict(
    result: Any,  # Raw value returned by the plugin
) -> Dict[str, Any]:  # Result dictionary handed to the job tracker
    """Normalize a plugin result into a dictionary.

    Dictionaries are checked first: a dict subclass may also expose a
    `__dict__`, and reading it through `getattr` would discard its keys.
    """
    if isinstance(result, dict):
        return result
    if hasattr(result, '__dict__'):
        # Attribute-style result object (e.g., a dataclass)
        return {
            "text": getattr(result, 'text', ''),
            "confidence": getattr(result, 'confidence', None),
            "segments": getattr(result, 'segments', []),
            "metadata": getattr(result, 'metadata', {})
        }
    # Anything else is treated as plain text
    return {"text": str(result), "metadata": {}}


async def start_transcription_job(
    state: Dict[str, Any],  # Workflow state containing plugin_id, file_path, file_name, etc.
    request,  # FastHTML request object (not read by this function)
    workflow: SingleFileTranscriptionWorkflow,  # Workflow instance providing config and dependencies
):  # transcription_in_progress component showing job status
    """Start a transcription job and return the in-progress UI component.

    A job is created in the workflow's job tracker, the plugin is executed in
    a background asyncio task, and the task is registered with the tracker for
    cancellation support. The background task marks the job completed with a
    normalized result dictionary, or failed with the exception message.
    """
    # Note: UI uses plugin_id, internal API uses plugin_name
    plugin_name = state.get("plugin_id")
    file_path = state.get("file_path")
    file_name = state.get("file_name")

    # Create job in tracker
    job = workflow.job_tracker.create_job(
        plugin_name=plugin_name,
        file_path=file_path,
        file_name=file_name
    )

    # Define async execution function
    async def execute_transcription():
        # NOTE(review): mark_running is also called below with the task handle;
        # confirm this task-less call does not clear the stored task in the tracker.
        workflow.job_tracker.mark_running(job.id)
        try:
            # Execute via PluginManager
            result = await workflow.plugin_manager.execute_plugin_async(
                plugin_name,
                audio=file_path
            )
            workflow.job_tracker.mark_completed(job.id, _transcription_result_to_dict(result))
        except Exception as e:
            import traceback
            traceback.print_exc()
            workflow.job_tracker.mark_failed(job.id, str(e))

    # Create and store the async task for cancellation support
    task = asyncio.create_task(execute_transcription())
    workflow.job_tracker.mark_running(job.id, task)

    # Get plugin info for display
    plugin_meta = workflow.plugin_manager.get_plugin_meta(plugin_name)

    file_info = {
        "name": file_name,
        "path": file_path,
        "type": state.get("file_type", "unknown"),
        "size_str": state.get("file_size", "unknown")
    }

    plugin_info = {
        "id": plugin_name,
        "title": plugin_meta.name if plugin_meta else plugin_name,
        "supports_streaming": False
    }

    # Return in-progress view
    return transcription_in_progress(
        job_id=job.id,
        plugin_info=plugin_info,
        file_info=file_info,
        config=workflow.config,
        router=workflow.router,
    )

## create_job_stream_handler

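Creates an async generator function that polls the job tracker and streams the job's final outcome (results, error, or cancellation) over SSE, sending heartbeats while the job is still running.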

In [None]:
#| export
def create_job_stream_handler(
    job_id: str,  # Unique job identifier
    request,  # FastHTML request object (not read by this function)
    workflow: SingleFileTranscriptionWorkflow,  # Workflow instance providing config and dependencies
):  # Async generator function for SSE streaming
    """Create an SSE stream generator for monitoring job completion.

    Returns the `job_stream` async generator function (not yet called). When
    iterated, it polls the job tracker every `sse_poll_interval` seconds,
    emitting a heartbeat comment between polls, and sends one final message
    once the job is completed, failed, or cancelled. A job that reports
    `completed` without a successful result gets an error view instead of the
    stream closing with no terminal message. Exceptions inside the stream are
    printed and end the stream.
    """
    poll_interval = workflow.config.sse_poll_interval
    container_id = workflow.config.container_id
    # Build URL using router's .to() method for proper route generation
    stepflow_start_url = workflow.stepflow_router.start.to()
    terminal_statuses = ('completed', 'failed', 'cancelled')

    async def job_stream():
        try:
            # Check if job exists
            job = workflow.job_tracker.get_job(job_id)
            if not job:
                yield sse_message(Div("Job not found"))
                return

            # Poll for completion
            while True:
                job = workflow.job_tracker.get_job(job_id)
                if not job:
                    break

                # Check if job finished
                if job.status in terminal_statuses:
                    result = workflow.job_tracker.get_job_result(job_id)

                    if job.status == 'completed' and result and result.get('status') == 'success':
                        # Tolerate an explicit None under 'data'
                        data = result.get('data') or {}
                        file_info, plugin_info = get_job_session_info(job_id, job, workflow.plugin_manager)

                        # Save result to disk (only once)
                        _save_job_result_once(job_id, job, data, workflow.plugin_manager, workflow.result_storage)

                        results = transcription_results(
                            job_id=job_id,
                            transcription_text=data.get('text', ''),
                            metadata=data.get('metadata', {}),
                            file_info=file_info,
                            plugin_info=plugin_info,
                            config=workflow.config,
                            router=workflow.router,
                            stepflow_router=workflow.stepflow_router,
                        )

                        yield sse_message(_create_sse_swap_message(results, container_id))

                    elif job.status == 'failed':
                        file_info, _ = get_job_session_info(job_id, job, workflow.plugin_manager)
                        error_msg = transcription_error(
                            f"Transcription failed: {job.error}",
                            file_info,
                            config=workflow.config,
                            stepflow_router=workflow.stepflow_router,
                        )
                        yield sse_message(_create_sse_swap_message(error_msg, container_id))

                    elif job.status == 'cancelled':
                        # Return a message that triggers reload of the start view
                        redirect_div = Div(
                            Script(f"""
                                htmx.ajax('GET', '{stepflow_start_url}', {{
                                    target: '#{container_id}',
                                    swap: 'innerHTML'
                                }});
                            """),
                            id=container_id,
                            hx_swap_oob="true"
                        )
                        yield sse_message(redirect_div)

                    else:
                        # Status is 'completed' but the tracker returned no successful
                        # result; send an error view so the client still receives a
                        # terminal update.
                        file_info, _ = get_job_session_info(job_id, job, workflow.plugin_manager)
                        error_msg = transcription_error(
                            "Transcription finished but no result is available.",
                            file_info,
                            config=workflow.config,
                            stepflow_router=workflow.stepflow_router,
                        )
                        yield sse_message(_create_sse_swap_message(error_msg, container_id))

                    break

                # Heartbeat: an SSE comment line sent after each poll interval
                await asyncio.sleep(poll_interval)
                yield ": heartbeat\n\n"

        except Exception as e:
            print(f"Error in job stream for {job_id}: {e}")
            import traceback
            traceback.print_exc()

    return job_stream

In [None]:
#| hide
# Run nbdev's export step for this project (notebook-only cell, hidden from docs)
import nbdev; nbdev.nbdev_export()