# Single File Transcription Workflow

> Main workflow class orchestrating all subsystems for single-file transcription

In [None]:
#| default_exp workflow.workflow

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from typing import Dict, Any, List, Optional
from fasthtml.common import *
from fasthtml.common import APIRouter
from fastcore.basics import patch

from cjm_fasthtml_interactions.patterns.step_flow import Step, StepFlow
from cjm_fasthtml_interactions.core.context import InteractionContext
from cjm_fasthtml_interactions.patterns.async_loading import AsyncLoadingContainer
from cjm_fasthtml_sse.core import SSEBroadcastManager
from cjm_fasthtml_workers.extensions.adapters import (
    ResourceManagerAdapter,
    SSEBroadcasterAdapter,
)
from cjm_transcription_job_manager.core.manager import TranscriptionJobManager
from cjm_fasthtml_resources.core.manager import ResourceManager
from cjm_fasthtml_plugins.core.registry import UnifiedPluginRegistry
from cjm_transcription_plugin_system.plugin_interface import TranscriptionPlugin
from cjm_fasthtml_settings.core.utils import get_default_values_from_schema

from cjm_fasthtml_workflow_transcription_single_file.core.config import SingleFileWorkflowConfig
from cjm_fasthtml_workflow_transcription_single_file.core.html_ids import SingleFileHtmlIds
from cjm_fasthtml_workflow_transcription_single_file.core.adapters import PluginRegistryAdapter, DefaultConfigPluginRegistryAdapter
from cjm_fasthtml_workflow_transcription_single_file.media.library import MediaLibrary
from cjm_fasthtml_workflow_transcription_single_file.storage.file_storage import ResultStorage
from cjm_fasthtml_workflow_transcription_single_file.components.steps import (
    render_plugin_selection,
    render_file_selection,
    render_confirmation,
)
from cjm_fasthtml_workflow_transcription_single_file.workflow.job_handler import start_transcription_job

## SingleFileTranscriptionWorkflow Class

The main workflow class that orchestrates all subsystems for single-file transcription:
- Plugin registry for transcription plugins
- Resource manager for GPU/CPU availability
- Job manager for background transcription execution
- Media library for file discovery and serving
- Result storage for saving transcripts
- Step flow for the 3-step wizard UI

In [None]:
#| export
class SingleFileTranscriptionWorkflow:
    """Self-contained workflow for transcribing one media file at a time.

    On construction the instance builds and owns its own UnifiedPluginRegistry,
    ResourceManager, SSEBroadcastManager, TranscriptionJobManager, MediaLibrary,
    ResultStorage, a StepFlow wizard (plugin → file → confirm), and the
    APIRouter instances that expose them.
    """

    # Class-level metadata for future plugin system discovery
    workflow_name: str = "single_file_transcription"
    workflow_version: str = "1.0.0"
    workflow_category: str = "transcription"
    workflow_title: str = "Single File Transcription"
    workflow_description: str = "Transcribe individual audio/video files using configurable transcription plugins"

    def __init__(
        self,
        config: Optional[SingleFileWorkflowConfig] = None,  # Explicit config (bypasses auto-loading)
        **config_overrides  # Override specific config values
    ):
        """Build every subsystem from an explicit or auto-loaded configuration.

        When `config` is given it is used as-is and `config_overrides` are not
        consulted; otherwise the saved configuration is loaded and the
        overrides are applied to it.
        """
        self.config = (
            config
            if config is not None
            else SingleFileWorkflowConfig.from_saved_config(**config_overrides)
        )
        cfg = self.config

        # Populated by setup()
        self._app = None

        # Workflow-private plugin registry, isolated through its own config directory
        self._plugin_registry = UnifiedPluginRegistry(config_dir=cfg.plugin_config_dir)

        # Auto-discover transcription plugins under the configured category
        self._plugin_registry.register_plugin_system(
            category=cfg.plugin_category,
            plugin_interface=TranscriptionPlugin,
            display_name="Transcription",
            auto_discover=True,
        )

        # Give every discovered plugin a config file (schema defaults if needed)
        # so the worker can load plugins nobody has configured explicitly
        self._ensure_plugin_configs_exist()

        # Resource tracking with the workflow's own GPU memory threshold
        self._resource_manager = ResourceManager(
            gpu_memory_threshold_percent=cfg.gpu_memory_threshold_percent
        )

        # Workflow-private SSE broadcaster
        self._sse_manager = SSEBroadcastManager()

        # Job manager wired through adapters; the plugin adapter supplies default
        # config values for unconfigured plugins, and completed jobs are routed
        # back to this workflow's completion hook
        self._transcription_manager = TranscriptionJobManager(
            plugin_registry=DefaultConfigPluginRegistryAdapter(
                self._plugin_registry,
                category=cfg.plugin_category,
            ),
            resource_manager=ResourceManagerAdapter(self._resource_manager),
            event_broadcaster=SSEBroadcasterAdapter(self._sse_manager),
            on_job_completed_callback=self._on_job_completed,
        )

        # Media discovery and transcript persistence
        self._media_library = MediaLibrary(cfg.media)
        self._result_storage = ResultStorage(cfg.storage)

        # Registry adapter used by the workflow's own UI and routes
        self._plugin_adapter = PluginRegistryAdapter(self._plugin_registry)

        # Wizard first, then the main router, then the wizard's generated router
        self._step_flow = self._create_step_flow()
        self._router = self._create_router()
        self._stepflow_router = self._step_flow.create_router(
            prefix=cfg.get_full_stepflow_prefix()
        )

        # Pagination + router backing the grid/list media browser
        self._media_library.create_pagination(
            pagination_id=f"{cfg.workflow_id}_media",
            content_id=SingleFileHtmlIds.MEDIA_BROWSER_CONTENT,
            preview_route_func=self._create_preview_route_func(),
            modal_id=SingleFileHtmlIds.MEDIA_PREVIEW_MODAL,
        )
        self._media_router = self._media_library.get_pagination_router(
            prefix=cfg.get_full_media_prefix()
        )

        # Pagination + router backing the file-selection step's table
        self._media_library.create_file_selection_pagination(
            pagination_id=f"{cfg.workflow_id}_file_select",
            content_id=SingleFileHtmlIds.FILE_SELECTION_TABLE,
            preview_url_func=self._create_preview_url_func(),
            preview_target_id=SingleFileHtmlIds.MEDIA_PREVIEW_WRAPPER,
        )
        self._file_selection_router = self._media_library.get_file_selection_router(
            prefix=f"{cfg.route_prefix}/file_select"
        )

    @classmethod
    def create_and_setup(
        cls,
        app,  # FastHTML application instance
        config: Optional[SingleFileWorkflowConfig] = None,  # Explicit config (bypasses auto-loading)
        **config_overrides  # Override specific config values
    ) -> "SingleFileTranscriptionWorkflow":  # Configured and setup workflow instance
        """Construct a workflow and immediately run `setup(app)` on it."""
        instance = cls(config=config, **config_overrides)
        instance.setup(app)
        return instance

    @property
    def transcription_manager(self) -> TranscriptionJobManager:
        """The job manager owned by this workflow."""
        return self._transcription_manager

    @property
    def plugin_registry(self) -> PluginRegistryAdapter:
        """The plugin registry adapter (not the raw registry)."""
        return self._plugin_adapter

    @property
    def media_library(self) -> MediaLibrary:
        """The media library owned by this workflow."""
        return self._media_library

    @property
    def result_storage(self) -> ResultStorage:
        """The result storage owned by this workflow."""
        return self._result_storage

    @property
    def router(self) -> APIRouter:
        """The workflow's main router."""
        return self._router

    @property
    def stepflow_router(self) -> APIRouter:
        """The router generated by the StepFlow."""
        return self._stepflow_router

In [None]:
#| export
@patch
def setup(
    self: SingleFileTranscriptionWorkflow,
    app,  # FastHTML application instance
) -> None:
    """Attach the workflow to a FastHTML app.

    Stores the app reference and mounts the media library on it. Must be
    called after the app has been created.
    """
    # Keep the app reference first, then let the media library mount itself on the app
    self._app = app
    self._media_library.mount(app)

The `cleanup` method is called when the workflow is being removed or the application is shutting down. It currently clears the media library cache and drops the reference to the app. Once the database-per-service pattern is adopted, it will also close database connections, flush pending writes, and release other resources.

In [None]:
#| export
@patch
def cleanup(
    self: SingleFileTranscriptionWorkflow,
) -> None:
    """Release workflow resources.

    Clears the media library cache and drops the stored app reference.
    Mirrors PluginInterface.cleanup() for future plugin system compatibility.
    """
    # Cache first, then forget the app that setup() stored
    self._media_library.clear_cache()
    self._app = None

In [None]:
#| export
@patch
def _ensure_plugin_configs_exist(
    self: SingleFileTranscriptionWorkflow,
) -> None:
    """Write default config files for discovered plugins that lack one.

    Every plugin in the workflow's plugin category that is not yet configured
    and exposes a non-empty `config_schema` gets a config saved with the
    schema's default values. Required because workers only load plugins that
    have config files. Plugins without a schema are left untouched.
    """
    registry = self._plugin_registry

    for meta in registry.get_plugins_by_category(self.config.plugin_category):
        # Already has a saved config: nothing to do
        if meta.is_configured:
            continue

        # No schema (attribute missing or empty) means no defaults to derive
        schema = getattr(meta, 'config_schema', None)
        if not schema:
            continue

        # Saving the schema defaults is what creates the config file
        defaults = get_default_values_from_schema(schema)
        registry.save_plugin_config(meta.get_unique_id(), defaults)

In [None]:
#| export
@patch
def get_routers(
    self: SingleFileTranscriptionWorkflow,
) -> List[APIRouter]:  # List containing main router, stepflow router, media router, and file selection router
    """Collect every router the app needs to register for this workflow.

    The main and stepflow routers are always included; the media and file
    selection routers are appended (in that order) only when they are truthy.
    """
    routers = [self._router, self._stepflow_router]
    routers.extend(
        extra
        for extra in (self._media_router, self._file_selection_router)
        if extra
    )
    return routers

In [None]:
#| export
@patch
def render_entry_point(
    self: SingleFileTranscriptionWorkflow,
    request,  # FastHTML request object
    sess,  # FastHTML session object
) -> FT:  # AsyncLoadingContainer component
    """Render the element that embeds this workflow in a page (tabs, etc.).

    With at least one discovered plugin, returns an AsyncLoadingContainer
    pointed at the `current_status` endpoint, which decides what to show
    (running job, workflow in progress, completed job, or fresh start).
    With no plugins, returns a Div holding an explanatory message and, when
    `config.no_plugins_redirect` is set, a "Go to Settings" link.
    """
    # Discovered plugins count whether or not they have a saved config:
    # they can run on default config values
    if self._plugin_adapter.get_all_plugins():
        status_url = f"{self.config.route_prefix}/current_status"
        return AsyncLoadingContainer(
            container_id=self.config.container_id,
            load_url=status_url,
        )

    # Nothing discovered: styling helpers are only needed on this path
    from cjm_fasthtml_daisyui.components.actions.button import btn, btn_colors
    from cjm_fasthtml_daisyui.utilities.semantic_colors import text_dui
    from cjm_fasthtml_tailwind.utilities.spacing import m
    from cjm_fasthtml_tailwind.core.base import combine_classes

    notice = P(
        "No transcription plugins are available. Please install a transcription plugin package.",
        cls=combine_classes(text_dui.base_content.opacity(60), m.b(4)),
    )
    children = [notice]

    if self.config.no_plugins_redirect:
        settings_link = A(
            "Go to Settings",
            href=self.config.no_plugins_redirect,
            cls=combine_classes(btn, btn_colors.primary),
        )
        children.append(settings_link)

    return Div(*children, id=self.config.container_id)

In [None]:
#| export
@patch
def _on_job_completed(
    self: SingleFileTranscriptionWorkflow,
    job_id: str,  # The completed job's ID
    manager,  # The TranscriptionJobManager instance
) -> None:
    """Workflow-specific completion handling. Auto-saves results if enabled.

    Does nothing when auto-save is disabled, when the job or its result cannot
    be retrieved from `manager`, or when the job's metadata already carries
    the `saved_to_disk` flag. Failures while saving, or while flagging the job
    afterwards, are printed and never propagate to the caller.
    """
    if not self._result_storage.should_auto_save():
        return

    job = manager.get_job(job_id)
    result = manager.get_job_result(job_id)

    if not job or not result:
        return

    # Read metadata defensively once: a job object may not expose `metadata`
    # at all, and that must not be mistaken for a save failure further down.
    job_metadata = getattr(job, 'metadata', None)

    # Check if already saved (via job metadata)
    if job_metadata and job_metadata.get('saved_to_disk'):
        return

    try:
        data = result.get('data', {})
        plugin_info = self._plugin_adapter.get_plugin(job.plugin_id)

        self._result_storage.save(
            job_id=job_id,
            file_path=job.file_path,
            file_name=job.file_name,
            plugin_id=job.plugin_id,
            # Fall back to the raw plugin id when the plugin is unknown to the adapter
            plugin_name=plugin_info.title if plugin_info else job.plugin_id,
            text=data.get('text', ''),
            metadata=data.get('metadata', {})
        )
    except Exception as e:
        # Best-effort callback: report the failure instead of raising
        print(f"[Workflow] Error saving result for {job_id}: {e}")
        import traceback
        traceback.print_exc()
        return

    print(f"[Workflow] Auto-saved result for job {job_id}")

    # Flagging is kept apart from saving so that a job object that cannot
    # carry metadata is not reported as a failed save.
    try:
        if not job_metadata:
            job.metadata = {}
        job.metadata['saved_to_disk'] = True
    except Exception as e:
        print(f"[Workflow] Saved result for job {job_id} but could not flag the job as saved: {e}")

In [None]:
#| export
@patch
def _create_preview_route_func(
    self: SingleFileTranscriptionWorkflow,
):  # Function that generates preview route URLs
    """Build a URL factory for media previews that accepts an optional media_type.

    The returned callable maps `(idx, media_type=None)` to
    `<route_prefix>/media_preview?idx=<idx>`, appending
    `&media_type=<media_type>` only when `media_type` is truthy. The route
    prefix is read from the config once, when the factory is created.
    """
    route_prefix = self.config.route_prefix

    def preview_route_func(idx: int, media_type: Optional[str] = None) -> str:
        type_suffix = f"&media_type={media_type}" if media_type else ""
        return f"{route_prefix}/media_preview?idx={idx}" + type_suffix

    return preview_route_func

In [None]:
#| export
@patch
def _create_preview_url_func(
    self: SingleFileTranscriptionWorkflow,
):  # Function that generates preview URLs for file selection
    """Build a URL factory for file-selection previews (index only).

    The returned callable maps `idx` to
    `<route_prefix>/media_preview?idx=<idx>`. The route prefix is read from
    the config once, when the factory is created.
    """
    preview_endpoint = f"{self.config.route_prefix}/media_preview"

    def preview_url_func(idx: int) -> str:
        return f"{preview_endpoint}?idx={idx}"

    return preview_url_func

In [None]:
#| export
@patch
def _create_step_flow(
    self: SingleFileTranscriptionWorkflow,
) -> StepFlow:  # Configured StepFlow instance
    """Create and configure the StepFlow instance.

    Builds the three wizard steps (`plugin`, `file`, `confirm`) from closures
    that capture this workflow. The closures look up `workflow._router` and
    the other subsystems only when they are invoked, so this method can run
    before the router exists.
    """
    # Create wrapper functions that capture self
    workflow = self

    def load_plugins(request) -> Dict[str, Any]:
        """Load available transcription plugins."""
        plugins = workflow._plugin_adapter.get_all_plugins()
        return {"plugins": plugins}

    def load_media_files(request) -> Dict[str, Any]:
        """Load available media files from internal MediaLibrary."""
        from cjm_fasthtml_workflow_transcription_single_file.media.models import MediaFile

        # Declared once per call, not once per file: a simple object exposing
        # only the attributes copied from each MediaFile
        class MediaFileInfo:
            def __init__(self, media_file: MediaFile):
                self.path = media_file.path
                self.name = media_file.name
                self.media_type = media_file.media_type
                self.size_str = media_file.size_str
                self.modified_str = media_file.modified_str

        files = workflow._media_library.get_transcribable_files()
        return {"media_files": [MediaFileInfo(f) for f in files]}

    def load_confirmation_data(request) -> Dict[str, Any]:
        """Load data for confirmation step."""
        return {}

    def render_plugin_step(ctx: InteractionContext):
        """Render plugin selection step."""
        return render_plugin_selection(
            ctx,
            config=workflow.config,
            plugin_registry=workflow._plugin_adapter,
            settings_modal_url=workflow._router.settings_modal.to(),
            plugin_details_url=workflow._router.plugin_details.to(),
            raw_plugin_registry=workflow._plugin_registry,
            save_plugin_config_url=workflow._router.save_plugin_config.to(),
            reset_plugin_config_url=workflow._router.reset_plugin_config.to(),
        )

    def render_file_step(ctx: InteractionContext):
        """Render file selection step."""
        return render_file_selection(
            ctx,
            config=workflow.config,
            file_selection_router=workflow._media_library.file_selection_router,
        )

    def render_confirm_step(ctx: InteractionContext):
        """Render confirmation step."""
        return render_confirmation(ctx, plugin_registry=workflow._plugin_adapter)

    def validate_plugin_selection(state: Dict[str, Any]) -> bool:
        """Validate that a plugin has been selected."""
        plugin_id = state.get("plugin_id", "")
        # Whitespace-only ids count as "nothing selected"
        return bool(plugin_id and plugin_id.strip())

    def validate_file_selection(state: Dict[str, Any]) -> bool:
        """Validate that a file has been selected."""
        return bool(state.get("file_path")) and bool(state.get("file_name"))

    async def on_complete(state: Dict[str, Any], request):
        """Handle workflow completion."""
        return await start_transcription_job(
            state,
            request,
            config=workflow.config,
            router=workflow._router,
            transcription_manager=workflow._transcription_manager,
            plugin_registry=workflow._plugin_adapter,
        )

    return StepFlow(
        flow_id=self.config.workflow_id,
        container_id=self.config.container_id,
        steps=[
            Step(
                id="plugin",
                title="Select Plugin",
                render=render_plugin_step,
                validate=validate_plugin_selection,
                data_loader=load_plugins,
                data_keys=["plugin_id"],
                show_back=False,
                show_cancel=True,
                next_button_text="Continue"
            ),
            Step(
                id="file",
                title="Select File",
                render=render_file_step,
                validate=validate_file_selection,
                data_loader=load_media_files,
                data_keys=["file_path", "file_name", "file_type", "file_size"],
                show_back=True,
                show_cancel=True,
                next_button_text="Continue"
            ),
            Step(
                id="confirm",
                title="Confirm",
                render=render_confirm_step,
                data_loader=load_confirmation_data,
                data_keys=[],
                show_back=True,
                show_cancel=True,
                next_button_text="Start Transcription"
            )
        ],
        on_complete=on_complete,
        show_progress=self.config.show_progress,
        wrap_in_form=True
    )

In [None]:
#| export
@patch
def _create_router(
    self: SingleFileTranscriptionWorkflow,
) -> APIRouter:  # Configured APIRouter with all workflow routes
    """Build the workflow's main API router by delegating to `routes.init_router`."""
    # NOTE(review): imported at call time rather than at module top, presumably
    # to avoid a circular import with the routes module; confirm.
    from cjm_fasthtml_workflow_transcription_single_file.workflow.routes import init_router

    router = init_router(self)
    return router

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()