# Job Tracker

> Lightweight job state tracking for transcription workflows

In [None]:
#| default_exp core.job_tracker

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import asyncio
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional

## TranscriptionJob

Dataclass representing a transcription job's state. This is a lightweight structure for tracking job progress without the process management complexity of the old `TranscriptionJobManager`.

In [None]:
#| export
@dataclass
class TranscriptionJob:
    """State record for a single transcription job.

    Only the four identifying fields are required; every other field starts
    from a default and is filled in as the job progresses.
    """
    id: str  # Unique identifier for the job (UUID string)
    plugin_name: str  # Name of the plugin that executes the job
    file_path: str  # Location of the audio/video file
    file_name: str  # File name used for display
    status: str = field(default="pending")  # One of: pending, running, completed, failed, cancelled
    created_at: str = field(
        default_factory=lambda: datetime.now().isoformat()
    )  # ISO timestamp taken when the record is created
    started_at: Optional[str] = field(default=None)  # ISO timestamp of when execution began
    completed_at: Optional[str] = field(default=None)  # ISO timestamp of when the job finished
    result: Optional[Dict[str, Any]] = field(default=None)  # Transcription result data
    error: Optional[str] = field(default=None)  # Error message when the job failed
    metadata: Dict[str, Any] = field(default_factory=dict)  # Extra per-job metadata (fresh dict per job)
    task: Optional[asyncio.Task] = field(default=None)  # Async task handle, kept so the job can be cancelled

## TranscriptionJobTracker

Lightweight job state tracker that works with the new `PluginManager` architecture. Unlike the old `TranscriptionJobManager`, this class:

- Does **not** manage worker processes (handled by `PluginManager`)
- Does **not** handle resource scheduling (handled by `PluginManager`'s scheduler)
- Does **not** manage plugin loading (handled by `PluginManager`)

It simply tracks job state and coordinates with an externally-provided `PluginManager` for execution.

In [None]:
#| export
class TranscriptionJobTracker:
    """Lightweight job state tracker for transcription workflows.

    Job records live in `self.jobs` and the outcome of finished jobs in
    `self.results`, both keyed by job ID. The tracker only records state
    transitions; it does not execute anything itself.
    """

    # Statuses that `clear_completed` treats as finished.
    _FINISHED_STATUSES = ("completed", "failed", "cancelled")

    def __init__(
        self,
        on_job_completed: Optional[Callable[[str, 'TranscriptionJobTracker'], None]] = None,  # Completion callback
    ):
        """Initialize the job tracker.

        `on_job_completed`, when given, is called as `callback(job_id, tracker)`
        from `mark_completed` (it is not called for failed or cancelled jobs).
        """
        self.jobs: Dict[str, TranscriptionJob] = {}
        self.results: Dict[str, Dict[str, Any]] = {}
        self._on_job_completed = on_job_completed

    def create_job(
        self,
        plugin_name: str,  # Name of the plugin to execute
        file_path: str,  # Path to audio/video file
        file_name: str,  # Display name of the file
        **metadata  # Additional job metadata
    ) -> TranscriptionJob:  # Created job instance
        """Create a new transcription job with a fresh UUID and register it."""
        job = TranscriptionJob(
            id=str(uuid.uuid4()),
            plugin_name=plugin_name,
            file_path=file_path,
            file_name=file_name,
            metadata=metadata
        )
        self.jobs[job.id] = job
        return job

    def mark_running(
        self,
        job_id: str,  # Job identifier
        task: Optional[asyncio.Task] = None  # Async task handle
    ) -> None:
        """Mark a job as running and remember its task handle.

        Unknown job IDs are ignored.
        """
        job = self.jobs.get(job_id)
        if job is None:
            return

        job.status = "running"
        job.started_at = datetime.now().isoformat()
        job.task = task

    def mark_completed(
        self,
        job_id: str,  # Job identifier
        result: Dict[str, Any]  # Transcription result
    ) -> None:
        """Mark a job as completed, store its result, and fire the completion callback.

        Unknown job IDs are ignored.
        """
        job = self.jobs.get(job_id)
        if job is None:
            return

        job.status = "completed"
        job.completed_at = datetime.now().isoformat()
        job.result = result
        job.task = None

        self.results[job_id] = {
            "status": "success",
            "data": result
        }

        if self._on_job_completed:
            # A failing callback must not undo the completed state, so the
            # error is reported and otherwise swallowed.
            try:
                self._on_job_completed(job_id, self)
            except Exception as e:
                print(f"[JobTracker] Error in completion callback for {job_id}: {e}")

    def mark_failed(
        self,
        job_id: str,  # Job identifier
        error: str  # Error message
    ) -> None:
        """Mark a job as failed and store the error message.

        Unknown job IDs are ignored.
        """
        job = self.jobs.get(job_id)
        if job is None:
            return

        job.status = "failed"
        job.completed_at = datetime.now().isoformat()
        job.error = error
        job.task = None

        self.results[job_id] = {
            "status": "error",
            "error": error
        }

    async def cancel_job(
        self,
        job_id: str  # Job identifier
    ) -> bool:  # True if cancellation was successful
        """Cancel a running job.

        Returns False when the job is unknown, is not running, or reached
        another state (e.g. completed or failed) while its task was being
        cancelled. Exceptions other than `asyncio.CancelledError` raised by
        the awaited task propagate to the caller.
        """
        job = self.jobs.get(job_id)
        if job is None or job.status != "running":
            return False

        task = job.task
        if task is not None and not task.done():
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass

            # While we were waiting, the task may have recorded its own
            # outcome (e.g. it handled the cancellation and called
            # mark_completed/mark_failed). Do not overwrite that outcome.
            if job.status != "running":
                return False

        job.status = "cancelled"
        job.completed_at = datetime.now().isoformat()
        job.task = None

        self.results[job_id] = {
            "status": "cancelled"
        }

        return True

    def get_job(
        self,
        job_id: str  # Job identifier
    ) -> Optional[TranscriptionJob]:  # Job instance or None
        """Get a job by ID."""
        return self.jobs.get(job_id)

    def get_job_result(
        self,
        job_id: str  # Job identifier
    ) -> Optional[Dict[str, Any]]:  # Result dict or None
        """Get the stored result entry for a job, if it has one."""
        return self.results.get(job_id)

    def get_running_jobs(self) -> List[TranscriptionJob]:  # List of running jobs
        """Get all currently running jobs."""
        return [job for job in self.jobs.values() if job.status == "running"]

    def clear_completed(
        self,
        keep_results: bool = False  # Whether to keep results in memory
    ) -> int:  # Number of jobs cleared
        """Clear completed, failed, and cancelled jobs."""
        # Collect IDs first so the dict is not mutated while iterating over it.
        finished_ids = [
            job_id for job_id, job in self.jobs.items()
            if job.status in self._FINISHED_STATUSES
        ]

        for job_id in finished_ids:
            del self.jobs[job_id]
            if not keep_results:
                self.results.pop(job_id, None)

        return len(finished_ids)

## Usage Example

The `TranscriptionJobTracker` is used in conjunction with a `PluginManager` (provided by the host application):

```python
from cjm_plugin_system.core.manager import PluginManager
from cjm_fasthtml_workflow_transcription_single_file.core.job_tracker import (
    TranscriptionJobTracker
)

# Host application provides the PluginManager
plugin_manager = PluginManager()
plugin_manager.load_all()

# Workflow creates its own job tracker
def on_complete(job_id, tracker):
    job = tracker.get_job(job_id)
    print(f"Job {job_id} completed: {job.file_name}")

job_tracker = TranscriptionJobTracker(on_job_completed=on_complete)

# Start a job
job = job_tracker.create_job(
    plugin_name="cjm-transcription-plugin-whisper",
    file_path="/path/to/audio.mp3",
    file_name="audio.mp3"
)

# Execute via PluginManager (in an async context)
async def run_transcription():
    job_tracker.mark_running(job.id)
    try:
        result = await plugin_manager.execute_plugin_async(
            job.plugin_name,
            audio=job.file_path
        )
        job_tracker.mark_completed(job.id, result)
    except Exception as e:
        job_tracker.mark_failed(job.id, str(e))
```

In [None]:
# Smoke test: walk a single job through pending -> running -> completed.
tracker = TranscriptionJobTracker()

# A freshly created job starts out pending and echoes back its inputs.
job = tracker.create_job(
    file_name="audio.mp3",
    file_path="/test/audio.mp3",
    plugin_name="test-plugin",
)
assert (job.status, job.plugin_name, job.file_name) == ("pending", "test-plugin", "audio.mp3")
print(f"Created job: {job.id}")

# mark_running without a task handle still moves the job to "running".
tracker.mark_running(job.id)
assert tracker.get_job(job.id).status == "running"
print(f"Job status: {tracker.get_job(job.id).status}")

# Completing the job stores the payload under a "success" result entry.
tracker.mark_completed(job.id, {"text": "Hello world", "metadata": {}})
assert tracker.get_job(job.id).status == "completed"
result = tracker.get_job_result(job.id)
assert (result["status"], result["data"]["text"]) == ("success", "Hello world")
print(f"Job result: {result}")

print("All tests passed!")

Created job: 60f8e8ee-80e5-4a01-af8e-bd3a22bbdc31
Job status: running
Job result: {'status': 'success', 'data': {'text': 'Hello world', 'metadata': {}}}
All tests passed!


In [None]:
#| hide
# Run nbdev's export step for this project (presumably writes the
# `#| export` cells to the module named by `default_exp` -- see nbdev docs).
import nbdev
nbdev.nbdev_export()