# Transcription Plugin Interface

> Domain-specific plugin interface for audio transcription

In [None]:
#| default_exp plugin_interface

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from abc import abstractmethod
from pathlib import Path
from typing import List, Union

from cjm_plugin_system.core.interface import PluginInterface

from cjm_transcription_plugin_system.core import AudioData, TranscriptionResult

In [None]:
#| export
class TranscriptionPlugin(PluginInterface):
    """
    Base contract that every audio transcription plugin must fulfil.

    On top of what `PluginInterface` requires, a transcription plugin provides:
    - `supported_formats`: the audio file extensions the plugin can process
    - `execute`: takes an `AudioData` object or a file path (`str` / `Path`)
      and returns a `TranscriptionResult`

    NOTE: Behind a RemotePluginProxy, AudioData arguments are written to temp
    files via FileBackedDTO, so the Worker-side plugin is handed a file path.
    """

    # Entry-point group name used for discovery (legacy; retained as metadata)
    entry_point_group = "transcription.plugins"

    @property
    @abstractmethod
    def supported_formats(self) -> List[str]: # e.g., ['wav', 'mp3', 'flac']
        """Audio file extensions this plugin accepts, given without the leading dot."""

    @abstractmethod
    def execute(
        self,
        audio: Union[AudioData, str, Path], # Audio data or file path
        **kwargs
    ) -> TranscriptionResult: # Transcription result with text, confidence, segments
        """
        Turn the given audio into text.

        If the call goes through the Proxy, an AudioData argument has already
        been replaced by a file path string by the time this method runs in
        the Worker process.
        """

## How It Works

```
Host Process                              Worker Process (Isolated Env)
┌─────────────────────┐                  ┌─────────────────────────────┐
│ audio = AudioData(  │                  │                             │
│   samples=np.array, │                  │  TranscriptionPlugin        │
│   sample_rate=16000 │                  │    .execute(                │
│ )                   │                  │       audio="/tmp/xyz.wav"  │
│                     │                  │    )                        │
│ plugin.execute(     │   HTTP/JSON      │                             │
│   audio=audio       │ ─────────────────▶  # audio is now a PATH      │
│ )                   │  (path string)   │  # Plugin reads from disk   │
│                     │                  │                             │
│ # Proxy detects     │                  │                             │
│ # FileBackedDTO,    │                  │                             │
│ # calls to_temp_file│                  │                             │
└─────────────────────┘                  └─────────────────────────────┘
```

The `RemotePluginProxy` automatically:
1. Detects that `AudioData` implements `FileBackedDTO`
2. Calls `audio.to_temp_file()` to save the audio to disk
3. Sends the resulting file path string to the Worker
4. Ensures the Worker's `execute()` receives a path, not the `AudioData` object

## Example Implementation

A minimal transcription plugin that demonstrates the interface:

In [None]:
from typing import Any, Dict, Optional

class ExampleTranscriptionPlugin(TranscriptionPlugin):
    """Example implementation showing how to create a transcription plugin.

    Nothing is actually transcribed: `execute` returns a fixed mock result so
    the interface can be demonstrated without a real model.
    """

    # Model used whenever the supplied configuration does not name one
    _DEFAULT_MODEL = "base"

    def __init__(self):
        self._config: Dict[str, Any] = {}
        self._model = None

    @property
    def name(self) -> str:
        """Unique plugin identifier."""
        return "example-transcription"

    @property
    def version(self) -> str:
        """Plugin version string."""
        return "1.0.0"

    @property
    def supported_formats(self) -> List[str]:
        """Audio file extensions (without the dot) this example claims to handle."""
        return ["wav", "mp3", "flac"]

    def initialize(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Initialize with configuration, filling in the default model if absent."""
        # Merge into a fresh dict so that (a) the stored config always names the
        # model that is actually loaded -- previously a config without "model"
        # loaded "base" while the reported config/metadata said nothing -- and
        # (b) later changes to the caller's dict do not leak into the plugin.
        self._config = {"model": self._DEFAULT_MODEL, **(config or {})}
        self._model = f"MockModel-{self._config['model']}"

    def execute(
        self,
        audio: Union[AudioData, str, Path],
        **kwargs
    ) -> TranscriptionResult:
        """Transcribe audio (receives file path when called via Proxy)."""
        # In the Worker process audio is typically a string path; an AudioData
        # object has no path to report, so it is labelled "in-memory" instead.
        if isinstance(audio, AudioData):
            audio_path = "in-memory"
        else:
            audio_path = str(audio)

        return TranscriptionResult(
            text=f"Transcribed from {audio_path}",
            confidence=0.95,
            segments=[{"start": 0.0, "end": 1.0, "text": "Mock transcription"}],
            # `.get` keeps this safe when execute() runs before initialize()
            metadata={"model": self._config.get("model")}
        )

    def get_config_schema(self) -> Dict[str, Any]:
        """Return JSON Schema for configuration."""
        return {
            "type": "object",
            "properties": {
                "model": {
                    "type": "string",
                    "enum": ["tiny", "base", "small", "medium", "large"],
                    "default": "base"
                },
                "language": {
                    "type": "string",
                    "default": "en"
                }
            }
        }

    def get_current_config(self) -> Dict[str, Any]:
        """Return a copy of the current configuration."""
        # Shallow copy: callers can inspect or modify the result without
        # silently reconfiguring the plugin.
        return dict(self._config)

    def cleanup(self) -> None:
        """Clean up resources."""
        self._model = None

In [None]:
# Walk the example plugin through its lifecycle: configure, inspect, run, tear down
plugin = ExampleTranscriptionPlugin()
plugin.initialize({"model": "large", "language": "en"})

# Show the plugin's identity and configuration, one item per line
print(
    f"Plugin: {plugin.name} v{plugin.version}",
    f"Supported formats: {plugin.supported_formats}",
    f"Config schema: {plugin.get_config_schema()}",
    f"Current config: {plugin.get_current_config()}",
    sep="\n",
)

# Run with a plain file path, which is what the Worker side would be handed
result = plugin.execute(audio="/tmp/audio.wav")
print(f"\nResult: {result}")

# Release whatever the plugin is holding
plugin.cleanup()

Plugin: example-transcription v1.0.0
Supported formats: ['wav', 'mp3', 'flac']
Config schema: {'type': 'object', 'properties': {'model': {'type': 'string', 'enum': ['tiny', 'base', 'small', 'medium', 'large'], 'default': 'base'}, 'language': {'type': 'string', 'default': 'en'}}}
Current config: {'model': 'large', 'language': 'en'}

Result: TranscriptionResult(text='Transcribed from /tmp/audio.wav', confidence=0.95, segments=[{'start': 0.0, 'end': 1.0, 'text': 'Mock transcription'}], metadata={'model': 'large'})


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()