# Transcription Plugin Interface

> Domain-specific plugin interface for audio transcription plugins

In [None]:
#| default_exp plugin_interface

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
import logging
from typing import Optional, Dict, Any, Union, List, Tuple, Generator, Type
from pathlib import Path

# Import generic plugin infrastructure from cjm-plugin-system
from cjm_plugin_system.core.interface import PluginInterface
from cjm_plugin_system.core.metadata import PluginMeta
from cjm_plugin_system.utils.validation import (
    dict_to_config, config_to_dict, validate_config,
    SCHEMA_TITLE, SCHEMA_DESC, SCHEMA_MIN, SCHEMA_MAX, SCHEMA_ENUM
)

# Import domain-specific types
from cjm_transcription_plugin_system.core import AudioData, TranscriptionResult

In [None]:
#| export
class TranscriptionPlugin(PluginInterface):
    """Plugin interface specialised for audio transcription.

    Builds on the generic `PluginInterface` by adding two abstract members:
    the `supported_formats` property and a transcription-shaped `execute`
    signature.

    Every transcription plugin must implement this interface.
    """

    # Group name for this plugin family (presumably the entry-point group
    # used for plugin discovery — confirm against cjm-plugin-system)
    entry_point_group = "transcription.plugins"

    @property
    @abstractmethod
    def supported_formats(self) -> List[str]:  # File extensions without the dot (e.g., ['wav', 'mp3', 'flac'])
        """Audio formats this plugin can handle."""
        ...

    @abstractmethod
    def execute(
        self,
        audio: Union[AudioData, str, Path],  # An AudioData object, a file path string, or a Path
        **kwargs  # Extra plugin-specific parameters (e.g., language, model)
    ) -> TranscriptionResult:  # Result carrying text, confidence, segments, and metadata
        """Turn the given audio into text."""
        ...

## TranscriptionPlugin Interface

The `TranscriptionPlugin` interface extends the generic `PluginInterface` from `cjm-plugin-system` with transcription-specific requirements:

- **`supported_formats`** property: List of audio file formats this plugin can handle
- **`execute`** method signature: Takes audio input and returns `TranscriptionResult`

All generic plugin functionality (dataclass-based configuration, validation, streaming support, etc.) is inherited from the base `PluginInterface` class.

Plugins define their configuration using dataclasses with field metadata for validation constraints.

In [None]:
# Build a fully populated PluginMeta and inspect it
meta = PluginMeta(
    author="Test Author",
    description="A test plugin",
    version="1.0.0",
    name="test_plugin",
)

# One print call, one item per output line (same text as separate prints)
print(
    "PluginMeta instance:",
    meta,
    f"\nName: {meta.name}",
    f"Version: {meta.version}",
    f"Enabled: {meta.enabled}",
    f"Instance: {meta.instance}",
    sep="\n",
)

# Only name and version are supplied; the remaining fields use their defaults
minimal_meta = PluginMeta(version="0.1.0", name="minimal")
print(f"\nMinimal PluginMeta: {minimal_meta}")

# A second instance built from the same values should compare equal
meta_copy = PluginMeta(version="0.1.0", name="minimal")
print(f"Equality test: {minimal_meta == meta_copy}")

PluginMeta instance:
PluginMeta(name='test_plugin', version='1.0.0', description='A test plugin', author='Test Author', package_name='', instance=None, enabled=True)

Name: test_plugin
Version: 1.0.0
Enabled: True
Instance: None

Minimal PluginMeta: PluginMeta(name='minimal', version='0.1.0', description='', author='', package_name='', instance=None, enabled=True)
Equality test: True


## Testing ExamplePlugin

Let's test a simple transcription plugin implementation.

In [None]:
# Define configuration dataclass for the example plugin
@dataclass
class ExamplePluginConfig:
    """Settings accepted by ExamplePlugin.

    Each field carries schema metadata (title, description and, where
    relevant, an enum or min/max bound) alongside its default value.
    """

    # Model size; restricted to the listed choices
    model: str = field(
        default="base",
        metadata={
            SCHEMA_TITLE: "Model",
            SCHEMA_DESC: "Model size to use for transcription",
            SCHEMA_ENUM: ["tiny", "base", "small", "medium", "large"],
        },
    )

    # Language code, or "auto" to request detection
    language: str = field(
        default="auto",
        metadata={
            SCHEMA_TITLE: "Language",
            SCHEMA_DESC: "Language code (e.g., 'en', 'es') or 'auto' for detection",
        },
    )

    # Processing batch size, bounded to 1..32
    batch_size: int = field(
        default=8,
        metadata={
            SCHEMA_TITLE: "Batch Size",
            SCHEMA_DESC: "Batch size for processing",
            SCHEMA_MIN: 1,
            SCHEMA_MAX: 32,
        },
    )

    # Voice activity detection toggle
    enable_vad: bool = field(
        default=True,
        metadata={
            SCHEMA_TITLE: "Enable VAD",
            SCHEMA_DESC: "Enable voice activity detection",
        },
    )

class ExamplePlugin(TranscriptionPlugin):
    """An example transcription plugin implementation with dataclass configuration.

    This is a mock: `initialize` stores a placeholder model name and `execute`
    returns a canned `TranscriptionResult` without reading the audio input.
    """
    
    config_class = ExamplePluginConfig

    def __init__(self):
        self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
        # Both remain None until initialize() is called
        self.config: Optional[ExamplePluginConfig] = None
        self.model = None
    
    @property
    def name(self) -> str: # Plugin name identifier
        return "example_plugin"
    
    @property
    def version(self) -> str: # Plugin version string
        return "1.0.0"

    @property
    def supported_formats(self) -> List[str]: # List of supported audio file extensions
        return ["wav", "mp3", "flac"]
    
    def get_current_config(self) -> Optional[ExamplePluginConfig]: # Current configuration dataclass (None before initialize)
        """Return the current configuration."""
        return self.config
    
    def initialize(
        self, 
        config: Optional[Any] = None # Configuration dataclass, dict, or None
    ) -> None:
        """Initialize the plugin.

        `None` selects the default configuration, an `ExamplePluginConfig`
        instance is stored as given, and a dict is converted (with validation)
        through `dict_to_config`. Any other type raises `TypeError`.
        """
        if config is None:
            self.config = ExamplePluginConfig()
        elif isinstance(config, ExamplePluginConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = dict_to_config(ExamplePluginConfig, config, validate=True)
        else:
            raise TypeError(f"Expected ExamplePluginConfig, dict, or None, got {type(config).__name__}")
        
        self.logger.info(f"Initializing {self.name} with config: {self.config}")
        
        # Simulate loading a model based on config
        self.model = f"MockModel-{self.config.model}"
    
    def execute(
        self,
        audio: Union[AudioData, str, Path], # Audio data or path to audio file
        **kwargs # Additional plugin-specific parameters
    ) -> TranscriptionResult: # Transcription result with text and metadata
        """Execute the plugin's functionality.

        Raises `RuntimeError` when no model is loaded, i.e. before
        `initialize()` or after `cleanup()`.
        """
        # Without this guard an uninitialized plugin fails later with an
        # AttributeError on the missing config, and a cleaned-up plugin
        # silently reports "Transcription using None".
        if not self.model:
            raise RuntimeError("Plugin not initialized. Call initialize() first.")

        self.logger.info(f"Example plugin executed with model: {self.model}")
        self.logger.info(f"Config: {self.config}")
        
        # Mock transcription result
        return TranscriptionResult(
            text=f"Transcription using {self.model}",
            confidence=0.95,
            segments=[],
            metadata={"model": self.config.model}
        )

    def is_available(self) -> bool: # True if plugin dependencies are available
        """Check availability."""
        return True
    
    def cleanup(self) -> None:
        """Clean up resources."""
        self.logger.info(f"Cleaning up {self.name}")
        # Dropping the model puts the plugin back into the "not initialized" state
        self.model = None

In [None]:
#| eval: false
# Emit INFO-level records so the plugin's log messages are shown
logging.basicConfig(level=logging.INFO)

# Walk one plugin instance through initialize -> execute -> cleanup
example_plugin = ExamplePlugin()
example_plugin.initialize()
transcription_result = example_plugin.execute("test_audio.mp3")
example_plugin.cleanup()

# Report the mock result (one item per output line)
print(
    f"Result text: {transcription_result.text}",
    f"Result confidence: {transcription_result.confidence}",
    sep="\n",
)

INFO:__main__.ExamplePlugin:Initializing example_plugin with config: ExamplePluginConfig(model='base', language='auto', batch_size=8, enable_vad=True)
INFO:__main__.ExamplePlugin:Example plugin executed with model: MockModel-base
INFO:__main__.ExamplePlugin:Config: ExamplePluginConfig(model='base', language='auto', batch_size=8, enable_vad=True)
INFO:__main__.ExamplePlugin:Cleaning up example_plugin


Result text: Transcription using MockModel-base
Result confidence: 0.95


In [None]:
# Exercise ExamplePlugin's dataclass-backed configuration
plugin = ExamplePlugin()

# Report which dataclass the plugin declares for its configuration
print("Configuration Class:", plugin.config_class.__name__)

# List the default value of every configuration entry
defaults = plugin.get_config_defaults()
print("\nDefault Configuration:")
for key, value in defaults.items():
    print(f"  {key}: {value!r}")

print("\n" + "=" * 50 + "\n")

# A partial dict is enough: fields left out keep their defaults
plugin.initialize({"model": "small", "language": "en"})
current = plugin.get_current_config()
print(
    "Current Configuration after initialization:",
    f"  model: {current.model}",
    f"  language: {current.language}",
    f"  batch_size: {current.batch_size}",
    f"  enable_vad: {current.enable_vad}",
    sep="\n",
)

print("\n" + "=" * 50 + "\n")

# Validation checks: each case reports ✓ on the expected outcome, ✗ otherwise
print("Configuration Validation Tests:")

# An allowed model name must be accepted
try:
    plugin.initialize({"model": "tiny"})
except ValueError as e:
    print(f"✗ Unexpected error: {e}")
else:
    print("✓ Valid config with model='tiny' accepted")

# A model name outside the enum must be rejected
try:
    plugin.initialize({"model": "invalid_model"})
except ValueError as e:
    print(f"✓ Invalid model rejected: {e}")
else:
    print("✗ Should have rejected invalid model")

# A batch size above the declared maximum must be rejected
try:
    plugin.initialize({"model": "base", "batch_size": 100})
except ValueError as e:
    print(f"✓ Batch size > max rejected: {e}")
else:
    print("✗ Should have rejected batch_size > 32")

INFO:__main__.ExamplePlugin:Initializing example_plugin with config: ExamplePluginConfig(model='small', language='en', batch_size=8, enable_vad=True)
INFO:__main__.ExamplePlugin:Initializing example_plugin with config: ExamplePluginConfig(model='tiny', language='auto', batch_size=8, enable_vad=True)


Configuration Class: ExamplePluginConfig

Default Configuration:
  model: 'base'
  language: 'auto'
  batch_size: 8
  enable_vad: True


Current Configuration after initialization:
  model: small
  language: en
  batch_size: 8
  enable_vad: True


Configuration Validation Tests:
✓ Valid config with model='tiny' accepted
✓ Invalid model rejected: model: 'invalid_model' is not one of ['tiny', 'base', 'small', 'medium', 'large']
✓ Batch size > max rejected: batch_size: 100 is greater than maximum 32


## Example: Whisper Plugin Implementation

In [None]:
# Define comprehensive Whisper configuration dataclass
@dataclass
class WhisperPluginConfig:
    """Full set of options for the Whisper transcription plugin.

    Each field pairs a default value with schema metadata (title, description
    and, where relevant, an enum or min/max bound).
    """

    # Model variant; restricted to the listed choices
    model: str = field(
        default="base",
        metadata={
            SCHEMA_TITLE: "Model",
            SCHEMA_DESC: "Whisper model size. Larger models are more accurate but slower.",
            SCHEMA_ENUM: [
                "tiny", "tiny.en", "base", "base.en", "small", "small.en",
                "medium", "medium.en", "large", "large-v1", "large-v2", "large-v3",
            ],
        },
    )

    # Inference device
    device: str = field(
        default="auto",
        metadata={
            SCHEMA_TITLE: "Device",
            SCHEMA_DESC: "Computation device for inference",
            SCHEMA_ENUM: ["cpu", "cuda", "mps", "auto"],
        },
    )

    # Precision / quantization mode
    compute_type: str = field(
        default="default",
        metadata={
            SCHEMA_TITLE: "Compute Type",
            SCHEMA_DESC: "Model precision/quantization",
            SCHEMA_ENUM: ["default", "float16", "float32", "int8", "int8_float16"],
        },
    )

    # Language code; None requests auto-detection
    language: Optional[str] = field(
        default=None,
        metadata={
            SCHEMA_TITLE: "Language",
            SCHEMA_DESC: "Language code (e.g., 'en', 'es', 'fr') or None for auto-detection",
        },
    )

    # Which task to run
    task: str = field(
        default="transcribe",
        metadata={
            SCHEMA_TITLE: "Task",
            SCHEMA_DESC: "Task to perform",
            SCHEMA_ENUM: ["transcribe", "translate"],
        },
    )

    # Sampling temperature, bounded to 0.0..1.0
    temperature: float = field(
        default=0.0,
        metadata={
            SCHEMA_TITLE: "Temperature",
            SCHEMA_DESC: "Sampling temperature. 0 for deterministic.",
            SCHEMA_MIN: 0.0,
            SCHEMA_MAX: 1.0,
        },
    )

    # Beam search width, bounded to 1..10
    beam_size: int = field(
        default=5,
        metadata={
            SCHEMA_TITLE: "Beam Size",
            SCHEMA_DESC: "Beam search width.",
            SCHEMA_MIN: 1,
            SCHEMA_MAX: 10,
        },
    )

    # Word-level timestamp extraction toggle
    word_timestamps: bool = field(
        default=False,
        metadata={
            SCHEMA_TITLE: "Word Timestamps",
            SCHEMA_DESC: "Extract word-level timestamps",
        },
    )

    # Voice activity detection filter toggle
    vad_filter: bool = field(
        default=False,
        metadata={
            SCHEMA_TITLE: "VAD Filter",
            SCHEMA_DESC: "Enable voice activity detection filter",
        },
    )

class WhisperPlugin(TranscriptionPlugin):
    """Mock Whisper transcription plugin configured through `WhisperPluginConfig`.

    No real model is loaded: `initialize` stores placeholder strings and
    `execute` returns a fixed two-segment result.
    """

    config_class = WhisperPluginConfig

    def __init__(self):
        logger_name = f"{__name__}.{type(self).__name__}"
        self.logger = logging.getLogger(logger_name)
        # Populated by initialize()
        self.config: WhisperPluginConfig = None
        self.model = None
        self.processor = None

    @property
    def name(self) -> str:  # Plugin name identifier
        return "whisper"

    @property
    def version(self) -> str:  # Plugin version string
        return "1.0.0"

    @property
    def supported_formats(self) -> List[str]:  # Supported audio file extensions
        return ["wav", "mp3", "flac", "m4a", "ogg", "webm"]

    def get_current_config(self) -> WhisperPluginConfig:  # Current configuration dataclass
        """Return the configuration currently held by the plugin."""
        return self.config

    def initialize(
        self,
        config: Optional[Any] = None  # Configuration dataclass, dict, or None
    ) -> None:
        """Resolve the configuration and set up the (mock) model and processor.

        A dict is converted with validation, a `WhisperPluginConfig` is used
        as given, and `None` selects the defaults; anything else raises
        `TypeError`.
        """
        if isinstance(config, dict):
            resolved = dict_to_config(WhisperPluginConfig, config, validate=True)
        elif isinstance(config, WhisperPluginConfig):
            resolved = config
        elif config is None:
            resolved = WhisperPluginConfig()
        else:
            raise TypeError(f"Expected WhisperPluginConfig, dict, or None, got {type(config).__name__}")
        self.config = resolved

        self.logger.info(f"Initializing Whisper with config: {self.config}")

        # Placeholders standing in for real model/processor objects
        self.model = f"WhisperModel-{self.config.model}"
        self.processor = f"WhisperProcessor-{self.config.device}"

    def execute(
        self,
        audio: Union[AudioData, str, Path],  # Audio data or path to audio file
        **kwargs  # Additional plugin-specific parameters
    ) -> TranscriptionResult:  # Result with text, confidence, segments, and metadata
        """Produce a mock transcription; raises `RuntimeError` when no model is loaded."""
        if not self.model:
            raise RuntimeError("Plugin not initialized. Call initialize() first.")

        self.logger.info(f"Transcribing with Whisper model: {self.model}")

        # Assemble the canned result piece by piece
        mock_segments = [
            {"start": 0.0, "end": 2.5, "text": "Mock transcription", "confidence": 0.96},
            {"start": 2.5, "end": 5.0, "text": f"using {self.config.model} model", "confidence": 0.94},
        ]
        mock_metadata = {
            "model": self.config.model,
            "language": self.config.language or "auto-detected",
            "device": self.config.device,
            "task": self.config.task,
        }
        return TranscriptionResult(
            text=f"Mock transcription using {self.config.model} model",
            confidence=0.95,
            segments=mock_segments,
            metadata=mock_metadata,
        )

    def is_available(self) -> bool:  # True if Whisper dependencies are available
        """Report availability of Whisper dependencies (the mock always says yes)."""
        return True

    def cleanup(self) -> None:
        """Drop the model and processor references."""
        self.logger.info("Cleaning up Whisper model")
        self.model = self.processor = None

In [None]:
# Exercise the Whisper plugin with several dataclass configurations
whisper_plugin = WhisperPlugin()

# Summarise the config class and the choices declared in its field metadata
print(
    "Whisper Configuration:",
    f"Config class: {whisper_plugin.config_class.__name__}",
    f"Available models: {WhisperPluginConfig.__dataclass_fields__['model'].metadata.get(SCHEMA_ENUM)}",
    f"Available devices: {WhisperPluginConfig.__dataclass_fields__['device'].metadata.get(SCHEMA_ENUM)}",
    sep="\n",
)

print("\n" + "=" * 50 + "\n")

# Partial dicts: any field not listed keeps its default
configs_to_test = [
    {"model": "tiny"},
    {"model": "large-v3", "device": "cuda", "language": "en"},
    {"model": "base", "temperature": 0.2, "beam_size": 3, "word_timestamps": True},
]

for config in configs_to_test:
    print(f"Initializing with config: {config}")
    whisper_plugin.initialize(config)

    # Show a few of the resolved settings
    current = whisper_plugin.get_current_config()
    print(
        f"  model: {current.model}",
        f"  device: {current.device}",
        f"  word_timestamps: {current.word_timestamps}",
        sep="\n",
    )

    # Run the mock transcription and close the section with a divider
    result = whisper_plugin.execute("dummy_audio.wav")
    print(f"  Result: {result.text}", "-" * 30, sep="\n")

INFO:__main__.WhisperPlugin:Initializing Whisper with config: WhisperPluginConfig(model='tiny', device='auto', compute_type='default', language=None, task='transcribe', temperature=0.0, beam_size=5, word_timestamps=False, vad_filter=False)
INFO:__main__.WhisperPlugin:Transcribing with Whisper model: WhisperModel-tiny
INFO:__main__.WhisperPlugin:Initializing Whisper with config: WhisperPluginConfig(model='large-v3', device='cuda', compute_type='default', language='en', task='transcribe', temperature=0.0, beam_size=5, word_timestamps=False, vad_filter=False)
INFO:__main__.WhisperPlugin:Transcribing with Whisper model: WhisperModel-large-v3
INFO:__main__.WhisperPlugin:Initializing Whisper with config: WhisperPluginConfig(model='base', device='auto', compute_type='default', language=None, task='transcribe', temperature=0.2, beam_size=3, word_timestamps=True, vad_filter=False)
INFO:__main__.WhisperPlugin:Transcribing with Whisper model: WhisperModel-base


Whisper Configuration:
Config class: WhisperPluginConfig
Available models: ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large', 'large-v1', 'large-v2', 'large-v3']
Available devices: ['cpu', 'cuda', 'mps', 'auto']


Initializing with config: {'model': 'tiny'}
  model: tiny
  device: auto
  word_timestamps: False
  Result: Mock transcription using tiny model
------------------------------
Initializing with config: {'model': 'large-v3', 'device': 'cuda', 'language': 'en'}
  model: large-v3
  device: cuda
  word_timestamps: False
  Result: Mock transcription using large-v3 model
------------------------------
Initializing with config: {'model': 'base', 'temperature': 0.2, 'beam_size': 3, 'word_timestamps': True}
  model: base
  device: auto
  word_timestamps: True
  Result: Mock transcription using base model
------------------------------


In [None]:
#| hide
# Export the cells marked for export into the library module
import nbdev

nbdev.nbdev_export()