# cjm-transcription-plugin-voxtral-vllm

> Mistral Voxtral plugin for the cjm-transcription-plugin-system library - provides local speech-to-text transcription through vLLM with configurable model selection and parameter control.

## Install

```bash
pip install cjm_transcription_plugin_voxtral_vllm
```

## Project Structure

```
nbs/
└── plugin.ipynb # Plugin implementation for Mistral Voxtral transcription through a vLLM server
```

Total: 1 notebook

## Module Dependencies

```mermaid
graph LR
    plugin[plugin<br/>Voxtral VLLM Plugin]

```

No cross-module dependencies detected.

## CLI Reference

No CLI commands found in this project.

## Module Overview

Detailed documentation for each module in the project:

### Voxtral VLLM Plugin (`plugin.ipynb`)
> Plugin implementation for Mistral Voxtral transcription through a vLLM server

#### Import

```python
from cjm_transcription_plugin_voxtral_vllm.plugin import (
    VLLMServer,
    VoxtralVLLMPlugin
)
```

#### Functions

```python
@patch
def supports_streaming(
    self: VoxtralVLLMPlugin # The plugin instance
) -> bool: # True if streaming is supported
    "Check if this plugin supports streaming transcription."
```

```python
@patch
def execute_stream(
    self: VoxtralVLLMPlugin, # The plugin instance
    audio: Union[AudioData, str, Path], # Audio data or path to audio file
    **kwargs # Additional plugin-specific parameters
) -> Generator[str, None, TranscriptionResult]: # Yields text chunks, returns final result
    "Stream transcription results chunk by chunk."
```

#### Classes

```python
class VLLMServer:
    "vLLM server manager for Voxtral models."
    
    def __init__(
            self,
            model: str = "mistralai/Voxtral-Mini-3B-2507", # Model name to serve
            port: int = 8000, # Port for the server
            host: str = "0.0.0.0", # Host address to bind to
            gpu_memory_utilization: float = 0.85, # Fraction of GPU memory to use
            log_level: str = "INFO", # Logging level (DEBUG, INFO, WARNING, ERROR)
            capture_logs: bool = True, # Whether to capture and display server logs
            **kwargs # Additional vLLM server arguments
        )
    
    def add_log_callback(
            self, 
            callback: Callable[[str], None] # Function that receives log line strings
        ) -> None: # Returns nothing
        "Add a callback function to receive each log line."
    
    def start(
            self, 
            wait_for_ready: bool = True, # Wait for server to be ready before returning
            timeout: int = 120, # Maximum seconds to wait for server readiness
            show_progress: bool = True # Show progress indicators during startup
        ) -> None: # Returns nothing
        "Start the vLLM server."
    
    def stop(self) -> None: # Returns nothing
        "Stop the vLLM server."
    
    def restart(self) -> None: # Returns nothing; stops the server, waits 2 seconds, then starts it again
        "Restart the server."
    
    def is_running(self) -> bool: # True if server is running and responsive
        "Check if server is running and responsive."
    
    def get_recent_logs(
            self, 
            n: int = 100 # Number of recent log lines to retrieve
        ) -> List[str]: # List of recent log lines
        "Get the most recent n log lines."
    
    def get_metrics_from_logs(self) -> dict: # Dictionary with performance metrics (e.g. "prompt_throughput")
        "Parse recent logs to extract performance metrics."
    
    def tail_logs(
            self, 
            follow: bool = True, # Continue displaying new logs as they arrive
            n: int = 10 # Number of initial lines to display
        ) -> None: # Returns nothing
        "Tail the server logs (similar to tail -f)."
```

```python
class VoxtralVLLMPlugin:
    "Mistral Voxtral transcription plugin via vLLM server."
    
    def __init__(self): # Sets up the logger, an empty config dict, and no server instance
        "Initialize the Voxtral VLLM plugin with default configuration."
    
    def name(self) -> str: # The plugin name identifier ("voxtral_vllm")
        "Get the plugin name identifier."
    
    @property
    def version(self) -> str: # The plugin version string ("1.0.0")
        "Get the plugin version string."
    
    @property
    def supported_formats(self) -> List[str]: # List of supported audio formats (wav, mp3, flac, m4a, ogg, webm, mp4, avi, mov)
        "Get the list of supported audio file formats."
    
    @staticmethod
    def get_config_schema() -> Dict[str, Any]: # Configuration schema dictionary (JSON Schema draft-07)
        "Return configuration schema for Voxtral VLLM."
    
    def get_current_config(self) -> Dict[str, Any]: # Current configuration dictionary (defaults overridden by configured values)
        "Return current configuration."
    
    def initialize(
            self,
            config: Optional[Dict[str, Any]] = None # Configuration dictionary to initialize the plugin
        ) -> None: # Returns nothing
        "Initialize the plugin with configuration."
    
    def execute(
            self,
            audio: Union[AudioData, str, Path], # Audio data or path to audio file to transcribe
            **kwargs # Additional arguments to override config
        ) -> TranscriptionResult: # Transcription result with text and metadata
        "Transcribe audio using Voxtral via vLLM."
    
    def is_available(self) -> bool: # True if vLLM and dependencies are available
        "Check if vLLM and required dependencies are available."
    
    def cleanup(self) -> None: # Returns nothing; stops the managed server if it is running
        "Clean up resources."
```
