# Test Plugin Integration

> Test the Voxtral HF plugin with the transcription plugin system

In [1]:
import logging
import json
import numpy as np
from pathlib import Path

from cjm_transcription_plugin_system.plugin_manager import PluginManager
from cjm_transcription_plugin_system.core import AudioData
from cjm_transcription_plugin_voxtral_hf.plugin import VoxtralHFPlugin

## Test Direct Plugin Usage

In [2]:
# Create plugin directly
plugin = VoxtralHFPlugin()

# Check basic properties
print(f"Plugin: {plugin.name} v{plugin.version}")
print(f"Available: {plugin.is_available()}")
print(f"Supported formats: {', '.join(plugin.supported_formats)}")
print(f"Supports streaming: {plugin.supports_streaming()}")

Plugin: voxtral_hf v1.0.0
Available: True
Supported formats: wav, mp3, flac, m4a, ogg, webm, mp4, avi, mov
Supports streaming: True


In [3]:
# Get configuration schema
schema = plugin.get_config_schema()
print("Configuration options:")
print(f"- Required: {schema.get('required', [])}")
print(f"- Total properties: {len(schema['properties'])}")
print("\nAvailable models:")
for model in schema['properties']['model_id']['enum']:
    print(f"  - {model}")

Configuration options:
- Required: ['model_id']
- Total properties: 14

Available models:
  - mistralai/Voxtral-Mini-3B-2507
  - mistralai/Voxtral-Small-24B-2507


## Test with Plugin Manager

In [4]:
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(name)s - %(levelname)s - %(message)s')

# Create plugin manager
manager = PluginManager()

In [5]:
# Option 1: Load plugin from module directly (for development)
# This works even without the package being installed
import sys
import os

# Add parent directory to path to import the plugin module
parent_dir = Path.cwd().parent
if str(parent_dir) not in sys.path:
    sys.path.insert(0, str(parent_dir))

# Import and register the plugin
from cjm_transcription_plugin_voxtral_hf.plugin import VoxtralHFPlugin

# Create a temporary module file for the plugin manager to load
temp_plugin_file = Path("temp_voxtral_plugin.py")
with open(temp_plugin_file, "w") as f:
    f.write("from cjm_transcription_plugin_voxtral_hf.plugin import VoxtralHFPlugin\n")

# Load the plugin
success = manager.load_plugin_from_module(
    str(temp_plugin_file),
    config={"model_id": "mistralai/Voxtral-Mini-3B-2507", "device": "cpu"}  # Use Mini model for testing
)

print(f"Plugin loaded: {success}")

# Clean up temp file
temp_plugin_file.unlink()

cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Initialized Voxtral HF plugin with model 'mistralai/Voxtral-Mini-3B-2507' on device 'cpu' with dtype 'torch.float32'
cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Loaded plugin from module: voxtral_hf


Plugin loaded: True


In [6]:
# List loaded plugins
print("Loaded plugins:")
for meta in manager.list_plugins():
    print(f"  - {meta.name} v{meta.version} (enabled: {meta.enabled})")

Loaded plugins:
  - voxtral_hf v1.0.0 (enabled: True)


In [7]:
# Get plugin configuration
current_config = manager.get_plugin_config("voxtral_hf")
print("Current Voxtral configuration:")
config_subset = {k: current_config[k] for k in ["model_id", "device", "language", "dtype"] if k in current_config}
print(json.dumps(config_subset, indent=2))

Current Voxtral configuration:
{
  "model_id": "mistralai/Voxtral-Mini-3B-2507",
  "device": "cpu",
  "language": "en",
  "dtype": "auto"
}


## Test Configuration Management

In [8]:
# Validate different configurations
test_configs = [
    ({"model_id": "mistralai/Voxtral-Small-24B-2507"}, "Valid: switching to Small model"),
    ({"model_id": "invalid_model"}, "Invalid: bad model name"),
    ({"temperature": 1.5}, "Valid: adjusting temperature"),
    ({"temperature": 3.0}, "Invalid: temperature out of range"),
]

for config, description in test_configs:
    is_valid, error = manager.validate_plugin_config("voxtral_hf", config)
    print(f"{description}")
    print(f"  Config: {config}")
    print(f"  Valid: {is_valid}")
    if error:
        print(f"  Error: {error[:100]}...")
    print()

Valid: switching to Small model
  Config: {'model_id': 'mistralai/Voxtral-Small-24B-2507'}
  Valid: True

Invalid: bad model name
  Config: {'model_id': 'invalid_model'}
  Valid: False
  Error: 'invalid_model' is not one of ['mistralai/Voxtral-Mini-3B-2507', 'mistralai/Voxtral-Small-24B-2507']...

Valid: adjusting temperature
  Config: {'temperature': 1.5}
  Valid: False
  Error: 'model_id' is a required property

Failed validating 'required' in schema:
    {'$schema': 'http://j...

Invalid: temperature out of range
  Config: {'temperature': 3.0}
  Valid: False
  Error: 'model_id' is a required property

Failed validating 'required' in schema:
    {'$schema': 'http://j...



In [9]:
# Update configuration
new_config = {
    "temperature": 0.8,
    "language": "en",
    "do_sample": False,
    "max_new_tokens": 10000
}

success = manager.update_plugin_config("voxtral_hf", new_config, merge=True)
print(f"Configuration updated: {success}")

if success:
    updated_config = manager.get_plugin_config("voxtral_hf")
    print("\nUpdated configuration:")
    for key in new_config:
        print(f"  {key}: {updated_config[key]}")

cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Initialized Voxtral HF plugin with model 'mistralai/Voxtral-Mini-3B-2507' on device 'cpu' with dtype 'torch.float32'
cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Updated configuration for plugin: voxtral_hf


Configuration updated: True

Updated configuration:
  temperature: 0.8
  language: en
  do_sample: False
  max_new_tokens: 10000


## Test Transcription

In [10]:
from nbdev.config import get_config
from pathlib import Path

config = get_config()
project_dir = config.config_path
test_dir = project_dir/"./test_files/"
audio_path = test_dir/"short_test_audio.mp3"
assert audio_path.exists()

In [11]:
print(f"Transcribing: {audio_path}")
result = manager.execute_plugin("voxtral_hf", audio_path)
print("Transcription result:")
print(f"  Text: {result.text}")
print(f"  Metadata: {result.metadata}")

cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Loading Voxtral model: mistralai/Voxtral-Mini-3B-2507


Transcribing: /mnt/SN850X_8TB_EXT4/Projects/GitHub/cj-mills/cjm-transcription-plugin-voxtral-hf/test_files/short_test_audio.mp3


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

mistral_common.tokens.tokenizers.tekken - INFO - Vocab size: 150000
mistral_common.tokens.tokenizers.tekken - INFO - Cutting vocab to first 130072 tokens.
mistral_common.tokens.tokenizers.tekken - INFO - Vocab size: 150000
mistral_common.tokens.tokenizers.tekken - INFO - Cutting vocab to first 130072 tokens.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Voxtral model loaded successfully
cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Processing audio with Voxtral mistralai/Voxtral-Mini-3B-2507
cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Transcription completed: 43 words


Transcription result:
  Text: November the 10th, Wednesday, 9 p.m. I'm standing in a dark alley. After waiting several hours, the time has come. A woman with long dark hair approaches. I have to act and fast before she realizes what has happened. I must find out.
  Metadata: {'model': 'mistralai/Voxtral-Mini-3B-2507', 'language': 'en', 'device': 'cpu', 'dtype': 'torch.float32'}


## Test Streaming Support

In [12]:
# Check streaming support
print(f"Plugin supports streaming: {manager.check_streaming_support('voxtral_hf')}")
print(f"All streaming plugins: {manager.get_streaming_plugins()}")

Plugin supports streaming: True
All streaming plugins: ['voxtral_hf']


In [13]:
# Test streaming transcription
print("Testing streaming transcription:")
print("Streaming output: ", end="")
for chunk in manager.execute_plugin_stream("voxtral_hf", audio_path):
    print(chunk, end="", flush=True)
print("\n\nStreaming completed!")

cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Using streaming mode for plugin voxtral_hf
cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Streaming transcription with Voxtral mistralai/Voxtral-Mini-3B-2507


Testing streaming transcription:
Streaming output: November the 10th, Wednesday, 9 p.m. I'm standing in a dark alley. After waiting several hours, the time has come. A woman with long dark hair approaches. I have to act and fast before she realizes what has happened. I must find out.

Streaming completed!


## Test Plugin Lifecycle

In [14]:
# Test disabling and enabling
print("Testing plugin lifecycle:")

# Disable plugin
manager.disable_plugin("voxtral_hf")
print(f"Plugin disabled: {not manager.plugins['voxtral_hf'].enabled}")

# Try to execute while disabled (should fail)
try:
    manager.execute_plugin("voxtral_hf", audio_path)
except ValueError as e:
    print(f"Expected error: {e}")

# Re-enable plugin
manager.enable_plugin("voxtral_hf")
print(f"Plugin re-enabled: {manager.plugins['voxtral_hf'].enabled}")

Testing plugin lifecycle:
Plugin disabled: True
Expected error: Plugin voxtral_hf is disabled
Plugin re-enabled: True


In [15]:
# Clean up
print("\nCleaning up...")
manager.unload_plugin("voxtral_hf")
print(f"Plugins loaded: {len(manager.list_plugins())}")

cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Unloading Voxtral model



Cleaning up...


cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Cleanup completed
cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Unloaded plugin: voxtral_hf


Plugins loaded: 0


## Test Entry Point Discovery (After Installation)

In [16]:
# This will only work after the package is installed with pip install -e .
# or pip install cjm-transcription-plugin-voxtral-hf

print("Testing entry point discovery:")
manager2 = PluginManager()

# Discover plugins via entry points
discovered = manager2.discover_plugins()
print(f"\nDiscovered {len(discovered)} plugin(s) via entry points:")
for plugin_meta in discovered:
    print(f"  - {plugin_meta.name} v{plugin_meta.version} from {plugin_meta.package_name}")

# Load discovered plugins
for plugin_meta in discovered:
    if plugin_meta.name == "voxtral_hf":
        success = manager2.load_plugin(plugin_meta, config={"model_id": "mistralai/Voxtral-Mini-3B-2507"})
        print(f"\nLoaded {plugin_meta.name}: {success}")

cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Discovered plugin: voxtral_hf v0.0.1 from package cjm-transcription-plugin-voxtral-hf
cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Initialized Voxtral HF plugin with model 'mistralai/Voxtral-Mini-3B-2507' on device 'cuda' with dtype 'torch.bfloat16'
cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Loaded plugin: voxtral_hf


Testing entry point discovery:

Discovered 1 plugin(s) via entry points:
  - voxtral_hf v0.0.1 from cjm-transcription-plugin-voxtral-hf

Loaded voxtral_hf: True


In [17]:
# Test with larger model if desired
# manager2.update_plugin_config('voxtral_hf', config={"model_id": "mistralai/Voxtral-Small-24B-2507", "device": "cuda"})
# manager2.get_plugin_config('voxtral_hf')

In [18]:
# Test transcription with discovered plugin
if len(discovered) > 0 and "voxtral_hf" in [p.name for p in discovered]:
    print(f"Transcribing: {audio_path}")
    result = manager2.execute_plugin("voxtral_hf", audio_path)
    print("Transcription result:")
    print(f"  Text: {result.text}")
    print(f"  Metadata: {result.metadata}")
    
    # Clean up
    manager2.get_plugin('voxtral_hf').cleanup()

cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Loading Voxtral model: mistralai/Voxtral-Mini-3B-2507


Transcribing: /mnt/SN850X_8TB_EXT4/Projects/GitHub/cj-mills/cjm-transcription-plugin-voxtral-hf/test_files/short_test_audio.mp3


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

mistral_common.tokens.tokenizers.tekken - INFO - Vocab size: 150000
mistral_common.tokens.tokenizers.tekken - INFO - Cutting vocab to first 130072 tokens.
mistral_common.tokens.tokenizers.tekken - INFO - Vocab size: 150000
mistral_common.tokens.tokenizers.tekken - INFO - Cutting vocab to first 130072 tokens.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Voxtral model loaded successfully
cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Processing audio with Voxtral mistralai/Voxtral-Mini-3B-2507
cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Transcription completed: 43 words
cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Unloading Voxtral model
cjm_transcription_plugin_voxtral_hf.plugin.VoxtralHFPlugin - INFO - Cleanup completed


Transcription result:
  Text: November the 10th, Wednesday, 9 p.m. I'm standing in a dark alley. After waiting several hours, the time has come. A woman with long dark hair approaches. I have to act and fast before she realises what has happened. I must find out.
  Metadata: {'model': 'mistralai/Voxtral-Mini-3B-2507', 'language': 'en', 'device': 'cuda', 'dtype': 'torch.bfloat16'}
