# Test Plugin Integration

> Test the Gemini plugin with the transcription plugin system

In [1]:
import os
import logging
import json
import numpy as np
from pathlib import Path

from cjm_transcription_plugin_system.plugin_manager import PluginManager
from cjm_transcription_plugin_system.core import AudioData
from cjm_transcription_plugin_gemini.plugin import GeminiPlugin

## Test Direct Plugin Usage

In [2]:
# Create plugin directly
plugin = GeminiPlugin()

# Check basic properties
print(f"Plugin: {plugin.name} v{plugin.version}")
print(f"Available: {plugin.is_available()}")
print(f"Supported formats: {', '.join(plugin.supported_formats)}")

Plugin: gemini v1.0.0
Available: True
Supported formats: wav, mp3, aiff, aac, ogg, flac


In [3]:
# Get configuration schema
schema = plugin.get_config_schema()
print("Configuration options:")
print(f"- Required: {schema.get('required', [])}")
print(f"- Total properties: {len(schema['properties'])}")
print("\nKey configuration properties:")
for prop in ['model', 'temperature', 'downsample_audio', 'prompt']:
    prop_schema = schema['properties'][prop]
    print(f"  {prop}:")
    print(f"    Type: {prop_schema.get('type')}")
    print(f"    Default: {prop_schema.get('default')}")
    print(f"    Description: {prop_schema.get('description')}")

Configuration options:
- Required: ['model']
- Total properties: 14

Key configuration properties:
  model:
    Type: string
    Default: gemini-2.5-flash
    Description: Gemini model to use for transcription
  temperature:
    Type: number
    Default: 0.0
    Description: Sampling temperature
  downsample_audio:
    Type: boolean
    Default: False
    Description: Downsample audio before uploading (requires ffmpeg)
  prompt:
    Type: string
    Default: Generate a transcription of the audio, only extract speech and ignore background audio.
    Description: Prompt for transcription


## Test with Plugin Manager

In [4]:
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(name)s - %(levelname)s - %(message)s')

# Create plugin manager
manager = PluginManager()

In [5]:
# Load plugin from module (for development)
import sys
parent_dir = Path.cwd().parent
if str(parent_dir) not in sys.path:
    sys.path.insert(0, str(parent_dir))

# Create a temporary module file
temp_plugin_file = Path("temp_gemini_plugin.py")
with open(temp_plugin_file, "w") as f:
    f.write("from cjm_transcription_plugin_gemini.plugin import GeminiPlugin\n")

# Load the plugin
# Note: API key must be set in environment or config
config = {"model": "gemini-2.5-flash"}
if os.environ.get("GEMINI_API_KEY"):
    success = manager.load_plugin_from_module(str(temp_plugin_file), config=config)
    print(f"Plugin loaded: {success}")
else:
    print("Set GEMINI_API_KEY environment variable to load the plugin")
    success = False

# Clean up temp file
temp_plugin_file.unlink()

httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models "HTTP/1.1 200 OK"
httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models?pageToken=Ch5tb2RlbHMvaW1hZ2VuLTMuMC1nZW5lcmF0ZS0wMDI%3D "HTTP/1.1 200 OK"
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Found 36 audio-capable models
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Updated max_output_tokens to 65536 for model 'gemini-2.5-flash'
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Initialized Gemini plugin with model 'gemini-2.5-flash'
cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Loaded plugin from module: gemini


Plugin loaded: True


In [6]:
if success:
    # List loaded plugins
    print("Loaded plugins:")
    for meta in manager.list_plugins():
        print(f"  - {meta.name} v{meta.version} (enabled: {meta.enabled})")
    
    # Get available models
    gemini_plugin = manager.get_plugin("gemini")
    if hasattr(gemini_plugin, 'get_available_models'):
        models = gemini_plugin.get_available_models()
        print(f"\nAvailable models ({len(models)} total):")
        for model in models[:10]:  # Show first 10
            print(f"  - {model}")

httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models "HTTP/1.1 200 OK"
httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models?pageToken=Ch5tb2RlbHMvaW1hZ2VuLTMuMC1nZW5lcmF0ZS0wMDI%3D "HTTP/1.1 200 OK"
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Found 36 audio-capable models


Loaded plugins:
  - gemini v1.0.0 (enabled: True)

Available models (36 total):
  - gemma-3n-e4b-it
  - gemma-3n-e2b-it
  - gemma-3-4b-it
  - gemma-3-27b-it
  - gemma-3-1b-it
  - gemma-3-12b-it
  - gemini-exp-1206
  - gemini-2.5-pro-preview-06-05
  - gemini-2.5-pro-preview-05-06
  - gemini-2.5-pro-preview-03-25


In [7]:
if success:
    # Get current configuration
    current_config = manager.get_plugin_config("gemini")
    print("Current Gemini configuration:")
    config_subset = {
        k: current_config[k] 
        for k in ["model", "temperature", "top_p", "downsample_audio", "safety_settings"] 
        if k in current_config
    }
    print(json.dumps(config_subset, indent=2))

Current Gemini configuration:
{
  "model": "gemini-2.5-flash",
  "temperature": 0.0,
  "top_p": 0.95,
  "downsample_audio": false,
  "safety_settings": "OFF"
}


## Test Configuration Management

In [8]:
if success:
    # Test configuration validation
    test_configs = [
        ({"model": "gemini-2.0-flash"}, "Valid: switching to 2.0 flash"),
        ({"model": "invalid_model"}, "Invalid: bad model name"),
        ({"temperature": 0.7}, "Valid: adjusting temperature"),
        ({"temperature": 3.0}, "Invalid: temperature out of range"),
        ({"downsample_audio": True, "downsample_rate": 16000}, "Valid: enable downsampling"),
    ]
    
    for config, description in test_configs:
        is_valid, error = manager.validate_plugin_config("gemini", config)
        print(f"{description}")
        print(f"  Config: {config}")
        print(f"  Valid: {is_valid}")
        if error:
            print(f"  Error: {error[:100]}...")
        print()

Valid: switching to 2.0 flash
  Config: {'model': 'gemini-2.0-flash'}
  Valid: True

Invalid: bad model name
  Config: {'model': 'invalid_model'}
  Valid: False
  Error: 'invalid_model' is not one of ['gemma-3n-e4b-it', 'gemma-3n-e2b-it', 'gemma-3-4b-it', 'gemma-3-27b-i...

Valid: adjusting temperature
  Config: {'temperature': 0.7}
  Valid: False
  Error: 'model' is a required property

Failed validating 'required' in schema:
    {'$schema': 'http://json...

Invalid: temperature out of range
  Config: {'temperature': 3.0}
  Valid: False
  Error: 'model' is a required property

Failed validating 'required' in schema:
    {'$schema': 'http://json...

Valid: enable downsampling
  Config: {'downsample_audio': True, 'downsample_rate': 16000}
  Valid: False
  Error: 'model' is a required property

Failed validating 'required' in schema:
    {'$schema': 'http://json...



In [9]:
if success:
    # Update configuration
    new_config = {
        "temperature": 0.3,
        "prompt": "Transcribe this audio accurately, including any technical terms.",
        "downsample_audio": True,
        "downsample_rate": 16000
    }
    
    update_success = manager.update_plugin_config("gemini", new_config, merge=True)
    print(f"Configuration updated: {update_success}")
    
    if update_success:
        updated_config = manager.get_plugin_config("gemini")
        print("\nUpdated configuration:")
        for key in new_config:
            print(f"  {key}: {updated_config[key]}")

cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Cleanup completed
httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models "HTTP/1.1 200 OK"
httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models?pageToken=Ch5tb2RlbHMvaW1hZ2VuLTMuMC1nZW5lcmF0ZS0wMDI%3D "HTTP/1.1 200 OK"
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Found 36 audio-capable models
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Updated max_output_tokens to 65536 for model 'gemini-2.5-flash'
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Initialized Gemini plugin with model 'gemini-2.5-flash'
cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Updated configuration for plugin: gemini


Configuration updated: True

Updated configuration:
  temperature: 0.3
  prompt: Transcribe this audio accurately, including any technical terms.
  downsample_audio: True
  downsample_rate: 16000


## Test Transcription

In [10]:
from nbdev.config import get_config
from pathlib import Path

config = get_config()
project_dir = config.config_path
test_dir = project_dir/"./test_files/"
audio_path = test_dir/"short_test_audio.mp3"
# audio_path = test_dir/"constitution_01_unitedstates_128kb.mp3"
assert audio_path.exists()

In [11]:
# Create test audio
def create_test_audio():
    """Create a simple test audio signal."""
    sample_rate = 16000
    duration = 2  # seconds
    t = np.linspace(0, duration, sample_rate * duration)
    
    # Create a simple tone
    frequency = 440  # A4 note
    audio = 0.3 * np.sin(2 * np.pi * frequency * t)
    
    # Add some variation
    audio += 0.1 * np.sin(2 * np.pi * frequency * 2 * t)
    audio += 0.05 * np.random.randn(len(t))  # Add noise
    
    return AudioData(
        samples=audio.astype(np.float32),
        sample_rate=sample_rate,
        duration=len(audio) / sample_rate,
        filepath=None,
        metadata={"description": "Test tone signal"}
    )

test_audio = create_test_audio()
print(f"Created test audio: {test_audio.duration:.2f} seconds at {test_audio.sample_rate} Hz")

Created test audio: 2.00 seconds at 16000 Hz


In [12]:
if success:
    # Test with synthetic audio (may not produce meaningful text)
    try:
        print("Testing with synthetic audio...")
        result = manager.execute_plugin("gemini", test_audio)
        print("Transcription result:")
        print(f"  Text: '{result.text[:200]}...'" if len(result.text) > 200 else f"  Text: '{result.text}'")
        print(f"  Metadata: {result.metadata}")
    except Exception as e:
        print(f"Note: Synthetic audio may not produce meaningful results")
        print(f"Error: {e}")

Testing with synthetic audio...


Downsampling:   0%|          | 0.0/2.0s [00:00<?]

cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Downsampled audio to 16000Hz
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Transcribing with Gemini model: gemini-2.5-flash (max_tokens: 65536)
google_genai.models - INFO - AFC is enabled with max remote calls: 10.



Successfully downsampled audio.


httpx - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
google_genai.models - INFO - AFC remote call 1 is done.
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Transcription completed: 1 words


Transcription result:
  Text: '[Tone]'
  Metadata: {'model': 'gemini-2.5-flash', 'temperature': 0.3, 'top_p': 0.95, 'max_output_tokens': 65536, 'prompt': 'Transcribe this audio accurately, including any technical terms.'}


In [13]:
if success:
    # Test with actual audio file if available
    if audio_path.exists():
        print(f"Transcribing: {audio_path}")
        
        # You can override config at execution time
        result = manager.execute_plugin(
            "gemini", 
            str(audio_path),
            temperature=0.0,  # Override for deterministic output
            prompt="Provide a detailed transcription of this audio."
        )
        
        print(f"\nTranscription (first 500 chars):")
        print(result.text[:500])
        print(f"\n...({len(result.text)} total characters)")
        print(f"\nModel used: {result.metadata.get('model')}")
    else:
        print(f"Sample audio file not found: {audio_path}")
        print("Place an audio file at this path to test real transcription.")

Transcribing: /mnt/SN850X_8TB_EXT4/Projects/GitHub/cj-mills/cjm-transcription-plugin-gemini/test_files/short_test_audio.mp3


Downsampling:   0%|          | 0.0/28.0s [00:00<?]

cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Downsampled audio to 16000Hz
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Transcribing with Gemini model: gemini-2.5-flash (max_tokens: 65536)
google_genai.models - INFO - AFC is enabled with max remote calls: 10.



Successfully downsampled audio.


httpx - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
google_genai.models - INFO - AFC remote call 1 is done.
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Transcription completed: 43 words



Transcription (first 500 chars):
November the 10th, Wednesday, 9 p.m.

I'm standing in a dark alley.

After waiting several hours, the time has come.

A woman with long dark hair approaches.
I have to act, and fast, before she realizes what has happened.
I must find out.

...(238 total characters)

Model used: gemini-2.5-flash


## Test Downsampling Feature

In [14]:
if success:
    # Test with downsampling enabled
    print("Testing downsampling feature...")
    
    # Update config to enable downsampling
    downsample_config = {
        "downsample_audio": True,
        "downsample_rate": 8000,  # Very low sample rate
        "downsample_channels": 1  # Mono
    }
    
    manager.update_plugin_config("gemini", downsample_config, merge=True)
    
    # Create higher quality audio
    hq_audio = AudioData(
        samples=np.random.randn(44100 * 2).astype(np.float32) * 0.1,  # 2 seconds at 44.1kHz
        sample_rate=44100,
        duration=2.0,
        filepath=None,
        metadata={"description": "High quality test audio"}
    )
    
    print(f"Original audio: {hq_audio.sample_rate} Hz")
    print(f"Will downsample to: {downsample_config['downsample_rate']} Hz")
    
    try:
        result = manager.execute_plugin("gemini", hq_audio)
        print("Downsampling and transcription successful!")
    except Exception as e:
        print(f"Note: Downsampling requires ffmpeg to be installed")
        print(f"Error: {e}")

cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Cleanup completed
httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models "HTTP/1.1 200 OK"


Testing downsampling feature...


httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models?pageToken=Ch5tb2RlbHMvaW1hZ2VuLTMuMC1nZW5lcmF0ZS0wMDI%3D "HTTP/1.1 200 OK"
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Found 36 audio-capable models
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Updated max_output_tokens to 65536 for model 'gemini-2.5-flash'
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Initialized Gemini plugin with model 'gemini-2.5-flash'
cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Updated configuration for plugin: gemini


Original audio: 44100 Hz
Will downsample to: 8000 Hz


Downsampling:   0%|          | 0.0/2.0s [00:00<?]

cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Downsampled audio to 8000Hz
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Transcribing with Gemini model: gemini-2.5-flash (max_tokens: 65536)
google_genai.models - INFO - AFC is enabled with max remote calls: 10.



Successfully downsampled audio.


httpx - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
google_genai.models - INFO - AFC remote call 1 is done.
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Transcription completed: 1 words


Downsampling and transcription successful!


## Test Entry Point Discovery

In [15]:
# This will work after installing the package
print("Testing entry point discovery:")
manager2 = PluginManager()

# Discover plugins via entry points
discovered = manager2.discover_plugins()
print(f"\nDiscovered {len(discovered)} plugin(s) via entry points:")
for plugin_meta in discovered:
    print(f"  - {plugin_meta.name} v{plugin_meta.version} from {plugin_meta.package_name}")

# Load discovered Gemini plugin
for plugin_meta in discovered:
    if plugin_meta.name == "gemini":
        if os.environ.get("GEMINI_API_KEY"):
            success = manager2.load_plugin(plugin_meta, config={"model": "gemini-2.5-flash"})
            print(f"\nLoaded {plugin_meta.name}: {success}")
        else:
            print(f"\nSkipping {plugin_meta.name} - API key not set")

cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Discovered plugin: gemini v0.0.1 from package cjm-transcription-plugin-gemini
httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models "HTTP/1.1 200 OK"


Testing entry point discovery:

Discovered 1 plugin(s) via entry points:
  - gemini v0.0.1 from cjm-transcription-plugin-gemini


httpx - INFO - HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models?pageToken=Ch5tb2RlbHMvaW1hZ2VuLTMuMC1nZW5lcmF0ZS0wMDI%3D "HTTP/1.1 200 OK"
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Found 36 audio-capable models
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Updated max_output_tokens to 65536 for model 'gemini-2.5-flash'
cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Initialized Gemini plugin with model 'gemini-2.5-flash'
cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Loaded plugin: gemini



Loaded gemini: True


## Cleanup

In [16]:
if success:
    # Clean up
    print("Cleaning up...")
    manager.unload_plugin("gemini")
    print(f"Plugins remaining: {len(manager.list_plugins())}")

cjm_transcription_plugin_gemini.plugin.GeminiPlugin - INFO - Cleanup completed
cjm_transcription_plugin_system.plugin_manager.PluginManager - INFO - Unloaded plugin: gemini


Cleaning up...
Plugins remaining: 0
