# cjm-transcription-plugin-gemini

> Google Gemini API plugin for the cjm-transcription-plugin-system library - provides speech-to-text transcription with configurable model selection and parameter control.

## Install

```bash
pip install cjm_transcription_plugin_gemini
```

## Project Structure

```
nbs/
└── plugin.ipynb # Plugin implementation for Google Gemini API transcription
```

Total: 1 notebook across 1 directory

## Module Dependencies

```mermaid
graph LR
    plugin[plugin<br/>Gemini Plugin]

```

No cross-module dependencies detected.

## CLI Reference

No CLI commands found in this project.

## Module Overview

Detailed documentation for each module in the project:

### Gemini Plugin (`plugin.ipynb`)
> Plugin implementation for Google Gemini API transcription

#### Import

```python
from cjm_transcription_plugin_gemini.plugin import (
    GeminiPluginConfig,
    GeminiPlugin
)
```

#### Functions

```python
@patch
def _get_api_key(
    self:GeminiPlugin
) -> str:  # The API key string
    "Get API key from config or environment."
```

```python
@patch
def _refresh_available_models(
    self:GeminiPlugin
) -> List[str]:  # List of available model names
    "Fetch and filter available models from Gemini API."
```

```python
@patch
def _update_max_tokens_for_model(
    self:GeminiPlugin,
    model_name: str  # Model name to update tokens for
) -> None:
    "Update max_output_tokens config based on the model's token limit."
```

```python
@patch
def update_config(
    self:GeminiPlugin,
    config: Union[Dict[str, Any], GeminiPluginConfig]  # New configuration values
) -> None:
    "Update plugin configuration, adjusting max_tokens if model changes."
```

```python
@patch
def _prepare_audio(
    self:GeminiPlugin,
    audio: Union[AudioData, str, Path]  # Audio data object or path to audio file
) -> Tuple[Path, bool]:  # Tuple of (processed audio path, whether temp file was created)
    "Prepare audio file for upload."
```

```python
@patch
def _upload_audio_file(
    self:GeminiPlugin,
    audio_path: Path  # Path to audio file to upload
) -> Any:  # Uploaded file object
    "Upload audio file to Gemini API."
```

```python
@patch
def _delete_uploaded_file(
    self:GeminiPlugin,
    file_name: str  # Name of file to delete
) -> None:
    "Delete an uploaded file from Gemini API."
```

```python
@patch
def cleanup(
    self:GeminiPlugin
) -> None:
    "Clean up resources."
```

```python
@patch
def get_available_models(
    self:GeminiPlugin
) -> List[str]:  # List of available model names
    "Get list of available audio-capable models."
```

```python
@patch
def get_model_info(
    self:GeminiPlugin,
    model_name: Optional[str] = None  # Model name to get info for, defaults to current model
) -> Dict[str, Any]:  # Dict with model information
    "Get information about a specific model including token limits."
```

```python
@patch
def supports_streaming(
    self:GeminiPlugin
) -> bool:  # True if streaming is supported
    "Check if this plugin supports streaming transcription."
```

```python
@patch
def execute_stream(
    self:GeminiPlugin,
    audio: Union[AudioData, str, Path],  # Audio data object or path to audio file
    **kwargs  # Additional arguments to override config
) -> Generator[str, None, TranscriptionResult]:  # Yields text chunks, returns final result
    "Stream transcription results chunk by chunk."
```

#### Classes

```python
@dataclass
class GeminiPluginConfig:
    "Configuration for Gemini transcription plugin."
    
    model: str = field(...)
    api_key: Optional[str] = field(...)
    prompt: str = field(...)
    temperature: float = field(...)
    top_p: float = field(...)
    max_output_tokens: int = field(...)
    seed: Optional[int] = field(...)
    response_mime_type: str = field(...)
    downsample_audio: bool = field(...)
    downsample_rate: int = field(...)
    downsample_channels: int = field(...)
    safety_settings: str = field(...)
    auto_refresh_models: bool = field(...)
    model_filter: List[str] = field(...)
    use_file_upload: bool = field(...)
    use_streaming: bool = field(...)
    delete_uploaded_files: bool = field(...)
```

```python
class GeminiPlugin:
    "Google Gemini API transcription plugin."
    
    def __init__(self):
        "Initialize the Gemini plugin with default configuration."
        self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
        self.config: GeminiPluginConfig = None
    
    def name(
            self
        ) -> str:  # Plugin name identifier
        "Return the plugin name identifier."
    
    def version(
            self
        ) -> str:  # Plugin version string
        "Return the plugin version string."
    
    def supported_formats(
            self
        ) -> List[str]:  # List of supported audio formats
        "Return list of supported audio file formats."
    
    def get_current_config(
            self
        ) -> GeminiPluginConfig:  # Current configuration dataclass
        "Return current configuration."
    
    def get_config_dataclass() -> GeminiPluginConfig:  # Configuration dataclass
        "Return dataclass describing the plugin's configuration options."
        return GeminiPluginConfig
    
    def initialize(
            self,
            config: Optional[Any] = None  # Configuration dataclass, dict, or None
        ) -> None:
        "Initialize the plugin with configuration."
    
    def execute(
            self,
            audio: Union[AudioData, str, Path],  # Audio data object or path to audio file
            **kwargs  # Additional arguments to override config
        ) -> TranscriptionResult:  # Transcription result object
        "Transcribe audio using Gemini."
    
    def is_available(
            self
        ) -> bool:  # True if the Gemini API is available
        "Check if Gemini API is available."
```
