# Result Storage

> File-based storage for transcription results

In [None]:
#| default_exp storage.file_storage

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import json
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional, List
from fastcore.basics import patch

from cjm_fasthtml_workflow_transcription_single_file.storage.config import StorageConfig

## ResultStorage Class

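Handles saving and loading transcription results to/from JSON files. Each result is written to its own timestamp-prefixed JSON file in the configured results directory, together with job, file, and plugin metadata for easy retrieval.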

In [None]:
#| export
class ResultStorage:
    """File-based storage for transcription results.

    Results live as JSON files inside the directory named by
    `config.results_directory`. The directory is created lazily, the first
    time the `results_directory` property is read.
    """

    def __init__(self,
                 config: StorageConfig  # Storage configuration
                 ):
        """Initialize the storage."""
        self.config = config
        # Filled in by `results_directory`; stays None until the directory
        # has actually been created.
        self._results_dir: Optional[Path] = None

    @property
    def results_directory(self) -> Path:  # Path to the results directory
        """Get the results directory, creating it if needed.

        Raises `OSError` if the directory cannot be created; in that case
        nothing is cached, so the next access retries the creation.
        """
        if self._results_dir is None:
            directory = Path(self.config.results_directory)
            directory.mkdir(exist_ok=True, parents=True)
            # Cache only after mkdir succeeded; caching first would make a
            # failed creation stick and hand out a non-existent directory.
            self._results_dir = directory
        return self._results_dir

In [None]:
#| export
@patch
def should_auto_save(
    self: ResultStorage
) -> bool:  # True if results should be automatically saved
    """Report the `auto_save` flag from the storage configuration."""
    settings = self.config
    return settings.auto_save

In [None]:
#| export
@patch
def save(
    self: ResultStorage,
    job_id: str,  # Unique job identifier
    file_path: str,  # Path to the transcribed media file
    file_name: str,  # Name of the media file
    plugin_id: str,  # Plugin unique identifier
    plugin_name: str,  # Plugin display name
    text: str,  # The transcription text
    metadata: Optional[Dict[str, Any]] = None,  # Optional metadata from the transcription plugin
    additional_info: Optional[Dict[str, Any]] = None  # Optional additional information to store
) -> Path:  # Path to the saved JSON file
    """Save a transcription result to a JSON file in the results directory.

    The stored document records the job id, the current local timestamp,
    the media file path/name, the plugin id/name, the transcription text with
    its whitespace-separated word count and character count, and the optional
    `metadata` / `additional_info` dictionaries (stored as `{}` when omitted).

    Raises `TypeError`/`ValueError` when the data cannot be serialized to
    JSON, `UnicodeEncodeError` when it cannot be encoded as UTF-8, and
    `OSError` when the file cannot be written. Serialization problems are
    detected before any file is created.
    """
    # Create result data structure
    result_data = {
        "job_id": job_id,
        "timestamp": datetime.now().isoformat(),
        "file": {
            "path": file_path,
            "name": file_name
        },
        "plugin": {
            "id": plugin_id,
            "name": plugin_name
        },
        "transcription": {
            "text": text,
            "word_count": len(text.split()),
            "char_count": len(text)
        },
        "metadata": metadata or {},
        "additional_info": additional_info or {}
    }

    # Serialize and validate the encoding before touching the disk: streaming
    # json.dump into an already-opened file would leave a truncated, unreadable
    # JSON file behind if e.g. plugin metadata holds a non-serializable value.
    payload = json.dumps(result_data, indent=2, ensure_ascii=False)
    payload.encode('utf-8')

    # Generate filename and save
    filename = self._generate_filename(job_id, file_name)
    result_path = self.results_directory / filename

    with open(result_path, 'w', encoding='utf-8') as f:
        f.write(payload)

    return result_path

In [None]:
#| export
@patch
def load(
    self: ResultStorage,
    result_file: Path  # Path to the JSON result file
) -> Optional[Dict[str, Any]]:  # Dictionary containing the result data, or None if error
    """Read and parse one JSON result file.

    Any failure (missing file, unreadable content, invalid JSON, ...) is
    reported on stdout and turned into a `None` return instead of raising.
    """
    try:
        with open(result_file, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except Exception as e:
        print(f"[ResultStorage] Error loading result from {result_file}: {e}")
        return None
    return parsed

In [None]:
#| export
@patch
def list_results(
    self: ResultStorage,
    sort_by: str = "timestamp",  # Field to sort by ("timestamp", "file_name", "word_count")
    reverse: bool = True  # Sort in reverse order (newest first by default)
) -> List[Dict[str, Any]]:  # List of result dictionaries
    """List all saved transcription results.

    Every `*.json` file directly inside the results directory is loaded.
    Files that cannot be loaded, are empty, or do not contain a JSON object
    are skipped. Each returned dictionary gets two extra keys:
    `result_file` (full path as a string) and `result_filename`.

    An unrecognized `sort_by` value leaves the results in directory
    iteration order.
    """
    results = []

    # Load all JSON files in the results directory
    for result_file in self.results_directory.glob("*.json"):
        result = self.load(result_file)
        # A stray JSON file whose top level is a list/number/string is not a
        # result; skipping it keeps one bad file from breaking the listing.
        if not isinstance(result, dict) or not result:
            continue
        # Add filename for reference
        result["result_file"] = str(result_file)
        result["result_filename"] = result_file.name
        results.append(result)

    # Sort results
    sort_key_map = {
        "timestamp": lambda x: x.get("timestamp", ""),
        "file_name": lambda x: x.get("file", {}).get("name", ""),
        "word_count": lambda x: x.get("transcription", {}).get("word_count", 0)
    }

    if sort_by in sort_key_map:
        results.sort(key=sort_key_map[sort_by], reverse=reverse)

    return results

In [None]:
#| export
@patch
def get_by_job_id(
    self: ResultStorage,
    job_id: str  # The job identifier to search for
) -> Optional[Dict[str, Any]]:  # Result dictionary if found, None otherwise
    """Return the first listed result whose `job_id` equals the given one.

    Results are scanned in the default `list_results()` order.
    """
    matching = (
        entry for entry in self.list_results()
        if entry.get("job_id") == job_id
    )
    return next(matching, None)

In [None]:
#| export
@patch
def delete(
    self: ResultStorage,
    result_file: str  # Path to the result file (can be full path or filename)
) -> bool:  # True if deletion successful, False otherwise
    """Remove a stored result file from disk.

    Relative paths are resolved against the results directory. Returns
    False when the file does not exist; errors are printed and also
    reported as False.
    """
    try:
        target = Path(result_file)

        # A bare filename / relative path refers to the results directory
        if not target.is_absolute():
            target = self.results_directory / target

        if not target.exists():
            return False

        target.unlink()
        return True

    except Exception as e:
        print(f"[ResultStorage] Error deleting result file {result_file}: {e}")
        return False

In [None]:
#| export
@patch
def update_text(
    self: ResultStorage,
    result_file: str,  # Path to the result file
    new_text: str  # New transcription text
) -> bool:  # True if update successful, False otherwise
    """Update the transcription text in a saved result.

    Replaces the stored text, recomputes the word and character counts, and
    records the edit time under `additional_info["last_edited"]`. Relative
    paths are resolved against the results directory. Any failure is printed
    and reported as False; the existing file is left untouched unless the
    final write itself fails.
    """
    try:
        file_path = Path(result_file)

        # If only filename provided, look in results directory
        if not file_path.is_absolute():
            file_path = self.results_directory / file_path

        # Load existing result
        result = self.load(file_path)
        if not result:
            return False

        # Update transcription data
        result["transcription"]["text"] = new_text
        result["transcription"]["word_count"] = len(new_text.split())
        result["transcription"]["char_count"] = len(new_text)

        # Results written without an "additional_info" object (or with a
        # null one) should still be editable, so create it on demand.
        info = result.get("additional_info")
        if not isinstance(info, dict):
            info = result["additional_info"] = {}
        info["last_edited"] = datetime.now().isoformat()

        # Serialize and validate the encoding first: opening with 'w'
        # truncates the only copy of the result, so a failure after that
        # point would destroy it.
        payload = json.dumps(result, indent=2, ensure_ascii=False)
        payload.encode('utf-8')

        # Save updated result
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(payload)

        return True

    except Exception as e:
        print(f"[ResultStorage] Error updating result file {result_file}: {e}")
        return False

In [None]:
#| export
@patch
def _generate_filename(
    self: ResultStorage,
    job_id: str,  # Unique job identifier
    file_name: str  # Original media file name
) -> str:  # Generated filename for the JSON result file
    """Generate a filename for storing transcription results.

    The name has the form `<YYYYMMDD_HHMMSS>_<first 8 chars of job_id>_<stem>.json`,
    where `<stem>` is the media file name without directory or extension,
    truncated to 50 characters. Spaces, path separators, control characters,
    and the characters `< > : " | ? *` are replaced by underscores in both
    the job id prefix and the stem.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Besides spaces, replace separators and characters that common
    # filesystems reject, so the result is always a single valid path
    # component regardless of where the media name or job id came from.
    replacements = {ord(ch): "_" for ch in ' <>:"/\\|?*'}
    replacements.update((code, "_") for code in range(32))

    # Sanitize file_name for use in filename
    safe_name = Path(file_name).stem.translate(replacements)[:50]
    safe_job = job_id[:8].translate(replacements)
    return f"{timestamp}_{safe_job}_{safe_name}.json"

## Usage Examples

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()