# core

> Core data structures for audio transcription

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any, List
from pathlib import Path
import numpy as np
from dataclasses import dataclass, field

In [None]:
#| export
@dataclass
class AudioData:
    """Container for audio data and metadata.

    Bundles raw samples with the sample rate, duration and optional source
    path. `duration` is stored exactly as supplied by the caller; it is not
    derived from `samples` and `sample_rate` here.

    Equality is defined explicitly because the dataclass-generated `__eq__`
    compares `samples` with `==`, which yields an element-wise array for numpy
    inputs and raises `ValueError` when that array is truth-tested.
    """
    samples: np.ndarray  # Audio sample data as a numpy array
    sample_rate: int  # Sample rate in Hz (e.g., 16000, 44100)
    duration: float  # Duration of the audio in seconds
    filepath: Optional[Path] = None  # Audio file path
    metadata: Dict[str, Any] = field(default_factory=dict)  # Additional metadata

    def __eq__(self, other):
        """Return True when every field matches, comparing `samples` by shape and value."""
        # Same contract as the generated dataclass __eq__: only identical types compare.
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (
            # Cheap scalar/container comparisons first; the array comparison runs last.
            self.sample_rate == other.sample_rate
            and self.duration == other.duration
            and self.filepath == other.filepath
            and self.metadata == other.metadata
            and np.array_equal(self.samples, other.samples)
        )

In [None]:
#| export
@dataclass
class TranscriptionResult:
    """Standardized transcription output.

    `segments` and `metadata` default to fresh empty containers. An explicit
    `None` is still accepted for either field (the annotations are `Optional`)
    and is normalized to an empty container, so consumers can iterate or index
    without a `None` check.
    """
    text: str  # The transcribed text
    confidence: Optional[float] = None  # Overall confidence score (0.0 to 1.0)
    segments: Optional[List[Dict]] = field(default_factory=list)  # List of transcription segments with timestamps and text
    metadata: Optional[Dict] = field(default_factory=dict)  # Transcription metadata

    def __post_init__(self):
        """Replace an explicitly passed `None` with the same empty container the default provides."""
        # Without this, `segments=None` and an omitted `segments` produce
        # different values (None vs []), and the two results compare unequal.
        if self.segments is None:
            self.segments = []
        if self.metadata is None:
            self.metadata = {}

## Testing the dataclasses

In [None]:
# Test AudioData: construction, repr, and the per-instance metadata default.
# numpy and Path come from the notebook's import cell, so they are not re-imported here.
audio = AudioData(
    samples=np.array([0.1, 0.2, 0.3]),
    sample_rate=16000,
    duration=1.5,
    filepath=Path("/tmp/test.wav")
)

# Assertions make this cell an actual test when the notebook is executed,
# rather than output that has to be checked by eye.
assert audio.sample_rate == 16000
assert audio.duration == 1.5
assert audio.filepath == Path("/tmp/test.wav")
assert audio.samples.shape == (3,)
assert audio.metadata == {}  # default_factory supplies an empty dict

print("AudioData instance:")
print(audio)
print(f"\nMetadata: {audio.metadata}")
audio.metadata['format'] = 'wav'
print(f"Updated metadata: {audio.metadata}")

# Mutating one instance's metadata must not leak into a freshly built instance.
assert audio.metadata == {'format': 'wav'}
assert AudioData(samples=np.array([]), sample_rate=16000, duration=0.0).metadata == {}

AudioData instance:
AudioData(samples=array([0.1, 0.2, 0.3]), sample_rate=16000, duration=1.5, filepath=Path('/tmp/test.wav'), metadata={})

Metadata: {}
Updated metadata: {'format': 'wav'}


In [None]:
# Test TranscriptionResult
result = TranscriptionResult(
    text="Hello world",
    confidence=0.95,
    segments=[
        {"start": 0.0, "end": 0.5, "text": "Hello"},
        {"start": 0.5, "end": 1.0, "text": "world"}
    ]
)

print("TranscriptionResult instance:")
print(result)
print(f"\nText: {result.text}")
print(f"Confidence: {result.confidence}")
print(f"Segments: {result.segments}")
print(f"Metadata: {result.metadata}")

TranscriptionResult instance:
TranscriptionResult(text='Hello world', confidence=0.95, segments=[{'start': 0.0, 'end': 0.5, 'text': 'Hello'}, {'start': 0.5, 'end': 1.0, 'text': 'world'}], metadata={})

Text: Hello world
Confidence: 0.95
Segments: [{'start': 0.0, 'end': 0.5, 'text': 'Hello'}, {'start': 0.5, 'end': 1.0, 'text': 'world'}]
Metadata: {}


In [None]:
# Test default values
result_minimal = TranscriptionResult(text="Just text")

# Only `text` was supplied, so every other field must hold its default.
assert result_minimal.text == "Just text"
assert result_minimal.confidence is None
assert result_minimal.segments == []
assert result_minimal.metadata == {}

print("\nMinimal TranscriptionResult:")
print(f"Text: {result_minimal.text}")
print(f"Confidence: {result_minimal.confidence}")
print(f"Segments: {result_minimal.segments}")
print(f"Metadata: {result_minimal.metadata}")

# Test equality (automatic with dataclass)
result_copy = TranscriptionResult(text="Just text")
assert result_minimal == result_copy
# Each instance gets its own default containers (default_factory), not a shared one.
assert result_minimal.segments is not result_copy.segments
assert result_minimal.metadata is not result_copy.metadata
print(f"\nEquality test: {result_minimal == result_copy}")


Minimal TranscriptionResult:
Text: Just text
Confidence: None
Segments: []
Metadata: {}

Equality test: True


In [None]:
#| hide
# Export the cells marked `#| export` from this notebook into the library module.
import nbdev
nbdev.nbdev_export()