In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Import necessary modules
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv
from IPython.core.display import HTML
from IPython.display import display

# Import the transcription service modules
from tnh_scholar.audio_processing.transcription_service import (
    DiarizationChunker,
    TranscriptionFormatConverter,
    TranscriptionServiceFactory,
)

# Load environment variables from .env file
load_dotenv()


In [None]:
import logging

from tnh_scholar.logging_config import setup_logging

# Configure project logging at DEBUG level and keep the object returned by
# setup_logging available to the rest of the notebook as `logger`.
logger = setup_logging(log_level=logging.DEBUG)

In [None]:
# Replace with your audio file path
working_dir = Path.home() / "Desktop" / "transcription_wouter"
audio_file = working_dir / "qa_sr_abbess_wh_sh.mp3"
# Fail fast when the input is missing. `exists` has to be *called*: the bare
# bound method `audio_file.exists` is always truthy, so a check written without
# the parentheses can never raise.
if not audio_file.exists():
    raise FileNotFoundError(f"Audio file not found: {audio_file}")

In [None]:
def run_transcription(audio_file_path, provider="whisper", options=None):
    """
    Transcribe an audio file using the chosen provider.

    Args:
        audio_file_path: Path to the audio file; passed unchanged to the service.
        provider: Provider name given to the service factory
            ('whisper' or 'assemblyai').
        options: Optional options object forwarded to the service's
            ``transcribe`` call. A fresh empty dict is used when omitted.

    Returns:
        Whatever the service's ``transcribe`` call returns.
    """
    # Fall back to an empty options dict when the caller supplied none
    effective_options = {} if options is None else options

    # Build the provider-specific service
    transcriber = TranscriptionServiceFactory.create_service(provider=provider)

    # Report what is about to run
    print(f"Testing transcription with {provider} service...")
    print(f"Audio file: {audio_file_path}")

    result = transcriber.transcribe(audio_file_path, effective_options)
    return result

In [None]:
def gen_srt(audio_file_obj, provider="whisper", language=None, local_convert=False):
    """
    Produce subtitle text for an audio file via a transcription provider.

    Args:
        audio_file_obj: Audio input handed unchanged to the service.
        provider: Provider name given to the service factory.
        language: Optional language code. When truthy it is sent to the
            service as ``{"language": language}``; otherwise no options
            are sent (``None``).
        local_convert: When true, transcribe first and pass the result to
            ``TranscriptionFormatConverter.convert`` with its default
            arguments. When false, ask the service directly for "srt" output.

    Returns:
        The converter's output when ``local_convert`` is true, otherwise the
        result of the service's ``transcribe_to_format`` call.
    """
    format_type = "srt"
    transcriber = TranscriptionServiceFactory.create_service(provider=provider)

    # Announce the run
    print(f"Running {format_type.upper()} generation with {provider} service...")
    print(f"Audio file: {audio_file_obj}")

    # Only send options when a language was actually requested
    transcription_options = None
    if language:
        transcription_options = {"language": language}

    if not local_convert:
        # Let the service render the requested format itself
        return transcriber.transcribe_to_format(
            audio_file_obj,
            format_type=format_type,
            transcription_options=transcription_options,
        )

    # Local path: get the plain transcription, then convert it in-process
    local_converter = TranscriptionFormatConverter()
    raw_transcript = transcriber.transcribe(
        audio_file_obj, options=transcription_options
    )
    return local_converter.convert(raw_transcript)
    

In [None]:
# Test Whisper transcription: language code "vi", word-level timestamp granularity
options = dict(language="vi", timestamp_granularities=["word"])
whisper_result = run_transcription(audio_file, provider="whisper", options=options)



In [None]:
# Text of each entry under the result's "utterances" key
[utterance["text"] for utterance in whisper_result["utterances"]]

In [None]:
# Display every entry under the result's "words" key as a list
list(whisper_result["words"])

In [None]:
# Examine specific parts of the result
# Note: 'language' is read with direct indexing (raises KeyError if absent),
# while 'audio_duration_ms' and 'words' fall back to defaults via .get().
print("\nExamining detailed result:")
print(f"Language: {whisper_result['language']}")
print(f"Audio duration: {whisper_result.get('audio_duration_ms', 'Not available')}")
print(f"Word count: {len(whisper_result.get('words', []))}")

In [None]:
# Test 'hand' format conversion to SRT
# Convert the transcription result locally with the project's format converter.
# No explicit format is passed, so the converter's defaults apply.
converter = TranscriptionFormatConverter()
srt_conv = converter.convert(whisper_result)

In [None]:
# Show the locally converted subtitle text
print(srt_conv)

In [None]:
# Test SRT generation with Whisper
# The notebook defines `gen_srt` (not `gen_format`); it always requests "srt",
# so no format argument is passed.
whisper_srt = gen_srt(audio_file, provider="whisper")


In [None]:
# Show the generated SRT text
print(whisper_srt)

In [None]:
from tnh_scholar.utils.file_utils import write_str_to_file

# Write the generated SRT text to a file inside the working directory.
# NOTE(review): this file name does not match the stem of `audio_file`
# ("qa_sr_abbess_wh_sh") — confirm the intended output name.
out_srt = working_dir / "Dharma Talk Br. Phap Hoi (for transcription) 2-bit.srt"
write_str_to_file(out_srt, whisper_srt)