In [1]:
from pathlib import Path
import argparse
import os
import sys
import logging
import logging_config
from logging_config import setup_logging, get_child_logger
from video_processing import (
    get_youtube_urls_from_csv, 
    download_audio_yt,
    detect_boundaries, 
    split_audio_at_boundaries,
    split_audio_on_silence, 
    process_audio_chunks
)

In [3]:
# Configure logging for this notebook run via the project's setup_logging helper.
# NOTE(review): presumably log records are also written to the named file — confirm in logging_config.
setup_logging(log_filename="transcription_process_stepwise.log")
# Notebook-level logger used by every step below; its records show up as
# "tnh.yt_transcribe" in the captured cell output.
logger = get_child_logger("yt_transcribe")

In [5]:
# --- Run configuration -------------------------------------------------------
# Which single step the notebook executes on this run; the step cells below
# compare against "download", "split" and "transcribe".
process_setting = "transcribe"

# Inputs for the download step (left unset for split/transcribe-only runs).
url = None
start_time = None

# Transcription options handed to process_audio_chunks.
prompt = "Dharma, Deer Park, Thay, Thich Nhat Hanh, Bodhicitta, Bodhisattva, Mahayana"
translate_flag = False

# Upper bound handed to split_audio_at_boundaries as max_duration: 7 minutes in seconds.
max_chunk_duration = 60 * 7

# --- Output locations --------------------------------------------------------
output_base_dir = Path("./processed_videos/video_transcriptions")
video_name = "Taking Care of Our Fear ｜ Br. Phap Luu ｜ 2024-11-06"

# The per-video folders are only derived (and created) when a video name is known.
if video_name:
    video_output_dir = output_base_dir / video_name
    chunks_dir = video_output_dir / "chunks"
    video_output_dir.mkdir(parents=True, exist_ok=True)
    chunks_dir.mkdir(parents=True, exist_ok=True)

In [7]:
# Step 1: Download audio
# Runs only when process_setting == "download". Downloads the audio for `url`
# into output_base_dir, then moves it into a per-video folder named after the
# downloaded file. Leaves `video_name`, `video_output_dir`, `chunks_dir` and
# `audio_file` set for the later steps.
if process_setting == "download":
    if not url:
        # Fail early with an actionable message instead of handing None to the downloader.
        raise ValueError(
            'Set `url` in the parameter cell before running with process_setting = "download".'
        )

    logger.info("Downloading audio...")
    tmp_audio_file = download_audio_yt(url, output_base_dir, start_time=start_time)
    logger.info(f"Downloaded audio file: {tmp_audio_file}")

    video_name = tmp_audio_file.stem  # Use the stem of the audio file (title without extension)

    # video_name has just changed, so the per-video folders must be re-derived
    # from it; otherwise the file would be moved into the folder of whatever
    # name the parameter cell held (or fail with NameError if that name was empty).
    video_output_dir = output_base_dir / video_name
    chunks_dir = video_output_dir / "chunks"
    video_output_dir.mkdir(parents=True, exist_ok=True)
    chunks_dir.mkdir(parents=True, exist_ok=True)

    # Move the tmp audio file to the video directory
    audio_file = video_output_dir / tmp_audio_file.name

    try:
        tmp_audio_file.rename(audio_file)
        logger.info(f"Moved audio file to: {audio_file}")
    except OSError as e:
        logger.error(f"Failed to move audio file to {video_output_dir}: {e}")
        # Best effort: keep working with the file where the downloader left it.
        audio_file = tmp_audio_file

In [8]:
# Steps 2-3: detect boundaries in the downloaded audio and split it into chunks.
# Runs only when process_setting == "split".
if process_setting == "split":
    # `audio_file` is only assigned by the download cell. When this cell runs on
    # a kernel where that cell was skipped, recover the audio from the video
    # folder: the file whose stem is the video name and which is not one of the
    # transcript outputs (.txt / .jsonl) written by the transcribe step.
    if "audio_file" not in globals():
        _audio_candidates = sorted(
            path
            for path in video_output_dir.iterdir()
            if path.is_file()
            and path.stem == video_name
            and path.suffix.lower() not in {".txt", ".jsonl"}
        )
        if not _audio_candidates:
            raise FileNotFoundError(
                f"No audio file named '{video_name}.*' found in {video_output_dir}; "
                "run the download step first."
            )
        audio_file = _audio_candidates[0]
        logger.info(f"Using existing audio file: {audio_file}")

    # Step 2: Detect boundaries
    logger.info("Detecting boundaries...")
    boundaries = detect_boundaries(audio_file)
    logger.info("Boundaries generated.")

    # Step 3: Split audio into chunks
    logger.info("Splitting audio into chunks...")
    split_audio_at_boundaries(
        audio_file=audio_file,
        boundaries=boundaries,
        output_dir=chunks_dir,
        max_duration=max_chunk_duration
    )
    logger.info(f"Audio chunks saved to: {chunks_dir}")

In [9]:
# Step 4: Transcribe audio chunks
# Runs only when process_setting == "transcribe". Passes the chunk directory to
# process_audio_chunks together with two output paths inside video_output_dir,
# both named after the video.
if process_setting == "transcribe":
    logger.info("Transcribing audio chunks...")
    transcript_file = video_output_dir / f"{video_name}.txt"
    jsonl_file = video_output_dir / f"{video_name}.jsonl"

    process_audio_chunks(
        directory=chunks_dir,
        output_file=transcript_file,
        jsonl_file=jsonl_file,
        prompt=prompt,
        translate=translate_flag
    )

    # `url` is None on transcribe-only runs, which would log "completed for None";
    # fall back to the video name so the message always identifies the source.
    logger.info(f"Transcription completed for {url or video_name}")
    logger.info(f"Transcript saved to: {transcript_file}")
    logger.info(f"Raw transcription data saved to: {jsonl_file}")

2024-12-14 06:18:27,608 - tnh.yt_transcribe - INFO - Transcribing audio chunks...
2024-12-14 06:18:27,610 - tnh.video_processing - INFO - Audio files found:
	['processed_videos/video_transcriptions/Taking Care of Our Fear ｜ Br. Phap Luu ｜ 2024-11-06/chunks/chunk_1.mp3', 'processed_videos/video_transcriptions/Taking Care of Our Fear ｜ Br. Phap Luu ｜ 2024-11-06/chunks/chunk_2.mp3']
2024-12-14 06:18:27,611 - tnh.video_processing - INFO - Processing chunk_1.mp3...
2024-12-14 06:18:27,612 - tnh.video_processing - INFO - Speech transcript parameters: file=processed_videos/video_transcriptions/Taking Care of Our Fear ｜ Br. Phap Luu ｜ 2024-11-06/chunks/chunk_1.mp3, model=whisper-1, response_format=verbose_json, mode=transcribe
	prompt='Dharma, Deer Park, Thay, Thich Nhat Hanh, Bodhicitta, Bodhisattva, Mahayana'
2024-12-14 06:18:54,005 - tnh.video_processing - INFO - Processing chunk_2.mp3...
2024-12-14 06:18:54,006 - tnh.video_pr