In [11]:
import json
import subprocess
from pathlib import Path
from typing import Optional

import yt_dlp

from tnh_scholar.utils.file_utils import ensure_directory_exists


In [6]:


def download_video(url: str, output_path: Path, audio_only: bool = False, format: Optional[str] = None) -> None:
    """
    Download the raw best quality audio or video from the given URL using yt-dlp.

    Args:
        url (str): The video URL.
        output_path (str): Path to save the downloaded file.
        audio_only (bool): If True, download best audio only.
        format (Optional[str]): Custom format string for yt-dlp.
    """
    ydl_opts = {
        'outtmpl': str(output_path),
        'format': format if format else ('bestaudio/best' if audio_only else 'bestvideo+bestaudio/best'),
        'quiet': False
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

# Example usage:
# download_video('https://www.youtube.com/watch?v=example', '/path/to/save/audio.m4a')

In [None]:
BASE_DIR = Path.home() / "Desktop" / "tmp_video_downloads" / "sr_bamboo_1"

ensure_directory_exists(BASE_DIR)

url = "https://www.youtube.com/watch?v=yU-wfQuHzqw&t=7s&ab_channel=L%C3%A0ngMaiOfficial"
output_path = BASE_DIR 


In [8]:
download_video(url=url, output_path=output_path)

[youtube] Extracting URL: https://www.youtube.com/watch?v=yU-wfQuHzqw&t=7s&ab_channel=L%C3%A0ngMaiOfficial
[youtube] yU-wfQuHzqw: Downloading webpage
[youtube] yU-wfQuHzqw: Downloading tv client config
[youtube] yU-wfQuHzqw: Downloading tv player API JSON
[youtube] yU-wfQuHzqw: Downloading ios player API JSON
[youtube] yU-wfQuHzqw: Downloading m3u8 information
[info] yU-wfQuHzqw: Downloading 1 format(s): 137+251
[download] Destination: /Users/phapman/tmp_video_downloads.f137.mp4
[download] 100% of  951.17MiB in 00:02:07 at 7.48MiB/s      
[download] Destination: /Users/phapman/tmp_video_downloads.f251.webm
[download] 100% of   53.12MiB in 00:00:05 at 9.49MiB/s     
[Merger] Merging formats into "/Users/phapman/tmp_video_downloads.mkv"
Deleting original file /Users/phapman/tmp_video_downloads.f251.webm (pass -k to keep)
Deleting original file /Users/phapman/tmp_video_downloads.f137.mp4 (pass -k to keep)


In [12]:
def get_media_info(file_path: str) -> dict:
    """
    Get detailed media info (codecs, streams, etc.) using ffprobe.

    Args:
        file_path (str): Path to the media file.

    Returns:
        dict: Parsed ffprobe output.
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "stream",
        "-print_format", "json",
        file_path
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    return json.loads(result.stdout)

# Example usage:
# info = get_media_info("/Users/phapman/Desktop/tmp_video_downloads/sr_bamboo_1.mkv")
# for stream in info.get('streams', []):
#     if stream.get('codec_type') == 'audio':
#         print(stream)

In [13]:
get_media_info("/Users/phapman/tmp_video_downloads.mkv")

{'programs': [],
 'stream_groups': [],
 'streams': [{'index': 0,
   'codec_name': 'h264',
   'codec_long_name': 'H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10',
   'profile': 'High',
   'codec_type': 'video',
   'codec_tag_string': '[0][0][0][0]',
   'codec_tag': '0x0000',
   'width': 1920,
   'height': 1080,
   'coded_width': 1920,
   'coded_height': 1080,
   'closed_captions': 0,
   'film_grain': 0,
   'has_b_frames': 1,
   'sample_aspect_ratio': '1:1',
   'display_aspect_ratio': '16:9',
   'pix_fmt': 'yuv420p',
   'level': 40,
   'color_range': 'tv',
   'color_space': 'bt709',
   'color_transfer': 'bt709',
   'color_primaries': 'bt709',
   'chroma_location': 'left',
   'field_order': 'progressive',
   'refs': 1,
   'is_avc': 'true',
   'nal_length_size': '4',
   'r_frame_rate': '30/1',
   'avg_frame_rate': '30/1',
   'time_base': '1/1000',
   'start_pts': 0,
   'start_time': '0.000000',
   'bits_per_raw_sample': '8',
   'extradata_size': 45,
   'disposition': {'default': 1,
    'dub': 0