In [1]:
from youtube_transcript_api import YouTubeTranscriptApi, Transcript
from pprint import pprint
import ffmpeg
import json
import yt_dlp

In [None]:
# Sample YouTube video IDs used as inputs by the cells further down.
# NOTE: the saved output of the transcript cell reports TranscriptsDisabled
# for the HNAC video (6ERinp7x3kc).
YT_ID_HNAC_01 = '6ERinp7x3kc'
YT_ID_OLA_CIENCIA_01 = 'hVNEKpSZ_dE'

In [None]:
def show_video_transcript_obj(transcript: Transcript, show_dirs=False, show_vars=False) -> None:
    """
    Print a compact, tab-indented summary of a YouTube transcript object.

    The summary shows the object's ``video_id``, ``is_generated``, ``language``
    and ``language_code`` attributes, plus the number of entries in
    ``translation_languages``.

    Args:
        transcript (Transcript): The transcript object to summarise.
        show_dirs (bool, optional): Also display the result of ``transcript.__dir__()``. Defaults to False.
        show_vars (bool, optional): Also display ``vars(transcript)``. Defaults to False.

    Returns:
        None
    """

    # The `=` specifier renders the expression text followed by the value's repr
    # (e.g. "transcript.video_id='abc'"), so the expressions are spelled out in full.
    attribute_lines = (
        f'{transcript.video_id=}',
        f'{transcript.is_generated=}',
        f'{transcript.language=}',
        f'{transcript.language_code=}',
    )
    translations_line = f'translation_languages: {len(transcript.translation_languages)}'

    # One entry per line; every line after the header is indented with a tab.
    print('Transcript:', *attribute_lines, translations_line, sep='\n\t')

    # Optional deep-dive views, rendered with the notebook's `display`.
    if show_dirs:
        display(transcript.__dir__())
    if show_vars:
        display(vars(transcript))


def show_video_metadata(metadata: dict, show_everything=False) -> None:
    """
    Show a video's metadata, either in full or reduced to its main fields.

    Args:
        metadata (dict): Metadata dictionary of a video.
        show_everything (bool, optional): If True, pretty-print the whole dictionary
            and stop. If False, display only a fixed selection of fields; fields
            missing from ``metadata`` are shown as None. Defaults to False.

    Returns:
        None
    """

    # Full dump of all (the many) available fields
    if show_everything:
        pprint(metadata)
        return

    # Main fields that keep their original key (the thumbnail is left out).
    main_fields = (
        'id',
        'title',
        'url',
        'duration',
        'view_count',
        'description',
        'upload_date',
        'channel',
        'channel_id',
    )
    summary = {field: metadata.get(field) for field in main_fields}

    # The like counter is shown under a shorter key than the one it is read from.
    summary['likes'] = metadata.get('like_count')

    display(summary)
    

In [None]:
def get_yt_video_info(video_id_or_url: str, skip_download=True, display_metadata=False, is_quiet=True) -> 'dict | str':
    """
    Extract the metadata of a YouTube video with yt-dlp, without downloading it.

    Args:
        video_id_or_url (str): Video ID or URL, passed unchanged to ``YoutubeDL.extract_info``.
        skip_download (bool, optional): Value of the ``skip_download`` downloader option.
            Extraction is always requested with ``download=False`` and ``simulate`` is
            always enabled, regardless of this flag. Defaults to True.
        display_metadata (bool, optional): If True, show the main metadata fields through
            ``show_video_metadata`` before returning. Defaults to False.
        is_quiet (bool, optional): Value of the ``quiet`` downloader option. Defaults to True.

    Returns:
        dict | str: The value returned by ``extract_info`` on success. If anything raises
        along the way, the string ``"Error extracting metadata: <message>"`` is returned
        instead of propagating the exception, so callers must check the result type.
    """

    # Options for the YouTube downloader
    ydl_opts = {

        'quiet': is_quiet,
        'no_warnings': False,
        'windowsfilenames': True,
        'consoletitle': True,       # Display progress in the console window's titlebar.

        'skip_download': skip_download,  # Skip the actual download of the video file
        'simulate': True,           # Don't download video files. If unset (or None) -> Simulate only if listsubtitles, listformats or list_thumbnails is used

        'keepvideo': True,          # Keep the video file after post-processing
        'noplaylist': True,         # Download single video instead of a playlist if in doubt.

        'forcejson': True,          # Force printing info_dict as JSON.
        'writeinfojson': True,      # Write the video description to a .info.json file

        # --- Options kept for reference, currently not set ---
        # 'getcomments': False,       # Will not be written to disk unless writeinfojson is also given
        # 'daterange': _,             # A utils.DateRange object, download only if the upload_date is in the range.
        # 'writedescription': True,   # Write the video description to a .description file
        # 'clean_infojson': _,        # Remove internal metadata from the infojson
        # 'paths': [],                # Allowed keys are 'home', 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
        # 'cachedir': _,              # Location of the cache files in the filesystem. False to disable filesystem cache.
        # 'logger': _,                # Log messages to a logging.Logger instance.
        # 'cookiefile': 'cookies.txt',
        # 'cookiesfrombrowser': (_, _, _),    # (tuple) name of the browser + profile name/path from where cookies are loaded + the name of the keyring

        # Subtitle parameters
        # 'writeautomaticsub': _,     # Write the automatically generated subtitles to a file
        # 'listsubtitles': _,         # Lists all available subtitles for the video
        # 'subtitlesformat': _,       # The format code for subtitles
        # 'subtitleslangs': _,        # List of subtitles languages to download (can be regex); May contain "all"; To exclude: Add prefix "-" (e.g.: ['all', '-live_chat'])

    }

    try:
        # Run the extraction; the context manager releases the downloader afterwards.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:

            metadata = ydl.extract_info(video_id_or_url, download=False)
            if display_metadata:
                show_video_metadata(metadata=metadata, show_everything=False)
            return metadata

    except Exception as e:
        # Best-effort contract: report the failure as a string rather than raising.
        return f"Error extracting metadata: {e}"
    
# Fetch the metadata of the HNAC sample video. Alternative inputs that were tried:
#   get_yt_video_info('https://www.youtube.com/watch?v=hVNEKpSZ_dE')
#   get_yt_video_info(video_id_or_url=YT_ID_OLA_CIENCIA_01)
pumba = get_yt_video_info(YT_ID_HNAC_01)
    
    

In [8]:
def get_transcript(video_id: str, show_info=False) -> None:
    """
    List the transcripts available for a YouTube video and optionally describe each one.

    Args:
        video_id (str): ID of the video, passed to ``YouTubeTranscriptApi.list_transcripts``.
        show_info (bool, optional): If True, call ``show_video_transcript_obj`` (with
            ``show_dirs`` and ``show_vars`` enabled) for every listed transcript.
            Defaults to False.

    Returns:
        None

    Raises:
        Exception: Any error raised while listing or showing the transcripts is
            re-raised unchanged, after a one-line context message has been printed.
    """

    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id=video_id, cookies=None)

        for transcript in transcript_list:

            if show_info:
                show_video_transcript_obj(transcript=transcript, show_dirs=True, show_vars=True)

            # TODO: fetch the transcript text (transcript.fetch()) and persist it,
            # e.g. as JSON in 'transcript.json'; nothing is fetched or stored yet.

    except Exception as e:
        # Say which video failed and why, then re-raise with the original traceback.
        print(f'Could not list transcripts for video {video_id!r}: {type(e).__name__}')
        raise


# Inspect the transcripts of the HNAC sample video. Alternative input that was tried:
#   get_transcript(video_id=YT_ID_OLA_CIENCIA_01, show_info=False)
asdfpoia = get_transcript(YT_ID_HNAC_01, show_info=True)

fu


TranscriptsDisabled: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=6ERinp7x3kc! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!