In [15]:
!pip install -q youtube-transcript-api yt-dlp git+https://github.com/openai/whisper.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [16]:
import os
import subprocess
import sys
import warnings
from contextlib import redirect_stderr
from urllib.parse import parse_qs, urlparse

import transformers
import whisper
from youtube_transcript_api import YouTubeTranscriptApi

# Set the transformers logger verbosity to ERROR so that only error-level
# messages from that library are emitted.
transformers.logging.set_verbosity_error()

# Ignore every Python warning globally: no category, message, or module filter
# is passed, so this also hides deprecation warnings from all other libraries.
warnings.filterwarnings("ignore")

In [31]:
class VideoToTranscript:
    """Obtain the transcript of a YouTube video.

    The transcript is first requested from the YouTube transcript API. If that
    fails for any reason, the audio track is downloaded with ``yt-dlp`` and
    transcribed locally with Whisper.

    Attributes:
        video_url: The URL exactly as passed by the caller (handed to yt-dlp).
        video_id: The video ID extracted from ``video_url``.
        last_error: The most recent exception that was swallowed by
            ``get_transcript`` or ``download_and_transcribe``, or ``None``.
            Nothing is printed for it; it exists purely for debugging.
    """

    # Temporary audio file written by yt-dlp and consumed by Whisper.
    AUDIO_FILE = "output_audio.mp3"
    # Whisper checkpoint used for the fallback transcription.
    WHISPER_MODEL = "tiny"

    @classmethod
    def run(cls, video_url):
        """
        Simple one-line method to process a video URL and get its transcript.
        Usage: transcript = VideoToTranscript.run("https://youtube.com/watch?v=...")

        :param video_url: URL of the YouTube video
        :return: The transcript text, or None if every strategy failed
        """
        return cls(video_url).get_transcript()

    def __init__(self, video_url):
        """
        :param video_url: URL of the YouTube video (a bare video ID is accepted too)
        """
        self.video_url = video_url
        self.video_id = self._extract_video_id(video_url)
        self.last_error = None

    @staticmethod
    def _extract_video_id(video_url):
        """
        Extract the video ID from the common YouTube URL shapes.

        Handles ``watch?v=ID`` (with or without extra query parameters such as
        ``&t=`` or ``&list=``), ``youtu.be/ID`` and ``/shorts/ID``, ``/embed/ID``,
        ``/live/ID``, ``/v/ID``. Anything else falls back to the text after
        ``watch?v=``, which also lets a bare video ID pass through unchanged.

        :param video_url: URL (or bare ID) supplied by the caller
        :return: The extracted video ID string
        """
        candidate = video_url.strip()
        # urlparse only recognises the host when a scheme (or a leading "//") is present.
        parsed = urlparse(candidate if "://" in candidate else "//" + candidate)

        from_query = parse_qs(parsed.query).get("v")
        if from_query:
            return from_query[0]

        host = parsed.hostname or ""
        segments = [part for part in parsed.path.split("/") if part]
        if host in ("youtu.be", "www.youtu.be") and segments:
            return segments[0]
        if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
            return segments[1]

        return candidate.split("watch?v=")[-1]

    def clear_previous_files(self):
        """Delete the temporary audio file if it exists."""
        try:
            os.remove(self.AUDIO_FILE)
        except FileNotFoundError:
            # Nothing to clean up.
            pass

    @staticmethod
    def format_long_text(text, max_line_length=50):
        """
        Formats long text by adding new lines after a specific number of characters
        to ensure readability.

        Words are never split: a single word longer than ``max_line_length``
        is placed on a line of its own. Runs of whitespace are collapsed.

        :param text: The text to format
        :param max_line_length: Maximum length of a line before adding a newline
        :return: Formatted text with new lines
        """
        lines = []
        current_line = ""

        # split() without arguments drops leading/trailing/repeated whitespace,
        # so no empty "words" (and thus no stray spaces) reach the output.
        for word in text.split():
            if not current_line:
                current_line = word
            elif len(current_line) + 1 + len(word) > max_line_length:
                # Adding the word would exceed the limit: start a new line.
                lines.append(current_line)
                current_line = word
            else:
                current_line += " " + word

        # Append the last line
        if current_line:
            lines.append(current_line)

        return "\n".join(lines)

    def get_transcript(self):
        """
        Return the transcript, preferring the YouTube transcript API.

        Any failure of the API (no captions, network error, ...) is recorded in
        ``self.last_error`` and triggers the Whisper fallback.

        :return: The transcript text, or None if the fallback failed as well
        """
        try:
            # Suppress any stderr output temporarily
            with open(os.devnull, "w") as fnull, redirect_stderr(fnull):
                if hasattr(YouTubeTranscriptApi, "fetch"):
                    # youtube-transcript-api >= 1.0: instance API; the static
                    # get_transcript() was deprecated and later removed.
                    transcript = YouTubeTranscriptApi().fetch(self.video_id).to_raw_data()
                else:
                    transcript = YouTubeTranscriptApi.get_transcript(self.video_id)

            # Combine text from transcript
            transcript_text = "\n".join(item["text"] for item in transcript)
        except Exception as exc:
            # Deliberately quiet: keep the cause for debugging, print no details.
            self.last_error = exc
            print("Failed to fetch transcript from YouTube API.")
            print('\n----------------------------------------------')
            return self.download_and_transcribe()

        print("Transcript fetched successfully from YouTube API:")
        print(transcript_text)
        return transcript_text

    def download_and_transcribe(self):
        """
        Download the audio track with yt-dlp and transcribe it with Whisper.

        The temporary audio file is removed before the download (so a stale
        file from an earlier run can never be transcribed by mistake) and
        again afterwards, whether or not transcription succeeded.

        :return: The formatted transcript, or None on any failure
            (the cause is kept in ``self.last_error``)
        """
        # Prompt
        print("Videos without built-in transcript may take 2-3 minutes to generate transcript...")
        print('\n----------------------------------------------')

        # Command to download audio and save it as MP3
        command = [
            "yt-dlp",
            "-f", "bestaudio",         # Best audio format
            "--extract-audio",         # Extract audio only
            "--audio-format", "mp3",   # Convert to MP3
            "-o", self.AUDIO_FILE,     # Output file path
            self.video_url
        ]

        try:
            self.clear_previous_files()

            # redirect_stderr only swaps Python's sys.stderr and has no effect on
            # child processes, so the subprocess streams are silenced explicitly.
            subprocess.run(
                command,
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            # Check if the MP3 file exists
            if not os.path.exists(self.AUDIO_FILE):
                print("Audio file not found after download.")
                return None

            print("Audio downloaded successfully. Start to generate transcript...")
            print('\n')

            # Load Whisper model
            model = whisper.load_model(self.WHISPER_MODEL)

            # Transcribe the audio
            with open(os.devnull, "w") as fnull, redirect_stderr(fnull):
                result = model.transcribe(self.AUDIO_FILE)

            result = self.format_long_text(result["text"])

            print("Transcription completed using Whisper:")
            print('\n--------------------RESULT---------------------')
            print(result)
            return result
        except Exception as exc:
            # Deliberately quiet: keep the cause for debugging, print no details.
            self.last_error = exc
            print("Error during audio download or Whisper transcription.")
            return None
        finally:
            # Best-effort cleanup; a failure here must not mask the result above.
            try:
                self.clear_previous_files()
            except OSError:
                pass

# Usage example: call VideoToTranscript.run(...) in the next cell.

In [33]:
# Put the YouTube video link inside the quotes within the parentheses
transcript = VideoToTranscript.run("https://www.youtube.com/watch?v=ehTIhQpj9ys")

Transcript fetched successfully from YouTube API:
recently I volunteered my time at a
hospice center for dying programmers
also known as X and I asked the question
you're on your deathbed a kid comes to
your side and says grandpa I want to
learn how to code what do you regret
most about programming my immediate
answer would be something like leave me
alone kid and let me die in peace but
it's actually an interesting question
because programming is not like most
careers it has billions of possible
paths with many conflicting opinions and
no single right way to do things despite
its unpredictable nature I found that
there are some Universal traps that
nearly every developer regrets falling
into and in today's video we're going to
look at 10 of them regret number one I
wish I would have written less code it's
extremely important to understand that
code is not an asset it's a liability
every block of code is something that
you'll have to maintain in the future
and something that could brea