## 1. Installing Required Libraries
- Run this cell first. Uncomment the install line if any of the libraries are missing; the cell then prints the path of the FFmpeg executable provided by `imageio-ffmpeg`.

In [1]:
# Install required libraries
# Uncomment the line below if the libraries are not installed yet.
# (%pip installs into the environment of the running kernel.)
# %pip install yt-dlp openai-whisper gradio "imageio[ffmpeg]"

# Sanity check: print the path of the FFmpeg executable reported by imageio-ffmpeg.
import imageio_ffmpeg as ffmpeg
print(ffmpeg.get_ffmpeg_exe())

/opt/anaconda3/envs/nlp/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1


## 2. Import Libraries
- This cell ensures all the required imports are available.

In [2]:
# Import required libraries
import os
import tempfile

import gradio as gr
import imageio_ffmpeg as ffmpeg
import whisper
import yt_dlp

## 3. Helper Function: Download YouTube Audio
- This cell defines the function to download audio from YouTube using yt-dlp.

In [3]:
# Function to download audio from a YouTube video
def download_audio(youtube_url, output_path="audio"):
    """
    Downloads audio from a YouTube video using yt-dlp and converts it to MP3.

    Args:
        youtube_url (str): URL of the YouTube video. Surrounding whitespace
            is ignored.
        output_path (str): Path to save the downloaded audio (without extension).

    Returns:
        str: Path to the downloaded audio file (``output_path`` + ".mp3").

    Raises:
        ValueError: If ``youtube_url`` is not a non-empty string.
        FileNotFoundError: If the download finished but the expected MP3
            file is not present.
        Exception: Any error raised by yt-dlp is logged and re-raised.
    """
    # Fail fast on blank input instead of handing it to yt-dlp.
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        raise ValueError("youtube_url must be a non-empty string")
    youtube_url = youtube_url.strip()

    ffmpeg_path = ffmpeg.get_ffmpeg_exe()
    print("Using FFmpeg Path:", ffmpeg_path)  # Debug information

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': output_path,  # Do not add .mp3 here
        # Every download is written to the single `output_path`, so only
        # one video may be fetched even when the URL also names a playlist.
        'noplaylist': True,
        'quiet': True,
        'ffmpeg_location': ffmpeg_path,  # Explicitly use FFmpeg path
    }
    audio_path = f"{output_path}.mp3"  # Append .mp3 for the returned path

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([youtube_url])
    except Exception as e:
        print(f"Error during audio download: {e}")
        raise

    # Do not hand callers a path that was never produced.
    if not os.path.exists(audio_path):
        raise FileNotFoundError(
            f"Download finished but the audio file was not found: {audio_path}"
        )
    return audio_path

## 4. Helper Function: Transcribe Audio
- This cell defines the transcription functionality using OpenAI’s Whisper.

In [4]:
# Function to transcribe the audio
def transcribe_audio(file_path, model_type="base"):
    """
    Transcribes the audio file using OpenAI Whisper.

    The Whisper model is loaded on every call.

    Args:
        file_path (str): Path to the audio file.
        model_type (str): Whisper model to use (e.g., 'tiny', 'base', 'small').

    Returns:
        str: The transcribed text.

    Raises:
        FileNotFoundError: If ``file_path`` is a path that does not point to
            an existing file.
    """
    # Check path-like inputs up front so a missing file is reported clearly
    # before the model is loaded. Other input types are passed through to
    # Whisper unchanged.
    if isinstance(file_path, (str, os.PathLike)) and not os.path.isfile(file_path):
        raise FileNotFoundError(f"Audio file not found: {file_path}")

    # NOTE(review): Whisper presumably needs an `ffmpeg` executable it can
    # find in order to decode the file -- confirm one is available.
    print(f"Loading Whisper model: {model_type}")
    model = whisper.load_model(model_type)
    print("Transcribing audio...")
    result = model.transcribe(file_path)
    print("Transcription completed.")
    return result['text']

## 5. Main Function: Orchestrating the Workflow
- This cell combines the download and transcription steps.

In [5]:
# Function to download and transcribe YouTube video
def transcribe_youtube_video(youtube_url):
    """
    Downloads and transcribes a YouTube video's audio.

    The audio is downloaded into a temporary directory created for this
    call, so simultaneous calls do not overwrite each other's file, and the
    directory is removed afterwards whether or not transcription succeeds.

    Args:
        youtube_url (str): URL of the YouTube video.

    Returns:
        str: The transcription of the video's audio or an error message
        (prefixed with "Error: ").
    """
    # An empty textbox should give a readable message, not a download error.
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return "Error: Please provide a YouTube video URL."

    try:
        with tempfile.TemporaryDirectory(prefix="yt_transcribe_") as work_dir:
            # Step 1: Download audio into the per-call working directory
            audio_file = download_audio(
                youtube_url.strip(),
                output_path=os.path.join(work_dir, "audio"),
            )

            # Step 2: Transcribe audio; the directory (and the audio file)
            # is deleted when the `with` block exits, even on failure.
            return transcribe_audio(audio_file)
    except Exception as e:
        return f"Error: {str(e)}"

## 6. Building the Gradio App
- This cell defines the Gradio interface.

In [6]:
# Build the Gradio UI: a URL input, a transcript output, and a button that
# runs transcribe_youtube_video on click.
with gr.Blocks() as app:
    # Page heading and a short instruction
    gr.Markdown("# YouTube Video Transcription App")
    gr.Markdown("Provide a YouTube video URL to transcribe its audio to text.")

    # Input field for the video URL
    youtube_url = gr.Textbox(
        label="YouTube Video URL",
        placeholder="Enter the YouTube video URL here...",
    )
    # Multi-line field that receives the transcript (or an error message)
    output_text = gr.Textbox(
        label="Transcript",
        placeholder="The transcript will appear here...",
        lines=10,
    )

    # Wire the button to the transcription workflow
    submit_button = gr.Button("Transcribe")
    submit_button.click(
        fn=transcribe_youtube_video,
        inputs=[youtube_url],
        outputs=[output_text],
    )

## 7. Launching the Gradio App
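- This cell launches the app for users to interact with. Because it passes `share=True`, Gradio also creates a temporary public URL in addition to the local one.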

In [7]:
# Start serving the Gradio app; share=True also requests a public share link
# in addition to the local URL.
if __name__ == "__main__":
    launch_options = {"share": True}
    app.launch(**launch_options)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://b41f1ff8866192ef54.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
