In [None]:
# Install required libraries
!pip install gradio openai-whisper ffmpeg-python transformers FPDF

Collecting gradio
  Downloading gradio-5.17.1-py3-none-any.whl.metadata (16 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 800.5/800.5 kB 16.0 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting FPDF
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... done
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-n

In [None]:
# Import libraries
import gradio as gr
import whisper
from transformers import BartForConditionalGeneration, BartTokenizer
from fpdf import FPDF
import os
import time
import ffmpeg  # For audio format conversion

In [None]:
# Hugging Face checkpoint used for summarization
MODEL_NAME = "facebook/bart-large-cnn"

# Load the BART tokenizer and model once, at cell execution time, so that
# every summarization request reuses the same objects.
try:
    tokenizer = BartTokenizer.from_pretrained(MODEL_NAME)
    model = BartForConditionalGeneration.from_pretrained(MODEL_NAME)
except Exception as e:
    # Chain the original error explicitly so its traceback stays attached
    # to the higher-level message.
    raise RuntimeError(f"Failed to load BART model: {e}") from e
else:
    print("BART model loaded successfully!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

BART model loaded successfully!


In [None]:
# Function to load Whisper model dynamically
# Single-entry cache mapping model size -> loaded model. Only the most
# recently requested size is kept, so at most one Whisper model is held here.
_WHISPER_MODEL_CACHE = {}


def load_whisper_model(model_size="base"):
    """Return a Whisper model of the requested size, reusing the last one loaded.

    Args:
        model_size: Name passed to ``whisper.load_model`` (e.g. "tiny", "base").

    Returns:
        The object returned by ``whisper.load_model(model_size)``. Calling this
        function again with the same ``model_size`` returns the same object
        without reloading it.

    Raises:
        RuntimeError: If ``whisper.load_model`` fails; the original error is
            chained as ``__cause__``.
    """
    cached = _WHISPER_MODEL_CACHE.get(model_size)
    if cached is not None:
        return cached

    try:
        loaded = whisper.load_model(model_size)
    except Exception as e:
        raise RuntimeError(f"Failed to load Whisper model '{model_size}': {str(e)}") from e

    # Drop any previously cached size before storing the new one so that
    # switching sizes does not keep several models alive at once.
    _WHISPER_MODEL_CACHE.clear()
    _WHISPER_MODEL_CACHE[model_size] = loaded
    return loaded

# Convert audio to WAV format for Whisper compatibility
def convert_audio_to_wav(audio_file):
    """Convert ``audio_file`` to a 16-bit PCM WAV file using ffmpeg.

    The output is written to a uniquely named temporary file whose name
    starts with ``converted_`` and ends with ``.wav``. The caller is
    responsible for deleting it.

    Args:
        audio_file: Path of the input audio file.

    Returns:
        Path of the newly created WAV file.

    Raises:
        RuntimeError: If the conversion fails. The message carries the last
            line of ffmpeg's stderr when the underlying error exposes one;
            otherwise the underlying error's own message is used.
    """
    import tempfile  # local import: only this helper needs it

    # mkstemp yields a collision-free path, unlike a timestamp with one-second
    # resolution. The "converted_" prefix is kept because the file name is how
    # converted files have been recognised so far.
    fd, output_file = tempfile.mkstemp(prefix="converted_", suffix=".wav")
    os.close(fd)

    try:
        stream = ffmpeg.input(audio_file)
        stream = ffmpeg.output(stream, output_file, format="wav", acodec="pcm_s16le", ar="16k")
        # overwrite_output=True is required because mkstemp already created the file.
        ffmpeg.run(stream, overwrite_output=True, quiet=True)
        return output_file
    except Exception as e:
        # Do not leave an empty or partial output file behind.
        try:
            os.remove(output_file)
        except OSError:
            pass

        # quiet=True captures stderr instead of printing it; pull the last
        # non-empty line out of the error object if it carries one.
        detail = ""
        stderr = getattr(e, "stderr", None)
        if isinstance(stderr, bytes):
            stderr_lines = [ln.strip() for ln in stderr.decode("utf-8", errors="replace").splitlines()]
            stderr_lines = [ln for ln in stderr_lines if ln]
            if stderr_lines:
                detail = stderr_lines[-1]
        raise RuntimeError(f"Audio conversion failed: {detail or str(e)}") from e

# Transcribe audio
def transcribe_audio(audio_file, whisper_model_size="base"):
    """Transcribe an audio file with Whisper.

    Inputs whose name does not end in ``.wav`` (case-insensitive) are first
    converted with ``convert_audio_to_wav``. The temporary converted file is
    always deleted afterwards, whether transcription succeeds or fails.

    Args:
        audio_file: Path of the audio file; a falsy value means nothing was
            provided.
        whisper_model_size: Model size passed to ``load_whisper_model``.

    Returns:
        A ``(text, status)`` tuple. On success ``text`` is the transcription
        and ``status`` is a success message. On failure ``text`` is an
        ``"Error: ..."`` message and ``status`` is ``None``.
    """
    if not audio_file:
        return "Error: Please upload an audio file.", None

    # Path of the temporary WAV created by this call, if any. Tracking it
    # explicitly means only files created here are ever deleted.
    converted_file = None
    try:
        source = audio_file
        if not audio_file.lower().endswith('.wav'):
            converted_file = convert_audio_to_wav(audio_file)
            source = converted_file

        whisper_model = load_whisper_model(whisper_model_size)
        result = whisper_model.transcribe(source)
        return result["text"], "Transcription completed successfully!"
    except Exception as e:
        return f"Error: Transcription failed - {str(e)}", None
    finally:
        # Runs on both the success and the error path.
        if converted_file is not None:
            try:
                os.remove(converted_file)
            except OSError:
                # Best effort: a failed cleanup must not replace the result.
                pass

# Generate summary
def generate_summary(text, max_length=150, min_length=50):
    """Summarize ``text`` with the module-level BART model as a bullet list.

    Args:
        text: Text to summarize. Input is truncated to 1024 tokens.
        max_length: Upper bound passed to ``model.generate``; coerced to int.
        min_length: Lower bound passed to ``model.generate``; coerced to int
            and capped at ``max_length`` so the two bounds cannot contradict
            each other.

    Returns:
        A ``(summary, status)`` tuple. On success ``summary`` holds one
        ``"- sentence"`` line per sentence and ``status`` is a success
        message. On failure or empty input ``summary`` is an ``"Error: ..."``
        message and ``status`` is ``None``.
    """
    import re  # local import: only needed for sentence splitting

    if not text or not text.strip():
        return "Error: No input text provided for summarization!", None

    try:
        # UI sliders may deliver floats; the length bounds must be integers.
        max_length = int(max_length)
        min_length = min(int(min_length), max_length)

        inputs = tokenizer.encode(text, return_tensors="pt", max_length=1024, truncation=True)
        summary_ids = model.generate(
            inputs,
            max_length=max_length,
            min_length=min_length,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        # Split after sentence-ending punctuation so every bullet keeps its
        # terminator, and drop fragments that are empty after stripping.
        sentences = (part.strip() for part in re.split(r"(?<=[.!?])\s+", summary))
        bullet_summary = "\n".join(f"- {sentence}" for sentence in sentences if sentence)
        return bullet_summary, "Summary generated successfully!"
    except Exception as e:
        return f"Error: Summarization failed - {str(e)}", None

# Save text as PDF
def save_as_pdf(text, filename_prefix="output"):
    """Write ``text`` to a PDF named ``{filename_prefix}_{unix_timestamp}.pdf``.

    The PDF is set in the built-in "Arial" font, which is limited to Latin-1.
    Common typographic characters (curly quotes, en/em dashes, ellipsis) are
    mapped to ASCII equivalents; any other character outside Latin-1 is
    replaced with ``?`` so that one unsupported character does not make the
    whole export fail.

    Args:
        text: Text to write; each ``\\n``-separated line becomes one
            ``multi_cell`` call.
        filename_prefix: Prefix of the generated file name.

    Returns:
        A ``(filename, status)`` tuple. On success ``filename`` is the path of
        the written PDF. On failure or empty input ``filename`` is ``None``
        and ``status`` is an ``"Error: ..."`` message.
    """
    if not text or not text.strip():
        return None, "Error: No text provided for PDF generation!"

    # ASCII stand-ins for typographic characters that Latin-1 cannot encode.
    ascii_fallbacks = str.maketrans({
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
        "\u2013": "-",
        "\u2014": "-",
        "\u2026": "...",
    })

    try:
        # Generate unique filename with timestamp
        timestamp = int(time.time())
        filename = f"{filename_prefix}_{timestamp}.pdf"

        pdf = FPDF()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.add_page()
        pdf.set_font("Arial", size=12)

        # One multi_cell per input line preserves the text's own line breaks.
        for line in text.split('\n'):
            safe_line = line.translate(ascii_fallbacks)
            # Anything still outside Latin-1 becomes "?".
            safe_line = safe_line.encode("latin-1", "replace").decode("latin-1")
            pdf.multi_cell(190, 10, safe_line)

        pdf.output(filename)
        return filename, f"PDF saved as {filename}!"
    except Exception as e:
        return None, f"Error: PDF generation failed - {str(e)}"

In [None]:
# Define and launch Gradio interface
# Components are created in the order they appear inside the `with` block,
# so the statement order below also defines the on-screen layout.
with gr.Blocks(title="Meeting Transcriber & Summarizer") as demo:
    gr.Markdown("# 🎙️ Meeting Transcriber & Summarizer")
    gr.Markdown("Upload an audio file (MP3, WAV, etc.), transcribe it, and generate a summary. Download results as PDFs!")

    with gr.Row():
        # Audio input and model selection
        # type="filepath": the handler receives the audio as a path string.
        audio_input = gr.Audio(type="filepath", label="Upload Audio File (MP3, WAV, etc.)")
        whisper_model_dropdown = gr.Dropdown(
            choices=["tiny", "base", "small", "medium", "large"],
            value="base",
            label="Whisper Model Size (larger = more accurate, slower)"
        )

    # Transcription section
    transcribe_button = gr.Button("Transcribe Audio")
    transcription_output = gr.Textbox(label="Transcribed Text", lines=10)
    transcription_status = gr.Textbox(label="Status", interactive=False)

    # Summary section
    with gr.Row():
        # Slider values are passed to generate_summary as max_length / min_length.
        max_length_slider = gr.Slider(50, 300, value=150, step=10, label="Summary Max Length")
        min_length_slider = gr.Slider(20, 100, value=50, step=10, label="Summary Min Length")
    summarize_button = gr.Button("Generate Summary")
    summary_output = gr.Textbox(label="Summary", lines=5)
    summary_status = gr.Textbox(label="Status", interactive=False)

    # Download section
    with gr.Row():
        download_transcription = gr.Button("Download Transcription as PDF")
        download_summary = gr.Button("Download Summary as PDF")
    transcription_file = gr.File(label="Download Transcription")
    summary_file = gr.File(label="Download Summary")

    # Event bindings with status updates
    # Each handler returns a 2-tuple that is mapped, in order, onto `outputs`.
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=[audio_input, whisper_model_dropdown],
        outputs=[transcription_output, transcription_status],
        show_progress=True
    )

    # The summary is generated from whatever text is currently in the
    # transcription textbox.
    summarize_button.click(
        fn=generate_summary,
        inputs=[transcription_output, max_length_slider, min_length_slider],
        outputs=[summary_output, summary_status],
        show_progress=True
    )

    # The lambdas fix the filename prefix; save_as_pdf returns (file, status),
    # so the status message lands in the matching "Status" textbox.
    download_transcription.click(
        fn=lambda text: save_as_pdf(text, "transcription"),
        inputs=transcription_output,
        outputs=[transcription_file, transcription_status],
        show_progress=True
    )

    download_summary.click(
        fn=lambda text: save_as_pdf(text, "summary"),
        inputs=summary_output,
        outputs=[summary_file, summary_status],
        show_progress=True
    )

# Launch the app with debug mode for Colab
# With debug=True the cell keeps running until interrupted (see the launch
# output below); set debug=False to return control to the notebook.
demo.launch(debug=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://71900d9d734b3a4593.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


100%|███████████████████████████████████████| 139M/139M [00:04<00:00, 33.8MiB/s]
  checkpoint = torch.load(fp, map_location=device)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://71900d9d734b3a4593.gradio.live


