# Meeting Minutes Transcriber and Summarizer with Gradio
<p> Using Hugging Face models </p>

Link to the Colab notebook: https://colab.research.google.com/drive/1kdDGAcAyfUa4l59zudTtHJKqORVv081p#scrollTo=ec5b97ce

Link to download a smaller meeting audio sample: https://www.kaggle.com/datasets/znevzz/speech-recognition-and-speaker-diarization

OR

https://huggingface.co/datasets/snorbyte/world-audio-natural-conversations-sample
https://huggingface.co/datasets/danielrosehill/Small-STT-Eval-Audio-Dataset

(these are smaller audio datasets)

In [None]:
# Install/upgrade the required packages (transformers is pinned to 4.57.6)
!pip install -q --upgrade bitsandbytes accelerate transformers==4.57.6 gradio soundfile

In [None]:
# import
import os
import requests
from huggingface_hub import login
from google.colab import userdata
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, TextIteratorStreamer
import torch
import gradio as gr
import threading

In [None]:
# Models
# Hugging Face model id used for both the tokenizer and the causal LM that writes the minutes
LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
# Hugging Face model id passed to the automatic-speech-recognition pipeline
TRANSCRIBER_MODEL = "openai/whisper-medium.en"

In [None]:
# Authenticate with the Hugging Face Hub using the HF_TOKEN value read from Colab userdata
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

# STEP 1: Transcribe Audio with Hugging Face models

In [None]:
# Speech-to-text pipeline: float16 weights on the CUDA device, timestamps enabled
transcriber_pipeline = pipeline(
    task="automatic-speech-recognition",
    model=TRANSCRIBER_MODEL,
    device="cuda",
    dtype=torch.float16,
    return_timestamps=True,
)

In [None]:
# 4-bit quantization settings for the LLM: NF4 quantization type with double
# quantization enabled, and bfloat16 as the compute dtype
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Load the tokenizer and the quantized LLM
llm_tokenizer = AutoTokenizer.from_pretrained(LLAMA)
# Reuse the end-of-sequence token as the padding token
llm_tokenizer.pad_token = llm_tokenizer.eos_token
llm_model = AutoModelForCausalLM.from_pretrained(
    LLAMA,
    quantization_config=quant_config,
    device_map="auto",
)

In [None]:
def transcribe_audio(audio_file):
  """Run the speech-recognition pipeline on ``audio_file`` and return its text.

  Args:
    audio_file: Audio input forwarded unchanged to ``transcriber_pipeline``.

  Returns:
    The ``"text"`` entry of the pipeline result.
  """
  print("starting audio transcription")
  asr_output = transcriber_pipeline(audio_file)
  print("audio transcription complete")
  transcript = asr_output["text"]
  return transcript

# STEP 2: Analyze & Report

In [None]:
def build_prompt(transcription, meeting_description="a meeting"):
  """Build the chat messages that ask the LLM to write minutes for a transcript.

  Args:
    transcription: Transcript text embedded at the end of the user message.
    meeting_description: Short phrase describing the meeting, inserted into
      the user message (for example ``"a Denver council meeting"``). The
      default is generic so that an arbitrary recording is not described to
      the model as a specific meeting it is not.

  Returns:
    A two-element list of ``{"role": ..., "content": ...}`` dicts: the system
    message followed by the user message.
  """
  system_message = """
  You produce minutes of meetings from transcripts, with summary, key discussion points,
  takeaways and action items with owners, in markdown format without code blocks.
  """
  user_prompt = f"""
  Below is an extract transcript of {meeting_description}.
  Please write minutes in markdown without code blocks, including:
  - a summary with attendees, location and date
  - discussion points
  - takeaways
  - action items with owners

  Transcription:
  {transcription}
  """

  return [
      {"role": "system", "content": system_message},
      {"role": "user", "content": user_prompt}
    ]

In [None]:
def generate_minutes(transcription):
  """Stream LLM-generated meeting minutes for a transcript.

  ``llm_model.generate`` runs in a background thread and pushes decoded text
  into a ``TextIteratorStreamer``; this generator consumes the streamer so
  callers can display partial output while generation is still running.

  Args:
    transcription: Transcript text handed to ``build_prompt``.

  Yields:
    The response accumulated so far (each item extends the previous one).
  """
  print("summarizing transcript")
  messages = build_prompt(transcription)

  # add_generation_prompt=True appends the assistant header to the prompt so
  # the model starts its reply rather than continuing the user turn.
  inputs = llm_tokenizer.apply_chat_template(
      messages, add_generation_prompt=True, return_tensors="pt"
  ).to("cuda")

  streamer = TextIteratorStreamer(llm_tokenizer, skip_special_tokens=True, skip_prompt=True)

  generation_kwargs = dict(inputs=inputs, streamer=streamer, max_new_tokens=2000, do_sample=True, top_p=0.9, temperature=0.7)
  thread = threading.Thread(target=llm_model.generate, kwargs=generation_kwargs)
  thread.start()

  current_response = ""
  for new_text in streamer:
    current_response += new_text
    yield current_response

  # The streamer is exhausted once generation has finished; reap the worker.
  thread.join()

In [None]:
def process_audio_and_summarize(audio_file):
  """Transcribe an audio file and stream the generated meeting minutes.

  This function is a generator: every message intended for the caller is
  yielded, because a value passed to ``return`` inside a generator is not
  part of the yielded stream.

  Args:
    audio_file: Audio input handed to ``transcribe_audio``, or ``None`` when
      nothing was provided.

  Yields:
    A prompt to upload a file when ``audio_file`` is ``None``; otherwise a
    "processing" status message followed by the minutes text produced so far
    by ``generate_minutes``.
  """
  if audio_file is None:
    # Must be yielded (not returned) to show up in the output stream.
    yield "Please upload an audio file to generate meeting minutes."
    return

  # Initial message to indicate processing has started
  yield "Processing audio and generating minutes... Please wait."

  transcription = transcribe_audio(audio_file)

  # Forward every partial result so the caller receives streaming updates
  yield from generate_minutes(transcription)

In [None]:
# Build and launch the Gradio UI: an audio upload (passed as a file path) in,
# markdown meeting minutes out
gr.Interface(
    fn=process_audio_and_summarize,
    inputs=gr.Audio(label="Upload Audio File", sources=["upload"], type="filepath"),
    outputs=gr.Markdown(label="Meeting Minutes"),
    title="Meetings Minutes Transcriber and Summarizer",
    description="Upload an audio file to get a transcribed and summarized meeting minutes.",
).launch(share=True, debug=True)