In [11]:
!pip install -q --upgrade bitsandbytes accelerate



In [4]:
import os
import requests
import threading
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from huggingface_hub import login
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, BitsAndBytesConfig
import torch
import gradio as gr

In [9]:
# Pull settings from a local .env file; values in the file take precedence
# over variables that are already present in the process environment.
load_dotenv(override=True)

# Read the Hugging Face access token from the environment and log in to the
# Hub, also storing the token in the git credential helper.
hf_token = os.environ.get('HF_TOKEN')
login(token=hf_token, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [14]:
class GenerateMinute:
  """Turn a meeting recording into streamed markdown minutes.

  The audio is transcribed with a speech-recognition pipeline and the
  transcript is then summarised by a 4-bit quantised causal language model.

  Attributes:
    audio_model: Hub id of the speech-recognition model.
    llm_model: Hub id of the chat model used to write the minutes.
    quant_config: Quantisation settings passed when loading the chat model.
  """

  audio_model = "openai/whisper-medium.en"
  llm_model = "meta-llama/Llama-3.2-3B-Instruct"
  quant_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_use_double_quant=True,
      bnb_4bit_compute_dtype=torch.bfloat16,
      bnb_4bit_quant_type="nf4"
  )

  def __init__(self, progress, audio_model=audio_model, llm_model=llm_model):
    """Store the settings and load the tokenizer and quantised chat model.

    Args:
      progress: Callable invoked as ``progress(fraction, desc=...)`` to
        report progress.
      audio_model: Hub id of the speech-recognition model.
      llm_model: Hub id of the chat model.
    """
    self.progress = progress
    self.audio_model = audio_model
    self.llm_model = llm_model
    self._load_llm()

  def _load_llm(self):
    """Load the tokenizer and quantised model named by ``self.llm_model``."""
    # Drop references to any previously loaded model before loading another
    # one, and forget its name so a failed load is not mistaken for success.
    self._loaded_llm_model = None
    self.tokenizer = None
    self.model = None

    self.tokenizer = AutoTokenizer.from_pretrained(self.llm_model)
    self.tokenizer.pad_token = self.tokenizer.eos_token
    self.model = AutoModelForCausalLM.from_pretrained(
        self.llm_model, quantization_config=self.quant_config, device_map="auto"
    )
    self._loaded_llm_model = self.llm_model

  def audio_to_text(self, audio_filepath):
    """Transcribe an audio file and return the stripped transcript text.

    Args:
      audio_filepath: Path to the audio file to transcribe.

    Returns:
      The transcript as a non-empty string.

    Raises:
      ValueError: If no path is given, the file does not exist, the
        transcription fails, or the transcript is empty.
    """
    self.progress(0.4, desc="Transcribing audio...")

    if audio_filepath is None:
      raise ValueError("No audio file provided")

    if not os.path.exists(audio_filepath):
      raise ValueError(f"Audio file not found: {audio_filepath}")

    try:
      pipe = pipeline(
          "automatic-speech-recognition",
          model=self.audio_model,
          chunk_length_s=30,
          # Fall back to CPU instead of failing when no GPU is present.
          device="cuda" if torch.cuda.is_available() else "cpu",
          return_timestamps=True
      )

      response = pipe(audio_filepath)
      # The pipeline returns a dict; the transcript is under the "text" key.
      text = response["text"].strip()
    except Exception as e:
      # Callers only expect ValueError; keep the original error chained.
      raise ValueError(e) from e

    if not text:
      raise ValueError("No speech detected in audio")

    return text

  def create_minute(self, transcription):
    """Generate meeting minutes for a transcript, yielding text as it streams.

    Args:
      transcription: Transcript text to summarise.

    Yields:
      Successive pieces of the generated minutes, with the prompt and special
      tokens already removed.
    """
    self.progress(0.7, desc="Generating meeting minutes...")

    system_message = """
    You produce minutes of meetings from transcripts, with summary, key discussion points,
    takeaways and action items with owners, in markdown format without code blocks.
    """

    user_prompt = f"""
    Below is an extract transcript of a Denver council meeting.
    Please write minutes in markdown without code blocks, including:
    - a summary with attendees, location and date
    - discussion points
    - takeaways
    - action items with owners

    Transcription:
    {transcription}
    """

    messages = [
      {"role": "system", "content": system_message},
      {"role": "user", "content": user_prompt}
    ]

    # Chat messages must be rendered through the model's chat template; the
    # generation prompt makes the model answer as the assistant.
    inputs = self.tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to(self.model.device)

    # Let the streamer drop the prompt and special tokens so no manual
    # scanning for header/end-of-turn markers is needed.
    streamer = TextIteratorStreamer(
        self.tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = dict(inputs)  # input_ids and attention_mask
    generate_kwargs["max_new_tokens"] = 2000
    generate_kwargs["streamer"] = streamer

    # generate() blocks, so it runs in a worker thread while this generator
    # consumes the streamer.
    thread = threading.Thread(target=self.model.generate, kwargs=generate_kwargs)
    thread.start()

    for new_text in streamer:
      # Keep whitespace-only chunks: newlines matter in markdown output.
      if new_text:
        yield new_text

    thread.join()

  def process_meeting(self, audio_filepath, audio_model, llm_model ):
    """Transcribe a recording and stream the accumulated minutes.

    Args:
      audio_filepath: Path to the audio file.
      audio_model: Hub id of the speech-recognition model; a falsy value
        keeps the current one.
      llm_model: Hub id of the chat model; a falsy value keeps the current
        one. A different model is loaded before generating.

    Yields:
      The minutes generated so far after each new chunk, or a single
      ``"Error processing meeting: ..."`` message if any step fails.
    """
    if audio_model:
      self.audio_model = audio_model
    if llm_model:
      self.llm_model = llm_model

    self.progress(0.2, desc="Processing audio file...")
    try:
      # Make sure the loaded tokenizer/model match the requested chat model.
      if self.llm_model != getattr(self, "_loaded_llm_model", None):
        self._load_llm()

      transcription = self.audio_to_text(audio_filepath)

      response = ""
      for chunk in self.create_minute(transcription):
        response += chunk
        yield response

    except Exception as e:
      yield f"Error processing meeting: {e}"

PackageNotFoundError: No package metadata was found for bitsandbytes