Week 3, Day 5: meeting minutes generator. Code for Google Colab.

In [None]:
!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai

In [None]:
# imports

import os
import requests
from IPython.display import Markdown, display
from openai import OpenAI
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch
import gradio as gr

In [None]:
# Sign in to HuggingFace Hub

# The token is read from Colab's secrets store under the key 'HF_TOKEN'.
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

# Sign in to OpenAI using Secrets in Colab

openai_api_key = userdata.get('OPENAI_API_KEY')
# NOTE: from here on the name `openai` refers to this client instance;
# generate_meeting_minutes calls openai.audio.transcriptions.create on it.
openai = OpenAI(api_key=openai_api_key)


# Model identifiers used by generate_meeting_minutes:
# AUDIO_MODEL goes to the OpenAI transcription call,
# LLAMA to the HuggingFace tokenizer and model loaders.
AUDIO_MODEL = "whisper-1"
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

In [None]:
# Tokenizer/model pairs keyed by model name, so the quantized checkpoint is
# loaded once per runtime instead of on every uploaded file.
_LLAMA_CACHE = {}


def _load_llama():
    """Return the (tokenizer, model) pair for LLAMA, loading it on first use."""
    if LLAMA not in _LLAMA_CACHE:
        print("calling tokenizer")
        tokenizer = AutoTokenizer.from_pretrained(LLAMA)
        tokenizer.pad_token = tokenizer.eos_token

        print("calling model")
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4",
        )
        model = AutoModelForCausalLM.from_pretrained(
            LLAMA, device_map="auto", quantization_config=quant_config
        )
        _LLAMA_CACHE[LLAMA] = (tokenizer, model)
    return _LLAMA_CACHE[LLAMA]


def generate_meeting_minutes(file_data):
    """Transcribe an audio file and return meeting minutes in markdown.

    The audio is transcribed with the OpenAI model named by AUDIO_MODEL, and
    the transcript is then summarised by the LLAMA model (loaded once and
    cached by ``_load_llama``).

    Args:
        file_data: An open binary file object holding the audio; it is passed
            as the ``file`` argument of the OpenAI transcription request.

    Returns:
        The generated minutes as a string. Only the newly generated tokens
        are decoded, so the prompt (system message and transcript) and the
        special tokens are not part of the result.
    """
    print("calling openai to generate transcript")
    transcription = openai.audio.transcriptions.create(
        model=AUDIO_MODEL, file=file_data, response_format="text"
    )
    print("transcript generated")
    print(transcription)

    system_message = "You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown."
    user_prompt = f"Below is an extract transcript of a Denver council meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\n{transcription}"
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
    ]

    tokenizer, model = _load_llama()

    # add_generation_prompt=True appends the assistant header so the model
    # answers the request instead of continuing the user turn.
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    streamer = TextStreamer(tokenizer)
    outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)

    # generate() returns prompt + completion; slice off the prompt tokens so
    # callers receive just the minutes.
    completion = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(completion, skip_special_tokens=True)

In [None]:
def process_binary_file(file_data):
    """Generate meeting minutes for an uploaded audio file.

    Despite the name, this callback does not receive raw bytes. It receives
    the location of the uploaded file, which it opens in binary mode and
    hands to ``generate_meeting_minutes``.

    Args:
        file_data: Either a path string or an object whose ``name`` attribute
            holds the path of the uploaded file. Falsy values (``None``,
            ``""``) mean nothing was uploaded.

    Returns:
        The text returned by ``generate_meeting_minutes``, or a fixed message
        when no file was provided.
    """
    if not file_data:
        return "No file uploaded or file is empty."

    print("processing file")
    # Accept both a plain path and an object exposing its path as `.name`.
    path = getattr(file_data, "name", file_data)
    with open(path, "rb") as f:
        return generate_meeting_minutes(f)

# Build the UI: a single-file .mp3 uploader whose upload event runs
# process_binary_file and shows the returned text in a textbox.
with gr.Blocks() as demo:
    gr.Markdown("Upload a file and its binary content will be processed.")

    # type="filepath": the callback is given the uploaded file's location
    # rather than its bytes; process_binary_file opens it itself.
    file_input = gr.File(type="filepath", file_count="single", file_types=[".mp3"], label="Upload a meeting file")

    # Unused alternative kept for reference: gr.UploadButton with type="binary"
    # upload_button = gr.UploadButton("Click to Upload Binary File", type="binary")

    output_text = gr.Textbox(label="Processing Result")

    # Link the file_input to the processing function
    file_input.upload(process_binary_file, inputs=file_input, outputs=output_text)


In [None]:
# Start the Gradio app defined above.
# NOTE(review): debug=True presumably keeps the cell running and surfaces
# callback errors in the cell output — confirm against the Gradio docs.
demo.launch(debug=True)