Use audio to generate meeting minutes, including action items.

In [1]:
! pip install -q requests torch bitsandbytes transformers sentencepiece accelerate  openai httpx==0.27.2 gradio

In [2]:
#imports

import os
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch
import gradio as gr

In [3]:
# Model identifiers: LLAMA is the Hugging Face checkpoint used to write the minutes,
# Audio_Model is the OpenAI model used for speech-to-text.
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"
Audio_Model = "whisper-1"

In [5]:
# Sign in to the Hugging Face Hub with the token stored in Colab's user secrets
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [6]:
# Build the OpenAI client from the API key stored in Colab's user secrets
open_api_key = userdata.get("OPENAI_API_KEY")
openai = OpenAI(
    api_key=open_api_key,
)

In [9]:
# 4-bit quantization settings handed to the model loader:
# NF4 quantization type, double quantization enabled, bfloat16 compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
def custom_transcription(audio_file):
    """
    Transcribe an audio file through the OpenAI transcription endpoint.
    Args:
        audio_file (str): Path to the audio file (Gradio passes a file path).
    Returns:
        str: The API result (requested in "text" format), or a message starting
        with "Error in transcription: " if any exception is raised.
    """
    try:
        # Everything stays inside the try so that any failure, including a bad
        # path, is reported in-band rather than raised.
        with open(audio_file, "rb") as audio_stream:
            result = openai.audio.transcriptions.create(
                file=audio_stream,
                model=Audio_Model,
                response_format="text",
            )
    except Exception as exc:
        return f"Error in transcription: {str(exc)}"
    return result

In [None]:


# Tokenizer/model pair kept between calls so each request does not reload the checkpoint.
_LLAMA_CACHE = {}


def _load_llama():
    """Return the (tokenizer, model) pair for LLAMA, loading both on first use only."""
    if not _LLAMA_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(LLAMA)
        # Reuse EOS as the padding token, as the original code did.
        tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            LLAMA, device_map="auto", quantization_config=quant_config
        )
        _LLAMA_CACHE["tokenizer"] = tokenizer
        _LLAMA_CACHE["model"] = model
    return _LLAMA_CACHE["tokenizer"], _LLAMA_CACHE["model"]


def extract_notes(transcription):
    """
    Extract meeting minutes from a transcription using the LLAMA model.
    Args:
        transcription (str): Transcribed text.
    Returns:
        str: The generated minutes only (Markdown is requested in the prompt),
        without the prompt text or special tokens.
    """
    # Define the system and user messages
    system_message = "You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown."
    # The prompt is kept generic: the interface accepts any uploaded recording,
    # so it must not claim a specific meeting or location.
    user_message = f"Below is the transcript of a meeting. Please write minutes in Markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\n{transcription}"

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message}
    ]

    tokenizer, model = _load_llama()

    # add_generation_prompt=True ends the prompt with the assistant header so the
    # model writes a reply; return_dict=True also yields the attention mask, which
    # generate() cannot infer reliably when the pad token equals EOS.
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    # Stream tokens to stdout while generating
    streamer = TextStreamer(tokenizer)

    outputs = model.generate(**prompt, max_new_tokens=2000, streamer=streamer)

    # generate() returns prompt + continuation; decode only the continuation so
    # the notes do not repeat the system message and the whole transcript.
    prompt_length = prompt["input_ids"].shape[-1]
    notes = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    return notes

In [15]:
def transcribe_and_extract(audio_file):
    """
    Transcribe an audio file and generate meeting notes from the transcript.
    Args:
        audio_file (str): Path to the audio file.
    Returns:
        tuple[str, str]: (transcription, notes). If transcription fails, the first
        item is the error message and the second explains that no notes were
        produced. If any other exception is raised, both items are error messages.
    """
    # custom_transcription reports failures in-band with exactly this prefix.
    error_prefix = "Error in transcription:"
    try:
        print(f"Processing file: {audio_file}")
        transcription = custom_transcription(audio_file)
        print(f"Transcription: {transcription}")

        # Match the prefix, not the bare word "Error": a real transcript may
        # legitimately contain that word and must still be summarised.
        if transcription.startswith(error_prefix):
            return transcription, "No notes extracted due to transcription error."

        notes = extract_notes(transcription)
        print(f"Extracted Notes: {notes}")
        return transcription, notes
    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return f"Error: {str(e)}", "Error occurred in processing."

In [14]:
import gradio as gr

def create_audio_interface():
    """
    Build the Gradio Blocks UI for audio transcription and notes extraction.

    The layout has one audio upload, one text box for the transcript, one text
    box for the notes, and a button for each output.
    Returns:
        gr.Blocks: The assembled interface (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Audio Transcription and Notes Extraction")

        # File upload component; type="filepath" means handlers receive a path string
        audio_input = gr.Audio(type="filepath", label="Upload Audio File")

        # Output text areas
        transcription_output = gr.Textbox(label="Transcription Output", lines=5)
        notes_output = gr.Textbox(label="Extracted Notes", lines=5)

        # Buttons
        with gr.Row():
            transcribe_btn = gr.Button("Get Transcription")
            notes_btn = gr.Button("Get Notes")

        def handle_transcription(audio_path):
            """Return the transcript for the uploaded file without generating notes."""
            if audio_path is None:
                return "Please upload an audio file first."
            # Call the transcriber directly: going through transcribe_and_extract
            # would also run extract_notes and then discard its result.
            return custom_transcription(audio_path)

        def handle_notes(audio_path):
            """Transcribe the uploaded file and return the notes generated from it."""
            if audio_path is None:
                return "Please upload an audio file first."
            _, notes = transcribe_and_extract(audio_path)
            return notes

        # Connect buttons to functions
        transcribe_btn.click(
            fn=handle_transcription,
            inputs=[audio_input],
            outputs=[transcription_output]
        )

        notes_btn.click(
            fn=handle_notes,
            inputs=[audio_input],
            outputs=[notes_output]
        )

    return demo

# Build the Gradio app and start serving it when this cell is executed.
if __name__ == "__main__":
    demo = create_audio_interface()
    demo.launch(share=True)  # share=True creates a temporary public URL so the app is reachable from Colab

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://134a20640ab14d3dbc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
