# Meeting minutes creator

In this colab, we make a meeting minutes program.

It includes useful code to connect your Google Drive to your colab.

Upload your own audio to make this work!!

https://colab.research.google.com/drive/13wR4Blz3Ot_x0GOpflmvvFffm5XU3Kct?usp=sharing

This should run nicely on a low-cost or free T4 box.

## **Assignment:**
Put everything into a nice Gradio UI (similar to last week).
The input is the file name of the audio to process.



In [None]:
# imports
import re, requests, json, tempfile, gradio as gr, torch, os
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from google.colab import drive, userdata
from huggingface_hub import login
from openai import OpenAI
from pydub import AudioSegment
from pydub.playback import play
from io import BytesIO
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig

# Sign in to HuggingFace Hub
# The token is read from the Colab secret named 'HF_TOKEN'.
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

# Sign in to OpenAI using Secrets in Colab
openai_api_key = userdata.get('OPENAI_API_KEY')

# Initialize client
# If construction fails, `openai` is set to None instead of aborting the cell;
# transcribe_audio() checks for that and returns an explanatory message.
try:
    openai = OpenAI(api_key=openai_api_key)
except Exception as e:
    openai = None
    print(f"OpenAI client not initialized: {e}")

# Constants
# AUDIO_MODEL is passed as `model=` to the OpenAI transcription call;
# LLAMA is the model id given to the tokenizer and model `from_pretrained` calls.
AUDIO_MODEL = "whisper-1"
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Google Drive
# Mounted at /content/drive; transcribe_audio() looks for audio files under
# /content/drive/MyDrive/llms.
drive.mount("/content/drive")

# Local LLM setup (Llama 3.1)
# Everything is loaded inside one try block: if any step fails, both `model`
# and `tokenizer` end up as None and generate_minutes() reports that the local
# model is unavailable instead of raising.
try:
    # 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )
    tokenizer = AutoTokenizer.from_pretrained(LLAMA)

    # Set the pad token to the end-of-sequence token for generation
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map="auto", quantization_config=quant_config)
    # Alternative load call kept for reference (explicit dtype + trust_remote_code):
    # model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map="auto", torch_dtype=torch.bfloat16, quantization_config=quant_config, trust_remote_code=True)

    model.eval() # Set model to evaluation mode
except Exception as e:
    # If the local model fails to load, set variables to None
    # (the tokenizer is reset too, even if it loaded before the failure)
    model = None
    tokenizer = None
    print(f"Failed to load local model: {e}")

# Updated function to handle audio transcription
def transcribe_audio(audio_file):
    """
    Transcribe an audio file to text using OpenAI's Whisper model.

    The path is resolved in this order:
      1. A path that already points inside /content/drive/MyDrive/llms/ is
         used as given.
      2. Otherwise the file's base name is looked up inside
         /content/drive/MyDrive/llms (Drive takes priority).
      3. Otherwise the value is treated as a local path (for example the
         temporary file produced by a microphone recording).

    Args:
        audio_file: File name or path of the audio to transcribe. None, an
            empty string, or a whitespace-only string counts as "no input".

    Returns:
        The transcription text on success; otherwise a human-readable error
        message (this function reports problems as strings, it does not raise).
    """
    if openai is None:
        return "OpenAI client not initialized. Please check your API key."

    # An empty textbox submits "" rather than None, so treat blank input the
    # same as missing input instead of resolving it to the Drive folder itself.
    if audio_file is None or not str(audio_file).strip():
        return "No audio input provided."

    # Paths pasted into the textbox often carry stray surrounding whitespace.
    audio_file = str(audio_file).strip()

    gdrive_dir = "/content/drive/MyDrive/llms"

    if audio_file.startswith(gdrive_dir + "/"):
        file_path_to_open = audio_file
    else:
        # Assume it's either a local path or just a filename in MyDrive/llms;
        # the Drive folder is checked first.
        gdrive_path_attempt = os.path.join(gdrive_dir, os.path.basename(audio_file))
        # isfile (not exists) so a directory is never mistaken for an audio file.
        if os.path.isfile(gdrive_path_attempt):
            file_path_to_open = gdrive_path_attempt
        elif os.path.isfile(audio_file):
            file_path_to_open = audio_file
        else:
            return f"File not found: {audio_file}. Please ensure the file exists in your Google Drive at /content/drive/MyDrive/llms/ or is a valid local path."

    # Covers the branch where a full Drive path was given but does not exist.
    if not os.path.isfile(file_path_to_open):
        return f"File not found: {file_path_to_open}. Please ensure the file exists."

    try:
        with open(file_path_to_open, "rb") as f:
            transcription = openai.audio.transcriptions.create(
                model=AUDIO_MODEL,
                file=f,
                response_format="text"
            )
        return transcription
    except Exception as e:
        # Boundary with the remote API: surface the failure to the UI as text.
        return f"An error occurred during transcription: {e}"

def generate_minutes(transcription):
    """
    Generate meeting minutes in markdown from a transcript using the local
    Llama model.

    The transcript is wrapped in a system + user chat, formatted with the
    tokenizer's chat template, and passed to the model. Only the newly
    generated tokens are decoded, so the returned text does not include the
    prompt.

    Args:
        transcription: Transcript text to summarise.

    Returns:
        The generated minutes as a string, or a human-readable error message
        if the model is not loaded or generation fails (no exception is raised).
    """
    # Both globals are set to None when loading failed at startup.
    if model is None or tokenizer is None:
        return "Local Llama model not loaded. Check model paths and hardware compatibility."

    system_message = "You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown."
    user_prompt = f"Below is an extract transcript of an Audio recording. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\n{transcription}"

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt}
    ]

    try:
        # add_generation_prompt=True appends the assistant header so the model
        # starts a reply instead of continuing the user turn. return_dict=True
        # also yields the attention mask, which generate() needs because the
        # pad token is the same as the EOS token.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(model.device)  # follow the model's device rather than assuming "cuda"
        prompt_length = inputs["input_ids"].shape[-1]

        # max_new_tokens caps the length of the generated minutes.
        outputs = model.generate(
            **inputs,
            max_new_tokens=2000,
            pad_token_id=tokenizer.eos_token_id,
        )

        # The output holds prompt + completion; drop the prompt tokens before
        # decoding so only the assistant's reply is returned.
        new_tokens = outputs[0][prompt_length:]
        response_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

        return response_text.strip()

    except Exception as e:
        # Boundary with the model runtime: surface the failure to the UI as text.
        return f"An error occurred during local model generation: {e}"

# Gradio UI: layout first, then the callbacks, then the event wiring
with gr.Blocks() as ui:
    gr.Markdown("# Meeting Minutes Generator")

    # Conversation pane showing each transcript and the minutes produced for it
    with gr.Row():
        chatbot = gr.Chatbot(height=500, label="AI Assistant")

    # Option 1: name an existing audio file
    with gr.Row():
        entry = gr.Textbox(
            label="Provide the filename or path of the audio file to transcribe:",
            scale=4,
        )
        submit_btn = gr.Button("Generate Minutes", scale=1)

    # Option 2: record from the microphone
    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Or speak to our AI Assistant to transcribe",
            scale=4,
        )
        submit_audio_btn = gr.Button("Transcribe Audio", scale=1)

    with gr.Row():
        clear = gr.Button("Clear")

    def process_file_and_generate(file_path, history):
        """Transcribe the named file, write minutes, and append both to the chat."""
        transcript = transcribe_audio(file_path)
        summary = generate_minutes(transcript)
        user_turn = f"Transcription of '{os.path.basename(file_path)}':\n{transcript}"
        return history + [[user_turn, summary]]

    def process_audio_and_generate(audio_file, history):
        """Transcribe a microphone recording, write minutes, and append both to the chat."""
        transcript = transcribe_audio(audio_file)
        summary = generate_minutes(transcript)
        user_turn = f"Transcription of your recording:\n{transcript}"
        return history + [[user_turn, summary]]

    def _reset_chat():
        """Return None so the chatbot component is cleared."""
        return None

    # Both submit buttons share the same wiring: (source, chat) in, chat out.
    for trigger, callback, source in (
        (submit_btn, process_file_and_generate, entry),
        (submit_audio_btn, process_audio_and_generate, audio_input),
    ):
        trigger.click(
            callback,
            inputs=[source, chatbot],
            outputs=[chatbot],
            queue=False,
        )

    clear.click(_reset_chat, inputs=None, outputs=[chatbot], queue=False)

ui.launch(inbrowser=True, debug=True)