# Meeting Minutes Generator

This notebook transcribes meeting audio with OpenAI's Whisper API (`whisper-1`) and generates structured Markdown minutes with a 4-bit quantized Meta Llama 3.1 8B Instruct model.

**Run in Google Colab:** [Open in Colab](https://colab.research.google.com/drive/13cDJUcaOXODbLxtRfqMJJ6LAF-m9qxfX?usp=sharing)

### Features:
- **Encapsulation**: Logical grouping of models and processing in a `MeetingMinutesGenerator` class.
- **Error Handling**: Comprehensive `try-except` blocks for API and model failures.
- **Type Safety**: Python type hints for better maintainability.
- **Resource Management**: Checks for a CUDA GPU before loading the quantized model.
- **User Experience**: Improved Gradio interface with status updates.

In [None]:
# --- Install Dependencies ---
!pip install -q gradio torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai huggingface_hub

In [1]:
import os
import torch
import gradio as gr
from typing import List, Dict, Optional, Union
from openai import OpenAI
from huggingface_hub import login
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    BitsAndBytesConfig
)

# Handle environment-specific imports
try:
    from google.colab import userdata
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

In [None]:
class Config:
    """Static settings shared by the transcription and summarization steps."""

    # Model name sent to the OpenAI audio transcription endpoint.
    AUDIO_MODEL = "whisper-1"
    # Hugging Face model id of the LLM that writes the minutes.
    LLM_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    # Upper bound on the number of tokens generated for one set of minutes.
    MAX_NEW_TOKENS = 2000

    # Evaluated once, when this class body runs.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    @staticmethod
    def get_secret(key: str) -> str:
        """Look up a secret by name.

        Inside Colab the value is read from ``userdata``; if that lookup
        raises for any reason, or when not running in Colab at all, the
        process environment is consulted instead.

        Args:
            key: Name of the secret / environment variable.

        Returns:
            The secret value, or an empty string when the environment
            fallback finds nothing.
        """
        if not IN_COLAB:
            return os.environ.get(key, "")

        try:
            return userdata.get(key)
        except Exception:
            # Best effort: any Colab lookup failure falls back to the
            # environment rather than aborting.
            return os.environ.get(key, "")

In [None]:
class MeetingMinutesGenerator:
    """Transcribe meeting audio and turn the transcript into Markdown minutes.

    Transcription goes through the OpenAI audio API (``Config.AUDIO_MODEL``);
    the minutes are written by a 4-bit quantized causal LM
    (``Config.LLM_MODEL``) loaded through ``transformers``. Loading is lazy:
    ``process`` calls ``initialize`` the first time it is needed.
    """

    # Instruction given to the LLM as the system message.
    _SYSTEM_PROMPT = (
        "You are an expert meeting assistant. Produce structured minutes in "
        "Markdown with: Summary, Key Discussion Points, Takeaways, and "
        "Action Items with owners."
    )

    def __init__(self):
        """Create an uninitialized generator; no network or model work here."""
        self.openai_client: Optional[OpenAI] = None
        self.tokenizer: Optional[AutoTokenizer] = None
        self.model: Optional[AutoModelForCausalLM] = None
        # Flipped to True only after every step of ``initialize`` succeeded.
        self.is_ready = False

    def initialize(self) -> str:
        """Create the OpenAI client, log in to Hugging Face and load the LLM.

        Returns:
            A human-readable status string. ``self.is_ready`` is True only
            when the returned status reports success; every failure path
            (missing secret, no CUDA device, any raised exception) leaves it
            False and returns an explanatory message instead of raising.
        """
        try:
            # 1. OpenAI client (used for transcription).
            api_key = Config.get_secret('OPENAI_API_KEY')
            if not api_key:
                return "Error: OPENAI_API_KEY not found."
            self.openai_client = OpenAI(api_key=api_key)

            # 2. Hugging Face login (the Llama weights need an access token).
            hf_token = Config.get_secret('HF_TOKEN')
            if not hf_token:
                return "Error: HF_TOKEN not found for Llama access."
            login(hf_token, add_to_git_credential=True)

            # 3. Loading stops here without CUDA, so report it as an error
            #    rather than a warning that suggests work will continue.
            if Config.DEVICE != "cuda":
                return (
                    f"Error: GPU (CUDA) not detected (device: {Config.DEVICE}). "
                    "A CUDA GPU is required to load the 4-bit quantized model."
                )

            quant_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_quant_type="nf4",
            )

            tokenizer = AutoTokenizer.from_pretrained(Config.LLM_MODEL)
            # Reuse EOS as the padding token.
            tokenizer.pad_token = tokenizer.eos_token

            model = AutoModelForCausalLM.from_pretrained(
                Config.LLM_MODEL,
                device_map="auto",
                quantization_config=quant_config,
            )

            # Publish both objects together so a failed model load never
            # leaves a tokenizer-only, half-initialized instance behind.
            self.tokenizer = tokenizer
            self.model = model
            self.is_ready = True
            return "Initialization successful!"

        except Exception as e:
            return f"Initialization failed: {str(e)}"

    def _transcribe(self, audio_path: str) -> str:
        """Send the audio file to the transcription API and return its text.

        Args:
            audio_path: Path of the audio file to upload.

        Raises:
            ValueError: If the OpenAI client has not been created yet.
        """
        if self.openai_client is None:
            raise ValueError("OpenAI client not initialized.")

        with open(audio_path, "rb") as f:
            response = self.openai_client.audio.transcriptions.create(
                model=Config.AUDIO_MODEL,
                file=f,
                response_format="text",
            )
        return response

    @staticmethod
    def _build_messages(transcription: str) -> List[Dict[str, str]]:
        """Build the system + user chat messages for one transcript."""
        return [
            {
                "role": "system",
                "content": MeetingMinutesGenerator._SYSTEM_PROMPT,
            },
            {
                "role": "user",
                # A real line break separates the instruction from the
                # transcript (previously a literal backslash-n was emitted).
                "content": f"Analyze this transcript and write comprehensive minutes:\n{transcription}",
            },
        ]

    def _generate_minutes(self, transcription: str) -> str:
        """Generate Markdown minutes for a transcript with the loaded LLM.

        Args:
            transcription: Plain-text transcript of the meeting.

        Returns:
            The newly generated text only (prompt tokens removed), stripped
            of surrounding whitespace.

        Raises:
            ValueError: If the model or tokenizer has not been loaded.
        """
        if self.model is None or self.tokenizer is None:
            raise ValueError("LLM not initialized.")

        # return_dict=True also yields the attention mask, which generate()
        # needs to be unambiguous because the pad token equals the EOS token.
        encoded = self.tokenizer.apply_chat_template(
            self._build_messages(transcription),
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)  # follow wherever device_map placed the model

        outputs = self.model.generate(
            **encoded,
            max_new_tokens=Config.MAX_NEW_TOKENS,
            eos_token_id=self.tokenizer.eos_token_id,
            pad_token_id=self.tokenizer.pad_token_id,
        )

        # The output sequence starts with the prompt; keep only what follows.
        prompt_length = encoded["input_ids"].shape[-1]
        response = self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        return response.strip()

    def process(self, audio_filepath: Optional[str]) -> str:
        """Run the full pipeline for the Gradio UI.

        Args:
            audio_filepath: Path of the uploaded/recorded audio, or a falsy
                value when nothing was provided.

        Returns:
            The generated minutes, or a message describing why none could be
            produced. Exceptions from transcription or generation are
            converted to a ``"Processing Error: ..."`` string.
        """
        if not audio_filepath:
            return "Please upload an audio file."

        if not self.is_ready:
            status = self.initialize()
            if not self.is_ready:
                return status

        try:
            print("Transcribing...")
            transcript = self._transcribe(audio_filepath)

            # Nothing to summarize: skip generation instead of prompting the
            # LLM with an empty transcript.
            if not transcript or not transcript.strip():
                return "No speech could be transcribed from the audio file."

            print("Generating minutes...")
            return self._generate_minutes(transcript)
        except Exception as e:
            return f"Processing Error: {str(e)}"

In [None]:
# --- UI Initialization ---
# One shared generator instance backs the UI; it is initialized on the first
# call to `process` while it is not yet ready.
generator = MeetingMinutesGenerator()

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Title emoji restored (the literal previously contained mis-encoded bytes).
    gr.Markdown("# 📝 Meeting Minutes AI Assistant")
    gr.Markdown("Upload your meeting recording (mp3, wav, m4a) to generate structured minutes automatically.")

    with gr.Row():
        # type="filepath" hands `process` a path string rather than raw audio.
        audio_input = gr.Audio(
            type="filepath",
            label="Meeting Audio",
            sources=["upload", "microphone"]
        )

    generate_btn = gr.Button("Generate Minutes", variant="primary")

    # The returned string (minutes or a status/error message) is shown here.
    output_display = gr.Markdown(label="Minutes Output")

    generate_btn.click(
        fn=generator.process,
        inputs=audio_input,
        outputs=output_display
    )

demo.launch()