# Lab 4.1.5: Audio Transcription - SOLUTIONS

This notebook contains the complete solution for the Meeting Notes Generator challenge.

---

In [None]:
# Setup
import gc
import time
import re
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, field

import torch
import numpy as np
import whisper

# Load Whisper once at notebook scope; later cells reference this global `model`.
print("Loading Whisper model...")
model = whisper.load_model("base")  # Use "large-v3" for production
print("✅ Ready!")  # emoji was previously mis-encoded in the output

---

## Challenge Solution: Meeting Notes Generator

In [None]:
@dataclass
class MeetingNotes:
    """Container for everything extracted from one meeting recording.

    ``duration`` is expressed in minutes (it is rendered with a "minutes"
    suffix). Each entry of ``action_items`` is a dict that may carry the
    keys ``owner``, ``task`` and ``deadline``; missing keys fall back to
    defaults when rendered.
    """
    title: str
    date: str
    duration: float
    participants: List[str]
    transcript: str
    summary: str
    key_points: List[str]
    action_items: List[Dict[str, str]]
    decisions: List[str]
    follow_ups: List[str]

    def to_markdown(self) -> str:
        """Render the notes as a single markdown document.

        Summary, Key Points, Action Items and Full Transcript are always
        emitted; Decisions Made and Follow-up Items only appear when they
        have at least one entry.

        Returns:
            The markdown text, with lines joined by a newline.
        """
        def section(heading: str) -> List[str]:
            # Horizontal rule followed by the heading, each padded by a blank line.
            return ["", "---", "", heading, ""]

        who = ', '.join(self.participants) if self.participants else 'Unknown'

        # Header lines end with two spaces: a markdown hard line break.
        out = [
            f"# {self.title}",
            "",
            f"**Date:** {self.date}  ",
            f"**Duration:** {self.duration:.0f} minutes  ",
            f"**Participants:** {who}  ",
        ]

        out += section("## Summary")
        out.append(self.summary)

        out += section("## Key Points")
        out += [f"- {point}" for point in self.key_points]

        out += section("## Action Items")
        for entry in self.action_items:
            owner = entry.get('owner', 'Unassigned')
            task = entry.get('task', '')
            deadline = entry.get('deadline', 'TBD')
            out.append(f"- [ ] **{owner}**: {task} (Due: {deadline})")

        optional_sections = (
            ("## Decisions Made", self.decisions),
            ("## Follow-up Items", self.follow_ups),
        )
        for heading, entries in optional_sections:
            if entries:
                out += section(heading)
                out += [f"- {entry}" for entry in entries]

        out += section("## Full Transcript")
        out.append(self.transcript)

        return "\n".join(out)

In [None]:
def transcribe_audio(audio_path: str) -> Dict[str, Any]:
    """Run the module-level Whisper ``model`` on an audio file.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.

    Returns:
        A dict holding only the ``text``, ``segments`` and ``language``
        entries of the transcription result.
    """
    print(f"Transcribing: {audio_path}")

    raw = model.transcribe(audio_path, verbose=False)

    # Keep just the fields the rest of the notebook cares about.
    return {key: raw[key] for key in ("text", "segments", "language")}


# Capitalized sentence openers that the "[Name] will / needs to" patterns
# would otherwise report as a person's name.
_NON_NAME_OWNERS = frozenset({
    "We", "They", "He", "She", "It", "You",
    "This", "That", "These", "Those", "There",
    "Who", "What", "Which",
    "Someone", "Somebody", "Everyone", "Everybody", "Anyone", "Nobody",
})


def extract_action_items(transcript: str) -> List[Dict[str, str]]:
    """
    Extract action items from transcript.

    Looks for patterns like:
    - "[Name] will [action]"
    - "[Name] needs to [action]"
    - "Action item: [action]"

    Args:
        transcript: Plain-text transcript to scan.

    Returns:
        One dict per match with the keys ``owner``, ``task`` and
        ``deadline`` (always "TBD"). Items are grouped by pattern in the
        order listed above, and within a pattern by position in the text.
        When the word in the name slot is a pronoun or similar opener
        ("We will ...", "It will ..."), the task is kept but the owner is
        reported as "Unassigned" instead of the pronoun.
    """
    action_items = []

    owner_patterns = (
        r"([A-Z][a-z]+)\s+will\s+([^.!?]+)",         # "[Name] will [action]"
        r"([A-Z][a-z]+)\s+needs?\s+to\s+([^.!?]+)",  # "[Name] need(s) to [action]"
    )
    for pattern in owner_patterns:
        for match in re.finditer(pattern, transcript):
            owner = match.group(1)
            if owner in _NON_NAME_OWNERS:
                # A pronoun is not a usable owner; keep the task regardless.
                owner = "Unassigned"
            action_items.append({
                "owner": owner,
                "task": match.group(2).strip(),
                "deadline": "TBD",
            })

    # Explicit "action item" mentions carry no owner information.
    ai_pattern = r"action\s+item[:\s]+([^.!?]+)"
    for match in re.finditer(ai_pattern, transcript, re.IGNORECASE):
        action_items.append({
            "owner": "Unassigned",
            "task": match.group(1).strip(),
            "deadline": "TBD",
        })

    return action_items


def extract_decisions(transcript: str) -> List[str]:
    """
    Collect decision statements from a transcript.

    Recognised lead-ins (first letter may be upper or lower case):
    - "We decided to [decision]"
    - "Decision: [decision]"
    - "We agreed to [decision]"
    - "Let's go with [decision]"

    Returns:
        The captured text after each lead-in, up to the next ".", "!" or
        "?", whitespace-trimmed. Results are grouped by lead-in in the
        order above, then by position in the transcript.
    """
    decision_markers = (
        r"[Ww]e\s+decided\s+to\s+([^.!?]+)",
        r"[Dd]ecision[:\s]+([^.!?]+)",
        r"[Ww]e\s+agreed\s+to\s+([^.!?]+)",
        r"[Ll]et's\s+go\s+with\s+([^.!?]+)",
    )

    return [
        hit.group(1).strip()
        for marker in decision_markers
        for hit in re.finditer(marker, transcript)
    ]


def extract_participants(transcript: str) -> List[str]:
    """
    Guess who took part in the meeting.

    A candidate is a capitalized word that sits at the very start of the
    text or right after ". " or ", ", and is immediately followed by
    " said", " mentioned", " asked" or a colon. Candidates of two letters
    or fewer are ignored.

    Returns:
        Candidates seen at least twice, in order of first appearance.
    """
    speaker_re = r"(?:^|\. |, )([A-Z][a-z]+)(?:\s+said|\s+mentioned|\s+asked|:)"

    mentions = {}
    for hit in re.finditer(speaker_re, transcript):
        candidate = hit.group(1)
        if len(candidate) <= 2:
            # Too short to be a plausible name.
            continue
        mentions[candidate] = mentions.get(candidate, 0) + 1

    recurring = []
    for candidate, seen in mentions.items():
        if seen >= 2:
            recurring.append(candidate)
    return recurring


def generate_summary(transcript: str, max_length: int = 300) -> str:
    """
    Generate a simple summary of the transcript.

    For production, use an LLM for better summaries!

    The summary is extractive: the first three sentences whose text is
    longer than 20 characters, joined by single spaces. Each sentence
    keeps its own kind of terminator ("?", "!" or "."), so questions are
    not turned into statements; a sentence with no terminator gets ".".

    Args:
        transcript: Plain-text transcript to summarise.
        max_length: Upper bound on the length of the returned summary.

    Returns:
        The summary, truncated with a trailing "..." when it would exceed
        ``max_length``, or "No substantive content detected." when no
        sentence is long enough.
    """
    sentences = []
    for match in re.finditer(r"([^.!?]+)([.!?]*)", transcript):
        body = match.group(1).strip()
        if len(body) > 20:
            # Collapse runs such as "?!" or "..." to their first character.
            terminator = match.group(2)[:1] or "."
            sentences.append(body + terminator)

    if not sentences:
        return "No substantive content detected."

    summary = " ".join(sentences[:3])

    if len(summary) > max_length:
        # Shrink the ellipsis itself for tiny limits so the slice index
        # never goes negative and the result never exceeds max_length.
        ellipsis = "..."[:max(max_length, 0)]
        keep = max(max_length - len(ellipsis), 0)
        summary = summary[:keep] + ellipsis

    return summary


def extract_key_points(transcript: str) -> List[str]:
    """
    Extract key points from transcript.

    Looks for important-sounding lead-ins ("Important(ly)", "Key point",
    "Main takeaway", "Note that", "Remember") and captures the text that
    follows, up to the next ".", "!" or "?".

    Args:
        transcript: Plain-text transcript to scan.

    Returns:
        At most 7 key points, grouped by lead-in in the order above. If no
        lead-in is found, falls back to the first sentence (longer than 20
        characters) of each of the first 5 blank-line-separated chunks.
    """
    key_points = []

    # \b keeps the lead-ins from matching inside longer words such as
    # "unimportant" or "footnote that". "(?:ly)?" makes the whole "ly"
    # suffix optional; a character class "[ly]?" would only ever consume
    # one letter, so "Importantly, ..." could never match.
    patterns = [
        r"\b[Ii]mportant(?:ly)?[,:\s]+([^.!?]+)",
        r"\b[Kk]ey\s+point[:\s]+([^.!?]+)",
        r"\b[Mm]ain\s+takeaway[:\s]+([^.!?]+)",
        r"\b[Nn]ote\s+that\s+([^.!?]+)",
        r"\b[Rr]emember[:\s]+([^.!?]+)",
    ]

    for pattern in patterns:
        key_points.extend(
            match.group(1).strip() for match in re.finditer(pattern, transcript)
        )

    # If no explicit key points, extract first sentence of each paragraph-like chunk
    if not key_points:
        for chunk in transcript.split('\n\n')[:5]:  # Max 5 points
            first_sentence = re.split(r'[.!?]+', chunk)[0].strip()
            if len(first_sentence) > 20:
                key_points.append(first_sentence)

    return key_points[:7]  # Max 7 key points


def generate_meeting_notes(audio_path: str, meeting_title: str = "Meeting Notes") -> MeetingNotes:
    """
    Generate structured meeting notes from audio.

    Transcribes the recording with the module-level Whisper ``model``,
    then runs the regex-based extractors over the transcript, printing
    progress for each step.

    Args:
        audio_path: Path to meeting recording
        meeting_title: Title for the meeting

    Returns:
        MeetingNotes object with extracted information. ``date`` is the
        current local date (not the recording date), ``duration`` is the
        audio length in minutes, and ``follow_ups`` is always empty.
    """
    from datetime import datetime

    print("\n📝 Generating meeting notes...")
    print("=" * 50)

    # Step 1: Transcribe
    print("\n1. Transcribing audio...")
    # Load the samples ourselves so the duration can be derived from their count.
    audio = whisper.load_audio(audio_path)
    duration_minutes = len(audio) / whisper.audio.SAMPLE_RATE / 60

    result = model.transcribe(audio, verbose=False)
    # Whisper output typically begins with a space; strip it so the
    # start-of-text anchor in extract_participants can match a speaker
    # name that opens the transcript.
    transcript = result["text"].strip()
    print(f"   Transcribed {duration_minutes:.1f} minutes of audio")

    # Step 2: Extract participants
    print("\n2. Identifying participants...")
    participants = extract_participants(transcript)
    print(f"   Found {len(participants)} potential participants")

    # Step 3: Extract action items
    print("\n3. Extracting action items...")
    action_items = extract_action_items(transcript)
    print(f"   Found {len(action_items)} action items")

    # Step 4: Extract decisions
    print("\n4. Extracting decisions...")
    decisions = extract_decisions(transcript)
    print(f"   Found {len(decisions)} decisions")

    # Step 5: Extract key points
    print("\n5. Extracting key points...")
    key_points = extract_key_points(transcript)
    print(f"   Found {len(key_points)} key points")

    # Step 6: Generate summary
    print("\n6. Generating summary...")
    summary = generate_summary(transcript)

    # Create notes object
    notes = MeetingNotes(
        title=meeting_title,
        date=datetime.now().strftime("%Y-%m-%d"),
        duration=duration_minutes,
        participants=participants,
        transcript=transcript,
        summary=summary,
        key_points=key_points,
        action_items=action_items,
        decisions=decisions,
        follow_ups=[],  # Would need LLM for better extraction
    )

    print("\n✅ Meeting notes generated!")

    return notes

In [None]:
# Create a sample audio file for testing
import soundfile as sf

# For a real test, you'd use actual meeting audio
# This creates a simple tone for demonstration
sample_rate = 16000
duration = 5.0
# endpoint=False keeps the sample spacing at exactly 1/sample_rate; with the
# default endpoint=True the spacing is duration/(N-1) and the tone drifts
# slightly off 440 Hz.
t = np.linspace(
    0, duration, int(sample_rate * duration), endpoint=False, dtype=np.float32
)
audio = 0.3 * np.sin(2 * np.pi * 440 * t)  # 440 Hz sine at 0.3 amplitude
sf.write("test_meeting.wav", audio, sample_rate)

print("Created test audio file")
print("\n⚠️  Note: For meaningful results, use an actual meeting recording!")
print("   The test file contains only a tone, not speech.")

In [None]:
# Generate meeting notes
# Runs the full pipeline on the file written by the sample-audio cell and
# keeps the result in the notebook-level `notes` variable, which the save
# cell reads.
notes = generate_meeting_notes(
    "test_meeting.wav",
    meeting_title="Q4 Planning Meeting"
)

# Display the markdown output
# The 60-character "=" rules frame the rendered document in the cell output.
print("\n" + "=" * 60)
print("GENERATED MEETING NOTES:")
print("=" * 60)
print(notes.to_markdown())

In [None]:
# Save to file
# Write as UTF-8 explicitly: transcripts can contain non-ASCII text, and the
# platform default encoding is not guaranteed to be able to represent it.
with open("meeting_notes.md", "w", encoding="utf-8") as f:
    f.write(notes.to_markdown())

print("✅ Saved meeting notes to meeting_notes.md")

---

## Cleanup

In [None]:
import os

# Remove the files this notebook created, if they are still present.
for leftover in ["test_meeting.wav", "meeting_notes.md"]:
    if os.path.exists(leftover):
        os.remove(leftover)

# Drop the model reference and collect garbage first, so that any memory it
# held is actually freed before asking PyTorch to release its cached blocks.
del model
gc.collect()
torch.cuda.empty_cache()
print("✅ Cleanup complete!")