In [None]:
!pip install openai-whisper
!pip install transformers
!pip install torch
!pip install librosa
!pip install pydub
!pip install huggingface-hub
!pip install gradio



In [None]:
# System-level dependency: refresh the apt package index, then install the ffmpeg binary.
# NOTE(review): presumably required so Whisper can decode the uploaded audio formats — confirm.
!apt-get update
!apt-get install -y ffmpeg

0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Connecting to security.ubuntu.com (91.189.91.82)] [Connected to cloud.r-pro                                                                               Get:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
                                                                               Get:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
                                                                               Get:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:5 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:6 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease [24.3 kB]
Hit:9 https://ppa.

In [None]:
import whisper
import torch
import librosa
import numpy as np
import json
import re
import time
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
from typing import Dict, List
import warnings
# Suppress every Python warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings("ignore")

# Configuration
# Whisper checkpoint name; used as the default `model_size` of AudioProcessor.
WHISPER_MODEL = "base"
# NOTE(review): LLM_MODEL is not referenced anywhere else in the visible notebook.
LLM_MODEL = "microsoft/DialoGPT-medium"
# "cuda" when PyTorch reports an available GPU, otherwise "cpu".
# NOTE(review): in the visible cells `device` is only printed; it is never passed to whisper.load_model.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


In [None]:
class AudioProcessor:
    """Speech-to-text front end backed by a Whisper model."""

    def __init__(self, model_size=WHISPER_MODEL):
        """Load the Whisper checkpoint named by `model_size` and keep it on the instance."""
        print(f"Loading Whisper Model: {model_size}")
        self.model = whisper.load_model(model_size)
        print("Whisper Model Loaded Successfully")

    def transcribe_audio(self, audio_path):
        """Transcribe the audio at `audio_path`.

        Returns a dict with the keys "text", "segments" and "language".
        Any exception raised while transcribing is printed and converted into
        an empty result ("" / [] / "en") instead of propagating to the caller.
        """
        try:
            print("Starting transcription.....")
            raw = self.model.transcribe(audio_path)
            print("Transcription Completed")
            transcript = dict(
                text=raw["text"],
                segments=raw.get("segments", []),
                language=raw.get("language", "en"),
            )
        except Exception as exc:
            print(f"Error During Transcription: {str(exc)}")
            # Best-effort contract: callers detect failure through the empty "text".
            return dict(text="", segments=[], language="en")
        return transcript

In [None]:
class MeetingAnalyzer:
    """Heuristic, rule-based analysis of a meeting transcript.

    Everything here is keyword and regular-expression matching on the raw
    transcript text; no language model is involved, so results are approximate.
    """

    # Capitalized tokens that the participant heuristic must never report as names.
    _NON_NAME_WORDS = frozenset({
        'The', 'This', 'That', 'And', 'But', 'So', 'We', 'They',
        'Meeting', 'Today', 'Yesterday', 'Tomorrow',
    })
    # Punctuation stripped from both ends of a token before the name heuristic runs,
    # so that "Alice," or "(Bob)" are still recognised.
    _EDGE_PUNCTUATION = ".,;:!?\"'()[]{}"
    # Speaking rate (words per minute) used to estimate the meeting duration.
    _WORDS_PER_MINUTE = 150

    def __init__(self):
        # Keyword stems: a word in the transcript matches when it STARTS with one of
        # these (e.g. "assign" matches "assigned"), see _keyword_pattern.
        self.action_words = ['todo', 'action', 'task', 'assign', 'result', 'responsible', 'deadline', 'due', 'complete', 'finish']
        self.decision_words = ['decide', 'agreed', 'concluded', 'resolved', 'determined', 'final', 'approved']
        # Stop words ignored when ranking frequent words as topics. Only words longer
        # than three characters are ever ranked, so the longer function words matter most.
        self.common_words = {
            'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
            'is', 'are', 'was', 'were', 'will', 'would', 'could', 'should',
            'have', 'has', 'had', 'do', 'does', 'did', 'can', 'may', 'might', 'must', 'shall',
            'that', 'this', 'these', 'those', 'they', 'them', 'their', 'there', 'then', 'than',
            'from', 'into', 'about', 'what', 'when', 'where', 'which', 'while', 'your',
            'been', 'being', 'just', 'also', 'very', 'some', 'here', 'because',
        }

    @staticmethod
    def _keyword_pattern(keywords):
        """Compile a case-insensitive regex matching any keyword at the start of a word.

        Returns None when `keywords` contains no non-empty entry.
        """
        alternatives = '|'.join(re.escape(keyword) for keyword in keywords if keyword)
        if not alternatives:
            return None
        # The leading \b prevents hits inside other words ("action" in "transaction").
        return re.compile(r'\b(?:' + alternatives + r')', re.IGNORECASE)

    @staticmethod
    def _split_sentences(text):
        """Split text on runs of '.', '!' or '?' and return the non-empty, stripped pieces."""
        return [piece.strip() for piece in re.split(r'[.!?]+', text) if piece.strip()]

    def _count_mentions(self, text, keywords):
        """Count how many words in `text` start with one of `keywords`."""
        pattern = self._keyword_pattern(keywords)
        return len(pattern.findall(text)) if pattern else 0

    @staticmethod
    def _empty_analysis(summary):
        """Return an analysis dict that carries only a summary message."""
        return {
            "summary": summary,
            "action_items": [],
            "decisions": [],
            "participants": [],
            "key_topics": []
        }

    def extract_sentences_with_keywords(self, text, keywords):
        """Return the sentences of `text` containing a word that starts with any keyword.

        Sentences are returned stripped, in transcript order. An empty keyword
        collection yields an empty list.
        """
        pattern = self._keyword_pattern(keywords)
        if pattern is None:
            return []
        return [sentence for sentence in self._split_sentences(text) if pattern.search(sentence)]

    def analyze_meeting(self, transcript, meeting_title="Team Meeting"):
        """Analyze a transcript and extract the key information.

        Args:
            transcript: either a dict with a 'text' entry (as produced by the
                transcription step) or anything convertible to a string.
            meeting_title: title echoed into the generated summary.

        Returns:
            A dict with the keys "summary", "action_items", "decisions",
            "participants" and "key_topics". Action items and decisions are
            capped at five sentences each. Missing, empty or whitespace-only
            transcripts, and any internal error, produce a dict whose lists
            are empty and whose summary explains why.
        """
        try:
            # Handle both dict and string inputs; a missing/None text counts as empty.
            if isinstance(transcript, dict):
                text = transcript.get('text') or ''
            elif transcript is None:
                text = ''
            else:
                text = str(transcript)

            if not text.strip():
                return self._empty_analysis("No transcript available for analysis")

            return {
                "summary": self.generate_summary(text, meeting_title),
                "action_items": self.extract_sentences_with_keywords(text, self.action_words)[:5],
                "decisions": self.extract_sentences_with_keywords(text, self.decision_words)[:5],
                "participants": self.extract_participants(text),
                "key_topics": self.extract_topics(text)
            }
        except Exception as e:
            # Best-effort: report the problem and hand back an empty analysis.
            print(f"Error During Analysis: {str(e)}")
            return self._empty_analysis("Error analyzing meeting")

    def extract_participants(self, text):
        """Guess participant names from the transcript.

        Candidates come from three sources, in decreasing order of confidence:
        "<Name> said/asked/..." patterns, self-introductions ("I am <Name>"),
        and any other capitalized alphabetic token longer than two characters.
        Duplicates are removed while keeping first-seen order, and at most ten
        names are returned, so the result is deterministic across runs.
        """
        speaker_names = re.findall(r'([A-Z][a-z]+)\s+(?:said|mentioned|stated|asked|replied|suggested|commented)', text)
        introduced_names = re.findall(r'(?:I am|This is|My name is)\s+([A-Z][a-z]+)', text)

        capitalized = []
        for raw_token in text.split():
            word = raw_token.strip(self._EDGE_PUNCTUATION)
            if (len(word) > 2 and word[0].isupper() and word.isalpha()
                    and word not in self._NON_NAME_WORDS):
                capitalized.append(word)

        # dict.fromkeys de-duplicates while preserving insertion order.
        unique_names = list(dict.fromkeys(speaker_names + introduced_names + capitalized))
        return unique_names[:10]  # Return max 10 participants

    def generate_summary(self, text, meeting_title="Team Meeting"):
        """Build a short statistical summary of the meeting.

        The duration is estimated from the word count at _WORDS_PER_MINUTE,
        rounded to the nearest minute and never reported as 0 for a non-empty
        transcript. Mention counts are occurrences of words starting with an
        action/decision keyword.
        """
        word_count = len(text.split())
        sentence_count = len(self._split_sentences(text))
        action_count = self._count_mentions(text, self.action_words)
        decision_count = self._count_mentions(text, self.decision_words)

        if word_count:
            rate = self._WORDS_PER_MINUTE
            minutes = max(1, (word_count + rate // 2) // rate)
        else:
            minutes = 0
        unit = "minute" if minutes == 1 else "minutes"

        summary = f"Meeting: {meeting_title}\n"
        summary += f"Duration: Approximately {minutes} {unit} (estimated from {word_count} words)\n"
        summary += f"Discussion points: {sentence_count} main statements\n"
        summary += f"Action-related mentions: {action_count}\n"
        summary += f"Decision-related mentions: {decision_count}\n"
        summary += "This meeting covered various topics with actionable outcomes and decisions made by the participants."

        return summary

    def extract_topics(self, text):
        """Extract up to eight key topics.

        Predefined business topics found in the text come first (in the order
        of the predefined list), followed by up to three of the most frequent
        non-stop-words that occur at least twice. Order is deterministic.
        """
        # Common business/meeting topics
        topic_keywords = [
            'project', 'budget', 'timeline', 'deadline', 'client', 'customer',
            'development', 'marketing', 'sales', 'strategy', 'planning',
            'review', 'feedback', 'goals', 'objectives', 'requirements',
            'meeting', 'discussion', 'presentation', 'report', 'update'
        ]

        text_lower = text.lower()

        # Predefined topics: a word of the transcript must start with the keyword.
        topics = [
            keyword.title()
            for keyword in topic_keywords
            if re.search(r'\b' + re.escape(keyword), text_lower)
        ]

        # Frequency of the remaining candidate words (longer than 3 chars, not stop words).
        word_freq = {}
        for word in re.findall(r'\b\w+\b', text_lower):
            if len(word) > 3 and word not in self.common_words:
                word_freq[word] = word_freq.get(word, 0) + 1

        # sorted() is stable, so ties keep first-appearance order.
        frequent_words = sorted(word_freq.items(), key=lambda item: item[1], reverse=True)[:3]
        topics.extend(word.title() for word, count in frequent_words if count >= 2)

        return list(dict.fromkeys(topics))[:8]  # Return max 8 unique topics

In [None]:
class EmailGenerator:
    """Renders a meeting analysis dict as a plain-text follow-up email."""

    def __init__(self):
        pass

    def generate_follow_up_email(self, analysis, meeting_title="Team Meeting"):
        """Compose the follow-up email for one meeting.

        `analysis` must provide the keys 'summary', 'decisions', 'action_items',
        'participants' and 'key_topics'. Decisions and action items are rendered
        as numbered lists (or a fallback sentence when empty); the participants
        and key-topics sections are only emitted when non-empty.
        """
        def numbered(entries):
            # "1. first\n2. second\n" ...
            return "".join(f"{position}. {entry}\n" for position, entry in enumerate(entries, 1))

        sections = [f"""
Subject: Meeting Summary - {meeting_title}

Dear Team,

I hope this email finds you well. Below is a summary of our recent meeting:

## Meeting Summary
{analysis['summary']}

## Key Decisions Made
"""]

        decisions = analysis['decisions']
        sections.append(
            numbered(decisions) if decisions
            else "No specific decisions were recorded in this meeting.\n"
        )

        sections.append("\n## Action Items\n")
        action_items = analysis['action_items']
        sections.append(
            numbered(action_items) if action_items
            else "No specific action items were identified.\n"
        )

        participants = analysis['participants']
        if participants:
            sections.append("\n## Participants\n" + ", ".join(participants))

        key_topics = analysis['key_topics']
        if key_topics:
            sections.append("\n\n## Key Topics Discussed\n" + ", ".join(key_topics))

        sections.append("""

## Next Steps
Please review the above items and let me know if I missed anything important.
If you have any questions or need clarification on any points, please don't hesitate to reach out.

Best Regards,
Meeting Assistant

---
This email was generated automatically by the Meeting Extractor tool.
""")
        return "".join(sections)

In [None]:
class MeetingProcessor:
    """End-to-end pipeline: audio file -> transcript -> analysis -> follow-up email."""

    def __init__(self):
        """Create the three pipeline stages (constructing AudioProcessor loads the Whisper model)."""
        self.audio_processor = AudioProcessor()
        self.analyzer = MeetingAnalyzer()
        self.email_generator = EmailGenerator()

    def process_meeting(self, audio_file_path, meeting_title="Team Meeting"):
        """Run every stage on one recording and collect the results.

        Returns a dict that always has a 'status' key ('success' or 'error').
        On success it also holds 'transcription', 'analysis' and 'email'.
        On error it holds 'error_message' plus whatever stages completed
        before the failure; no exception escapes this method.
        """
        outcome = {}
        try:
            print("Step 1: Transcribing Audio....")
            outcome['transcription'] = self.audio_processor.transcribe_audio(audio_file_path)

            # An empty transcript means transcription failed or the audio had no speech.
            if not outcome['transcription']['text']:
                raise Exception("Transcription Failed or Empty")

            print("Step 2: Analyzing meeting content....")
            outcome['analysis'] = self.analyzer.analyze_meeting(outcome['transcription'], meeting_title)

            print("Step 3: Generating follow-up email....")
            outcome['email'] = self.email_generator.generate_follow_up_email(outcome['analysis'], meeting_title)

            outcome['status'] = 'success'
            print("Processing Completed Successfully!")
        except Exception as failure:
            outcome.update(status='error', error_message=str(failure))
            print(f"Error During Processing: {str(failure)}")

        return outcome

In [None]:
def gradio_interface():
    """Build and return the Gradio Blocks UI for the meeting extractor.

    Creates one MeetingProcessor, defines the upload handler, lays out the
    input, status and output components, and wires the button/upload events.
    The Blocks object is returned without being launched.
    """
    # One processor instance is captured by the handler below and reused for every request.
    processor = MeetingProcessor()

    def process_audio_file(audio_file, meeting_title, progress=gr.Progress()):
        """Run the full pipeline on an uploaded file and format the results for display.

        Returns a 3-tuple (transcript, analysis text, email) matching the three
        outputs wired to the process button. On any failure the first element
        carries the message and the other two are empty strings.
        """
        if audio_file is None:
            return "Please upload an Audio File", "", ""

        start_time = time.time()
        try:
            # Validate file (audio_file is treated as a filesystem path here)
            if not os.path.exists(audio_file):
                return "Error: Audio file not found", "", ""

            # STAGE 1: TRANSCRIPTION
            # These three updates fire back-to-back before any work starts.
            progress(0.1, desc="🎵 Validating audio file...")
            progress(0.2, desc="🎙 Starting transcription...")
            progress(0.3, desc="📝 Converting speech to text...")

            # Process the meeting to get results.
            # This single call performs transcription, analysis AND email generation;
            # an empty title falls back to "Team Meeting".
            results = processor.process_meeting(audio_file, meeting_title or "Team Meeting")

            if results['status'] == 'success':
                transcript = results['transcription']['text']
                analysis = results['analysis']
                email = results['email']

                # STAGE 2: ANALYSIS
                # NOTE: the analysis already finished inside process_meeting above, so the
                # progress updates from here on are cosmetic rather than tied to real work.
                progress(0.5, desc="🧠 Analyzing meeting content...")
                progress(0.6, desc="📊 Extracting key insights...")
                progress(0.7, desc="✅ Identifying action items...")

                # Format analysis with better structure.
                # chr(10) is the newline character, used to join the bullet lines.
                analysis_text = f"""📋 **MEETING SUMMARY**
{analysis['summary']}

🎯 **KEY DECISIONS**
{chr(10).join(f"• {decision}" for decision in analysis['decisions']) if analysis['decisions'] else "• None identified"}

✅ **ACTION ITEMS**
{chr(10).join(f"• {item}" for item in analysis['action_items']) if analysis['action_items'] else "• None identified"}

👥 **PARTICIPANTS**
{', '.join(analysis['participants']) if analysis['participants'] else "None identified"}

💡 **KEY TOPICS DISCUSSED**
{', '.join(analysis['key_topics']) if analysis['key_topics'] else "None identified"}
"""

                # STAGE 3: EMAIL GENERATION
                # The email was likewise produced by process_meeting; only progress is reported here.
                progress(0.8, desc="📧 Generating follow-up email...")
                progress(0.9, desc="✉ Formatting email content...")
                progress(0.95, desc="🎯 Finalizing email...")

                # Complete processing
                elapsed_time = time.time() - start_time
                progress(1.0, desc=f"✅ All tasks completed! ({elapsed_time:.1f}s)")

                return transcript, analysis_text, email
            else:
                # Pipeline reported a failure; surface its message in the transcript slot.
                return f"❌ Error: {results['error_message']}", "", ""

        except Exception as e:
            # Anything unexpected in this handler is shown to the user instead of raised.
            elapsed_time = time.time() - start_time
            return f"❌ Error Processing File ({elapsed_time:.1f}s): {str(e)}", "", ""

    # Create interface.
    # Components are created in display order inside the nested context managers below.
    with gr.Blocks(
        title="🎯 AI-Powered Meeting Extractor",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .progress-bar {
            background: linear-gradient(90deg, #ff6b35, #f7931e) !important;
        }
        .header-section {
            text-align: center;
            padding: 20px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            border-radius: 10px;
            margin-bottom: 20px;
        }
        """
    ) as interface:
        # Header Section (styled by the .header-section rule in the css above)
        with gr.Row():
            gr.HTML("""
            <div class="header-section">
                <h1>🎯 AI-Powered Meeting Extractor</h1>
                <p>Transform your meeting recordings into actionable insights, transcripts, and follow-up emails</p>
            </div>
            """)

        # Input Section
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("### 📁 Upload Meeting Audio")
                gr.Markdown("*Supported formats: MP3, WAV, M4A, FLAC*")
                # First input of process_audio_file (its `audio_file` argument).
                audio_input = gr.Audio(
                    type="filepath",
                    label="Choose Audio File"
                )

                gr.Markdown("*Help us generate a more relevant analysis*")
                # Second input of process_audio_file (its `meeting_title` argument).
                title_input = gr.Textbox(
                    label="📝 Meeting Title (Optional)",
                    placeholder="e.g., Weekly Team Standup, Project Planning Meeting"
                )

                with gr.Row():
                    process_btn = gr.Button(
                        "🚀 Process Meeting",
                        variant="primary",
                        size="lg"
                    )
                    clear_btn = gr.Button(
                        "🗑 Clear All",
                        variant="secondary"
                    )

        # Status Section
        # Updated by the clear button and by audio uploads; the process button does not write to it.
        with gr.Row():
            status_display = gr.Markdown("📊 Ready to process your meeting audio")

        # Output Section
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📝 Meeting Transcript")
                transcript_output = gr.Textbox(
                    label="Full Transcript",
                    lines=20,
                    max_lines=25,
                    show_copy_button=True,
                    container=True
                )

            with gr.Column():
                gr.Markdown("### 📊 Meeting Analysis")
                analysis_output = gr.Textbox(
                    label="Key Insights & Analysis",
                    lines=15,
                    max_lines=20,
                    show_copy_button=True,
                    container=True
                )

        gr.Markdown("### 📧 Follow-up Email")
        email_output = gr.Textbox(
            label="Generated Email",
            lines=12,
            max_lines=15,
            show_copy_button=True,
            container=True
        )

        # Event Handlers
        def update_status(message):
            """Prefix a status message with the status icon used by status_display."""
            return f"📊 {message}"

        def clear_all():
            """Return reset values for the six components listed in the clear button's outputs."""
            # Order: audio, title, transcript, analysis, email, status.
            return None, "", "", "", "", update_status("Ready to process your meeting audio")

        # Process button click
        process_btn.click(
            fn=process_audio_file,
            inputs=[audio_input, title_input],
            outputs=[transcript_output, analysis_output, email_output],
            show_progress=True
        )

        # Clear button click
        clear_btn.click(
            fn=clear_all,
            outputs=[
                audio_input,
                title_input,
                transcript_output,
                analysis_output,
                email_output,
                status_display
            ]
        )

        # File upload status: show the uploaded file's base name, or a placeholder when cleared
        audio_input.change(
            fn=lambda x: update_status(f"Audio file uploaded: {os.path.basename(x) if x else 'No file selected'}"),
            inputs=[audio_input],
            outputs=[status_display]
        )



    return interface

# Initialize the interface.
# Building it constructs a MeetingProcessor, which in turn loads the Whisper model,
# so this cell prints the model-loading messages.
interface = gradio_interface()

Loading Whisper Model: base


100%|███████████████████████████████████████| 139M/139M [00:02<00:00, 58.2MiB/s]


Whisper Model Loaded Successfully


In [None]:
# Startup banner, emitted as a single newline-separated print.
print(
    "Meeting Extractor - Google Colab Version",
    "=" * 50,
    "Setup complete!",
    "Upload an audio file to start processing",
    "The interface will appear below",
    sep="\n",
)

# Launch the interface. share=True creates a public link, so anyone holding the
# URL can reach this app while the cell is running.
interface.launch(share=True, debug=True, height=800, show_error=True)

Meeting Extractor - Google Colab Version
Setup complete!
Upload an audio file to start processing
The interface will appear below
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://ad73b3053b8c075475.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Step 1: Transcribing Audio....
Starting transcription.....
Transcription Completed
Step 2: Analyzing meeting content....
Step 3: Generating follow-up email....
Processing Completed Successfully!
