In [1]:

# # Conversation Management & Classification using Groq API
#
# ## Objective
# Implement two core tasks using Groq APIs with OpenAI SDK compatibility:
# 1. Conversation History Management with Summarization
# 2. JSON Schema Classification & Information Extraction
#
# **Author**: Avinash singh
# **Date**: 25 September 2025

# ## Installation and Setup

# Install required packages
!pip install groq

# Import necessary libraries
import os
import json
import time
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from groq import Groq


# Initialize Groq client.
# SECURITY: the API key is read from the GROQ_API_KEY environment variable rather
# than being embedded in the source. A key that has ever been committed or shared
# inside a notebook must be treated as leaked and revoked in the Groq console.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment before running "
        "this notebook instead of hard-coding the key."
    )
client = Groq(api_key=_groq_api_key)

print("✅ Setup completed successfully!")

# ## Task 1: Managing Conversation History with Summarization

@dataclass
class Message:
    """A single message in the conversation.

    Attributes:
        role: Speaker of the message, "user" or "assistant".
        content: Text of the message.
        timestamp: Creation time in seconds, as returned by ``time.time()``.
    """
    role: str
    content: str
    timestamp: float


class ConversationManager:
    """Keep a conversation history and summarize it every k-th message.

    Messages are stored in arrival order. Each time the running message count
    becomes a multiple of ``summarization_interval`` the whole history is sent
    to the chat model (through the module-level ``client``) and the resulting
    summary is appended to ``summary_history``.
    """

    def __init__(self, model: str = "llama-3.1-8b-instant",
                 summarization_interval: int = 3):
        """Create an empty manager.

        Args:
            model: Chat model name passed to the completion endpoint.
            summarization_interval: Summarize after every this-many messages
                (must be >= 1).

        Raises:
            ValueError: If ``summarization_interval`` is smaller than 1.
        """
        self.model = model
        self.conversation_history: List[Message] = []
        self.summary_history: List[str] = []
        self.message_count = 0
        self.summarization_interval = 3
        # Route through the setter so the constructor value is validated too
        self.set_summarization_interval(summarization_interval)

    def add_message(self, role: str, content: str) -> None:
        """Append a message and trigger a summary when the interval is reached.

        Args:
            role: Speaker of the message ("user" or "assistant").
            content: Text of the message.
        """
        message = Message(role=role, content=content, timestamp=time.time())
        self.conversation_history.append(message)
        self.message_count += 1

        # Every k-th message (k = summarization_interval) triggers a summary
        if self.message_count % self.summarization_interval == 0:
            self._summarize_conversation()

    def _summarize_conversation(self) -> str:
        """Ask the model for a summary of the entire history so far.

        On success the summary is stored in ``summary_history`` and printed.

        Returns:
            The summary text; ``"No conversation to summarize."`` when the
            history is empty; or an ``"Error generating summary: ..."`` string
            when the request fails (the error text is printed and is NOT added
            to ``summary_history``).
        """
        if not self.conversation_history:
            return "No conversation to summarize."

        # Prepare conversation text for summarization
        conversation_text = self._format_conversation_for_summarization()

        prompt = f"""
        Please provide a concise summary of the following conversation.
        Focus on key points, decisions made, and important information exchanged.

        Conversation:
        {conversation_text}

        Summary:
        """

        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=200
            )

            summary = response.choices[0].message.content.strip()
            self.summary_history.append(summary)

            print(f"📝 Summary generated (Message #{self.message_count}): {summary}")
            return summary

        except Exception as e:
            # Best-effort: a failed summary must not interrupt the conversation
            error_msg = f"Error generating summary: {str(e)}"
            print(error_msg)
            return error_msg

    def _format_conversation_for_summarization(self) -> str:
        """Render the history as one ``ROLE <n>: <content>`` line per message."""
        return "\n".join(
            f"{message.role.upper()} {i}: {message.content}"
            for i, message in enumerate(self.conversation_history, 1)
        )

    def get_conversation_by_turns(self, last_n: int) -> List[Message]:
        """Return the last ``last_n`` messages.

        Args:
            last_n: Number of trailing messages wanted. A value <= 0 returns
                the complete history list itself.
        """
        return self.conversation_history[-last_n:] if last_n > 0 else self.conversation_history

    def get_conversation_by_length(self, max_chars: int) -> List[Message]:
        """Return the most recent messages whose contents fit in ``max_chars``.

        Messages are taken from newest to oldest until the next one would
        exceed the budget; the result is returned in chronological order.

        Args:
            max_chars: Character budget counted over message contents only.
                A value <= 0 returns the complete history list itself.
        """
        if max_chars <= 0:
            return self.conversation_history

        selected: List[Message] = []
        used = 0

        for message in reversed(self.conversation_history):
            size = len(message.content)
            if used + size > max_chars:
                break
            # Append and reverse once at the end instead of insert(0, ...),
            # which would shift the whole list on every iteration.
            selected.append(message)
            used += size

        selected.reverse()
        return selected

    def get_full_conversation(self) -> str:
        """Return the entire conversation as formatted text."""
        return self._format_conversation_for_summarization()

    def set_summarization_interval(self, interval: int) -> None:
        """Set how often to summarize (every ``interval`` messages).

        Args:
            interval: Positive number of messages between summaries.

        Raises:
            ValueError: If ``interval`` is smaller than 1. A zero interval
                would otherwise surface later as a ZeroDivisionError inside
                ``add_message``.
        """
        if interval < 1:
            raise ValueError(
                f"summarization interval must be >= 1, got {interval!r}"
            )
        self.summarization_interval = interval

    def get_summary_history(self) -> List[str]:
        """Return all summaries generated so far, oldest first."""
        return self.summary_history

# ### Demonstration of Task 1

# Section banner for the Task 1 walkthrough
print("=" * 60, "TASK 1: CONVERSATION MANAGEMENT & SUMMARIZATION", "=" * 60, sep="\n")

# Manager under demonstration; a summary is requested after every third message
conv_manager = ConversationManager()
conv_manager.set_summarization_interval(3)

# Scripted flight-booking dialogue as (role, content) pairs
sample_conversation = [
    ("user", "Hi, I'd like to book a flight to New York."),
    ("assistant", "Sure! I can help you book a flight. What are your travel dates?"),
    ("user", "I want to travel on March 15th and return on March 22nd."),
    ("assistant", "Great! Which airport would you prefer to depart from?"),
    ("user", "I'll be departing from San Francisco International Airport."),
    ("assistant", "Okay, and would you prefer economy, business, or first class?"),
    ("user", "Business class please. Also, I need a window seat."),
    ("assistant", "Noted! I'll look for business class flights with window seat availability."),
    ("user", "What about the price range? I'd like to keep it under $2000."),
]

# Replay the dialogue through the manager, echoing a 50-character preview of each message
print("\n💬 Simulating conversation with periodic summarization:")
for position, turn in enumerate(sample_conversation, start=1):
    speaker, text = turn
    conv_manager.add_message(speaker, text)
    preview = text[:50]
    print(f"Message {position}: {speaker} - {preview}...")

# Results banner
print("\n" + "=" * 40, "CONVERSATION ANALYSIS RESULTS", "=" * 40, sep="\n")

# 1. Whole transcript
print("\n1. 📋 Full Conversation History:")
print(conv_manager.get_full_conversation())

# 2. Transcript truncated to the four most recent messages
print("\n2. 🔄 Last 4 Conversation Turns:")
last_4_turns = conv_manager.get_conversation_by_turns(4)
for recent in last_4_turns:
    print(f"   {recent.role}: {recent.content}")

# 3. Transcript truncated to a 200-character content budget
print("\n3. 📏 Conversation Limited to 200 Characters:")
char_limited = conv_manager.get_conversation_by_length(200)
total_chars = sum(map(len, (kept.content for kept in char_limited)))
for kept in char_limited:
    print(f"   {kept.role}: {kept.content}")
print(f"   Total characters: {total_chars}")

# 4. Every summary produced during the replay
print("\n4. 📊 Summary History:")
for number, text in enumerate(conv_manager.get_summary_history(), start=1):
    print(f"   Summary {number}: {text}")

# ## Task 2: JSON Schema Classification & Information Extraction

class InformationExtractor:
    """Extract structured personal details from chat text via tool calling.

    The model is forced to call a single function whose parameters are
    described by a JSON schema (name, email, phone, location, age); the
    returned arguments are then validated and normalized. Requests go through
    the module-level ``client``.
    """

    # Placeholder stored for any field that is missing or unusable
    _NOT_PROVIDED = "Not provided"
    # Free-text fields, in the order they appear in the cleaned result
    _TEXT_FIELDS = ("name", "email", "phone", "location")

    def __init__(self, model: str = "llama-3.1-8b-instant"):
        """Store the model name and build the extraction schema."""
        self.model = model
        self.schema = self._create_extraction_schema()

    def _create_extraction_schema(self) -> Dict[str, Any]:
        """Return the function definition (name, description, JSON schema)."""
        return {
            "name": "extract_user_information",
            "description": "Extract personal information from user conversations",
            "parameters": {
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Full name of the user"
                    },
                    "email": {
                        "type": "string",
                        "description": "Email address of the user"
                    },
                    "phone": {
                        "type": "string",
                        "description": "Phone number of the user"
                    },
                    "location": {
                        "type": "string",
                        "description": "City or location of the user"
                    },
                    "age": {
                        "type": "integer",
                        "description": "Age of the user"
                    }
                },
                "required": ["name", "email", "phone", "location", "age"]
            }
        }

    def extract_information(self, chat_text: str) -> Dict[str, Any]:
        """Extract structured information from one chat.

        Args:
            chat_text: Raw conversation text.

        Returns:
            A dict with keys ``name``, ``email``, ``phone``, ``location`` and
            ``age`` on success, or ``{"error": <message>}`` when the request
            fails or the model returns no usable function arguments.
        """
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{
                    "role": "user",
                    "content": f"Extract personal information from this conversation: {chat_text}"
                }],
                # `tools` / `tool_choice` replace the deprecated
                # `functions` / `function_call` request parameters.
                tools=[{"type": "function", "function": self.schema}],
                tool_choice={
                    "type": "function",
                    "function": {"name": self.schema["name"]},
                },
                temperature=0.1  # Low temperature for consistent extraction
            )

            arguments = self._extract_arguments(response.choices[0].message)
            if arguments is None:
                return {"error": "No structured data could be extracted"}

            extracted_data = json.loads(arguments)
            if not isinstance(extracted_data, dict):
                # Valid JSON, but not the object the schema asks for
                return {"error": "No structured data could be extracted"}
            return self._validate_and_clean_data(extracted_data)

        except Exception as e:
            # Best-effort API boundary: report the failure instead of raising
            return {"error": f"Extraction failed: {str(e)}"}

    @staticmethod
    def _extract_arguments(message: Any) -> Optional[str]:
        """Return the JSON argument string of the function call, if any.

        Reads the first entry of ``message.tool_calls`` and falls back to the
        legacy ``message.function_call`` attribute; returns None when neither
        is populated.
        """
        tool_calls = getattr(message, "tool_calls", None)
        if tool_calls:
            return tool_calls[0].function.arguments
        legacy_call = getattr(message, "function_call", None)
        if legacy_call:
            return legacy_call.arguments
        return None

    @classmethod
    def _clean_text(cls, value: Any) -> str:
        """Normalize a text field: stringify, strip, placeholder when empty.

        Non-string values (for example ``None`` or a numeric phone number)
        are handled here so one odd field cannot fail the whole extraction.
        """
        if value is None:
            return cls._NOT_PROVIDED
        return str(value).strip() or cls._NOT_PROVIDED

    @classmethod
    def _clean_age(cls, value: Any) -> Any:
        """Return the age as an int in the open range (0, 150), else the placeholder.

        Accepts ints, whole-number floats and integer strings such as ``"28"``;
        booleans are rejected even though ``bool`` is a subclass of ``int``.
        """
        if isinstance(value, bool):
            return cls._NOT_PROVIDED
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        elif isinstance(value, str):
            try:
                value = int(value.strip())
            except ValueError:
                return cls._NOT_PROVIDED
        if isinstance(value, int) and 0 < value < 150:
            return value
        return cls._NOT_PROVIDED

    def _validate_and_clean_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and clean extracted data.

        Args:
            data: Parsed function-call arguments.

        Returns:
            A new dict with exactly the keys ``name``, ``email``, ``phone``,
            ``location`` and ``age``; missing or invalid values become
            ``"Not provided"``.
        """
        validated_data: Dict[str, Any] = {
            field: self._clean_text(data.get(field)) for field in self._TEXT_FIELDS
        }
        validated_data["age"] = self._clean_age(data.get("age"))
        return validated_data

    def batch_extract(self, chats: List[str], delay: float = 0.5) -> List[Dict[str, Any]]:
        """Extract information from multiple chats, one request at a time.

        Args:
            chats: Conversation texts to process.
            delay: Seconds to pause between consecutive requests (simple rate
                limiting). No pause is made after the final chat.

        Returns:
            One result dict per chat, in input order.
        """
        results = []
        total = len(chats)
        for i, chat in enumerate(chats, 1):
            print(f"Processing chat {i}...")
            results.append(self.extract_information(chat))
            if delay > 0 and i < total:
                time.sleep(delay)  # Rate limiting
        return results

# ### Demonstration of Task 2

# Section banner for the Task 2 walkthrough
print("\n" + "=" * 60, "TASK 2: JSON SCHEMA CLASSIFICATION & INFORMATION EXTRACTION", "=" * 60, sep="\n")

# Extractor under demonstration
extractor = InformationExtractor()

# Four free-text chats, each mentioning a name, age, city, email and phone number
sample_chats = [
    """
    Hi, my name is John Smith. I'm 28 years old and I live in Seattle.
    You can reach me at john.smith@email.com or 555-123-4567.
    I'm interested in your premium subscription.
    """,

    """
    Hello! I'm Sarah Johnson from Chicago. My email is sarah.j@company.com
    and my phone number is (555) 987-6543. I'm 35 years old and I'd like
    to update my account information.
    """,

    """
    Good morning! My name is Mike Brown, I'm 42. I'm calling from Boston.
    Contact me at mike.brown@email.com or 555-555-7890. I need help with
    my recent order.
    """,

    """
    Hey there! I'm interested in your services. I'm Lisa Wang, 29 years old
    from San Francisco. My contact is lisa.wang@email.com and 555-234-5678.
    """
]

# Echo the inputs with surrounding whitespace removed
print("\n📋 Sample Chats for Extraction:")
for chat_no, chat_body in enumerate(sample_chats, start=1):
    trimmed = chat_body.strip()
    print(f"\nChat {chat_no}: {trimmed}")

# Run every chat through the extractor
print("\n🔄 Extracting information from chats...")
extraction_results = extractor.batch_extract(sample_chats)

# Per-chat results: either the extracted fields or the reported error
print("\n" + "=" * 40, "INFORMATION EXTRACTION RESULTS", "=" * 40, sep="\n")

for chat_no, outcome in enumerate(extraction_results, start=1):
    print(f"\n🎯 Chat {chat_no} Extraction Results:")
    if "error" not in outcome:
        for label, extracted in outcome.items():
            print(f"   {label.capitalize()}: {extracted}")
        continue
    print(f"   ❌ Error: {outcome['error']}")

# Aggregate view: how many chats succeeded and how often each field was filled
print("\n" + "=" * 40, "VALIDATION & ANALYSIS", "=" * 40, sep="\n")

successful_extractions = []
for outcome in extraction_results:
    if "error" not in outcome:
        successful_extractions.append(outcome)
print(f"✅ Successful extractions: {len(successful_extractions)}/{len(sample_chats)}")

if successful_extractions:
    print("\n📊 Extraction Statistics:")
    fields = ["name", "email", "phone", "location", "age"]
    for field in fields:
        filled_rows = [row for row in successful_extractions
                       if row.get(field) != "Not provided"]
        provided_count = len(filled_rows)
        print(f"   {field.capitalize()} provided: {provided_count}/{len(successful_extractions)}")

# ## Advanced Features Demonstration

# Section banner: blank line, rule, title, rule
print("\n" + "=" * 60, "ADVANCED FEATURES DEMONSTRATION", "=" * 60, sep="\n")

# ### Advanced Conversation Management with Custom Summarization

class AdvancedConversationManager(ConversationManager):
    """Conversation manager that can also summarize with a themed instruction.

    Each instance holds a mapping from a prompt-type name ("technical",
    "business", "support", plus any added later) to the instruction text that
    is placed ahead of the transcript when a custom summary is requested.
    """

    def __init__(self, model: str = "llama-3.1-8b-instant"):
        """Initialize the base manager and the built-in custom prompts."""
        super().__init__(model)
        self.custom_summary_prompts = {
            "technical": "Focus on technical requirements and specifications mentioned.",
            "business": "Highlight business decisions, costs, and timelines.",
            "support": "Summarize the problem and solution provided."
        }

    def set_custom_summary_prompt(self, prompt_type: str, custom_prompt: str) -> None:
        """Add a prompt type or replace the instruction of an existing one."""
        self.custom_summary_prompts[prompt_type] = custom_prompt

    def summarize_with_custom_prompt(self, prompt_type: str) -> str:
        """Summarize the whole conversation using the chosen instruction.

        The result is returned only; it is not added to ``summary_history``.

        Args:
            prompt_type: Key into ``custom_summary_prompts``.

        Returns:
            The summary text; ``"Invalid prompt type"`` for an unknown key;
            ``"No conversation to summarize."`` when there are no messages;
            or an ``"Error: ..."`` string when the request fails.
        """
        if prompt_type not in self.custom_summary_prompts:
            return "Invalid prompt type"

        # Same guard as the base class summarizer: with no messages there is
        # nothing to send, so skip the API request entirely.
        if not self.conversation_history:
            return "No conversation to summarize."

        conversation_text = self._format_conversation_for_summarization()
        custom_prompt = self.custom_summary_prompts[prompt_type]

        prompt = f"""
        {custom_prompt}

        Conversation:
        {conversation_text}

        Summary:
        """

        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=200
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            # Best-effort: report the failure as text instead of raising
            return f"Error: {str(e)}"

# Walkthrough of the themed-summary feature
print("\n🚀 Advanced Features Demo:")

advanced_manager = AdvancedConversationManager()

# Short troubleshooting dialogue as (role, content) pairs
tech_convo = [
    ("user", "I need help with API integration. The authentication is failing."),
    ("assistant", "What error message are you getting? Are you using OAuth 2.0?"),
    ("user", "Yes, I'm getting a 401 unauthorized error with JWT tokens."),
    ("assistant", "Check your token expiration time. It might be set too short.")
]

# Load the dialogue into the manager, one message per pair
for turn in tech_convo:
    advanced_manager.add_message(*turn)

# Request a summary built with the "technical" instruction and show it
tech_summary = advanced_manager.summarize_with_custom_prompt("technical")
print(f"🔧 Technical Summary: {tech_summary}")

# ## Testing and Validation

# Section banner
print("\n" + "=" * 60, "TESTING AND VALIDATION", "=" * 60, sep="\n")

# Try a few boundary inputs and print what comes back
print("\n🧪 Testing Edge Cases:")

# Case 1: summarizing a manager that holds no messages
empty_manager = ConversationManager()
empty_summary = empty_manager._summarize_conversation()
print(f"1. Empty conversation: {empty_summary}")

# Case 2: a two-message exchange, summarized on demand
short_manager = ConversationManager()
for speaker, text in (("user", "Hi"), ("assistant", "Hello!")):
    short_manager.add_message(speaker, text)
short_summary = short_manager._summarize_conversation()
print(f"2. Short conversation: {short_summary}")

# Case 3: a chat that states only a name
minimal_chat = "My name is Tom."
minimal_result = extractor.extract_information(minimal_chat)
print(f"3. Minimal information extraction: {minimal_result}")

# ## Performance Metrics

# Section banner: blank line, rule, title, rule
print("\n" + "=" * 60, "PERFORMANCE METRICS", "=" * 60, sep="\n")

# Simple performance tracking
import time

def measure_extraction_time(extractor, chat):
    """Run one extraction and report how long it took.

    Args:
        extractor: Object exposing ``extract_information(chat)``.
        chat: Conversation text handed to the extractor.

    Returns:
        A ``(result, elapsed_seconds)`` tuple, where ``result`` is whatever
        ``extract_information`` returned.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump (even backwards) when the system clock changes.
    start_time = time.perf_counter()
    result = extractor.extract_information(chat)
    elapsed = time.perf_counter() - start_time
    return result, elapsed

# Time a single extraction on the first sample chat and report it
print("\n⏱️  Performance Metrics:")
test_chat = sample_chats[0]
timed_outcome = measure_extraction_time(extractor, test_chat)
result, extraction_time = timed_outcome

print(f"Extraction time: {extraction_time:.2f} seconds")
print(f"Result: {result}")

# ## Conclusion and Summary

# Section banner followed by the closing write-up
print("\n" + "=" * 60, "CONCLUSION", "=" * 60, sep="\n")

print("""
✅ **Task 1 - Conversation Management**: Successfully implemented
- Conversation history management with message tracking
- Periodic summarization (every k-th message)
- Flexible truncation options (by turns and character length)
- Customizable summarization intervals

✅ **Task 2 - Information Extraction**: Successfully implemented
- JSON schema definition for 5 key information fields
- Function calling with Groq API for structured extraction
- Batch processing capability
- Data validation and cleaning

🔧 **Technical Features**:
- Error handling and validation
- Customizable summarization prompts
- Performance monitoring
- Comprehensive testing

📊 **Results**: All requirements met with additional advanced features
""")

# ## Export and Save Results

# Section banner: blank line, rule, title, rule
print("\n" + "=" * 60, "EXPORTING RESULTS", "=" * 60, sep="\n")

# Persist a manager's transcript, summaries and message count as JSON
def save_conversation_to_file(manager: ConversationManager, filename: str):
    """Write the manager's conversation data to ``filename`` as indented JSON.

    The file holds three keys: ``full_conversation`` (formatted transcript),
    ``summaries`` (list of generated summaries) and ``total_messages``.
    A confirmation line is printed once the file has been written.
    """
    payload = dict(
        full_conversation=manager.get_full_conversation(),
        summaries=manager.get_summary_history(),
        total_messages=manager.message_count,
    )

    with open(filename, mode='w') as handle:
        json.dump(payload, handle, indent=2)

    print(f"✅ Conversation data saved to {filename}")

# Persist the list of extraction results as JSON
def save_extraction_to_file(results: List[Dict], filename: str):
    """Write ``results`` to ``filename`` as indented JSON and print a confirmation."""
    with open(filename, mode='w') as handle:
        json.dump(results, handle, indent=2)

    confirmation = f"✅ Extraction results saved to {filename}"
    print(confirmation)

# Write both demo artifacts to JSON files in the working directory
save_conversation_to_file(manager=conv_manager, filename="conversation_data.json")
save_extraction_to_file(results=extraction_results, filename="extraction_results.json")


Collecting groq
  Downloading groq-0.31.1-py3-none-any.whl.metadata (16 kB)
Downloading groq-0.31.1-py3-none-any.whl (134 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.9/134.9 kB 4.5 MB/s eta 0:00:00
Installing collected packages: groq
Successfully installed groq-0.31.1
✅ Setup completed successfully!
TASK 1: CONVERSATION MANAGEMENT & SUMMARIZATION

💬 Simulating conversation with periodic summarization:
Message 1: user - Hi, I'd like to book a flight to New York....
Message 2: assistant - Sure! I can help you book a flight. What are your ...
📝 Summary generated (Message #3): Summary:

- User 1 initiated a flight booking request to New York.
- User 1 provided travel dates: departing on March 15th and returning on March 22nd.
- The travel dates were noted for potential flight booking.
Message 3: user - I want to travel on Mar