In [10]:
# ===============================================================
# INSTALL & IMPORTS
# ===============================================================
!pip install groq openai -q

import os
import json
import time
from typing import List, Dict, Any, Optional, Tuple
from openai import OpenAI
from getpass import getpass


In [11]:
# ===============================================================
# CONFIGURATION & SETUP
# ===============================================================
def setup_groq_client():
    """Build an OpenAI-SDK client pointed at Groq's OpenAI-compatible endpoint.

    The API key is taken from the ``GROQ_API_KEY`` environment variable. When
    that variable is unset or blank, the key is requested interactively with
    ``getpass`` and then stored in ``os.environ`` so later calls in the same
    process do not prompt again.

    Returns:
        OpenAI: client configured with the Groq base URL and the resolved key.

    Raises:
        ValueError: if the prompt is answered with an empty/blank key.
    """
    # Treat a blank variable the same as a missing one, and drop stray
    # whitespace that commonly sneaks in when a key is pasted.
    api_key = os.environ.get("GROQ_API_KEY", "").strip()

    if not api_key:
        print("Please enter your Groq API key:")
        api_key = getpass("Groq API Key: ").strip()
        if not api_key:
            # Fail here rather than persisting an empty key and getting an
            # authentication error on the first request.
            raise ValueError("A non-empty Groq API key is required")
        os.environ["GROQ_API_KEY"] = api_key

    client = OpenAI(
        base_url="https://api.groq.com/openai/v1",
        api_key=api_key,
    )
    return client

# Initialize client and model
# Both names are module-level globals: the classes defined in later cells fall
# back to `client` via globals() and pass `MODEL_NAME` to every completion
# request, so this cell has to run before those classes are used.
client = setup_groq_client()
MODEL_NAME = 'llama-3.3-70b-versatile'
print(f"✅ Groq client initialized successfully. Using model: {MODEL_NAME}")


✅ Groq client initialized successfully. Using model: llama-3.3-70b-versatile


In [3]:
# ===============================================================
# TASK 1: ENHANCED CONVERSATION MANAGER
# ===============================================================
class EnhancedConversationManager:
    """
    Conversation history store with truncation helpers and periodic
    LLM-based summarization.

    A "turn" is counted every time an assistant message is added. After every
    ``summarization_interval_k`` turns the history is replaced by a single
    system summary message followed by the two most recent messages. Each new
    summary is built from the previous summary plus the messages added since,
    so earlier context is carried forward instead of being dropped.
    """

    # Marker that identifies the system messages produced by summarization.
    SUMMARY_PREFIX = "📋 Summary:"

    def __init__(self, summarization_interval_k: int = 3, client=None,
                 model: Optional[str] = None):
        """
        Args:
            summarization_interval_k: number of turns between summarizations.
            client: OpenAI-compatible client; when omitted, a module-level
                ``client`` global is used if one exists.
            model: model name for summarization requests; when omitted, the
                module-level ``MODEL_NAME`` global is looked up at call time.

        Raises:
            ValueError: if ``summarization_interval_k`` is not positive.
        """
        if summarization_interval_k <= 0:
            raise ValueError("summarization_interval_k must be positive")

        self.history: List[Dict[str, str]] = []
        self.summarization_interval = summarization_interval_k
        self.turn_count = 0
        self.message_count = 0
        self.client = client or globals().get('client')
        self.model = model
        self.summarization_log = []
        print(f"📝 ConversationManager initialized with k={summarization_interval_k}")

    def add_message(self, role: str, content: str) -> None:
        """Append a message; an assistant message closes a turn and may trigger summarization.

        Raises:
            ValueError: if ``role`` is not 'user', 'assistant' or 'system'.
        """
        if role not in ["user", "assistant", "system"]:
            raise ValueError("Role must be 'user', 'assistant', or 'system'")

        self.history.append({"role": role, "content": content})
        self.message_count += 1

        if role == 'assistant':
            self.turn_count += 1
            print(f"🔄 Turn {self.turn_count} completed")
            self._check_and_summarize()

        # Reported after a possible summarization, so the total reflects the
        # compacted history.
        print(f"➕ Added {role} message (Total: {len(self.history)} messages, {self.turn_count} turns)")

    def _is_summary(self, message: Dict[str, str]) -> bool:
        """Return True for system messages that were produced by summarization."""
        return (
            message['role'] == "system"
            and message['content'].startswith(self.SUMMARY_PREFIX)
        )

    def _summarize_history(self) -> bool:
        """Replace the history with a fresh summary plus the last two messages.

        Returns:
            True when the history was compacted; False when there was too
            little content, no client/model, an empty model reply, or the API
            call raised (failures are reported and the history is left as-is).
        """
        try:
            print(f"\n🔍 SUMMARIZATION TRIGGERED (Turn {self.turn_count})")

            if self.client is None:
                print("❌ Summarization failed: no API client configured")
                return False

            model_name = self.model or globals().get('MODEL_NAME')
            if not model_name:
                print("❌ Summarization failed: no model name configured")
                return False

            prior_summaries = [
                msg['content'][len(self.SUMMARY_PREFIX):].strip()
                for msg in self.history
                if self._is_summary(msg)
            ]
            non_summary_messages = [
                msg for msg in self.history
                if not self._is_summary(msg)
            ]

            if len(non_summary_messages) < 2:
                print("⚠️ Insufficient content for summarization")
                return False

            # The previous summary is about to be overwritten, so it must be
            # part of the input or everything it covered would be forgotten.
            transcript_lines = [
                f"PREVIOUS SUMMARY: {text}" for text in prior_summaries if text
            ]
            transcript_lines.extend(
                f"{msg['role'].upper()}: {msg['content']}"
                for msg in non_summary_messages
            )
            conversation_text = "\n".join(transcript_lines)

            system_prompt = {
                "role": "system",
                "content": (
                    "You are an expert conversation summarizer. Create concise summaries. "
                    "When a PREVIOUS SUMMARY is given, keep its key facts in the new summary."
                )
            }
            user_prompt = {
                "role": "user",
                "content": f"Summarize this in 2–3 sentences:\n{conversation_text}"
            }

            response = self.client.chat.completions.create(
                model=model_name,
                messages=[system_prompt, user_prompt],
                temperature=0.1,
                max_tokens=200
            )

            summary = response.choices[0].message.content
            if not summary or not summary.strip():
                # Keep the existing history rather than replacing it with an
                # empty (or "None") summary.
                print("❌ Summarization failed: model returned an empty summary")
                return False

            recent_messages = non_summary_messages[-2:]
            summary_message = {"role": "system", "content": f"{self.SUMMARY_PREFIX} {summary}"}

            self.history = [summary_message] + recent_messages
            self.summarization_log.append({
                "turn": self.turn_count,
                "timestamp": time.time(),
                "summary": summary
            })

            print(f"✅ Summarization complete! 📋 {summary}")
            return True
        except Exception as e:
            # Best effort: a failed summarization must not break add_message.
            print(f"❌ Summarization failed: {str(e)}")
            return False

    def _check_and_summarize(self):
        """Run summarization when the turn counter hits a multiple of the interval."""
        if self.turn_count > 0 and self.turn_count % self.summarization_interval == 0:
            self._summarize_history()

    def get_history(self, include_metadata: bool = False):
        """Return the live history list, or a dict with counters when ``include_metadata`` is True."""
        if include_metadata:
            return {
                "messages": self.history,
                "total_messages": self.message_count,
                "total_turns": self.turn_count,
                "summarizations": len(self.summarization_log)
            }
        return self.history

    def truncate_by_turns(self, max_turns: int):
        """Return the last ``max_turns * 2`` messages (a turn is taken to be two messages).

        The stored history is not modified. Non-positive values give ``[]``.
        """
        if max_turns <= 0:
            return []
        return self.history[-(max_turns * 2):]

    def truncate_by_length(self, max_length: int, unit: str = "chars"):
        """Return the newest messages whose combined size fits in ``max_length``.

        Messages are taken whole, newest first, and collection stops at the
        first message that would exceed the budget. The result is in
        chronological order and the stored history is not modified.

        Args:
            max_length: size budget; non-positive values give ``[]``.
            unit: "words" counts whitespace-separated words; any other value
                counts characters.
        """
        if max_length <= 0:
            return []

        kept_newest_first, used = [], 0
        for message in reversed(self.history):
            content = message['content']
            size = len(content.split()) if unit == "words" else len(content)
            if used + size > max_length:
                break
            kept_newest_first.append(message)
            used += size

        # Append-then-reverse avoids the quadratic cost of insert(0, ...).
        kept_newest_first.reverse()
        return kept_newest_first

    def get_summarization_stats(self):
        """Return counters and the full summarization log as a dict."""
        return {
            "total_summarizations": len(self.summarization_log),
            "summarization_interval": self.summarization_interval,
            "current_turns": self.turn_count,
            "summarization_log": self.summarization_log
        }


In [4]:
def demonstrate_task1():
    """Walk through Task 1: feed a scripted chat into the manager, then show
    the resulting history, the three truncation modes and the summarization log."""
    wide_rule = "=" * 60
    narrow_rule = "=" * 40

    def show_clipped(messages):
        # Shared printer for the truncation previews (first 60 characters).
        for entry in messages:
            print(f"   {entry['role']}: {entry['content'][:60]}...")

    print("\n" + wide_rule)
    print("🚀 TASK 1: CONVERSATION MANAGEMENT DEMONSTRATION")
    print(wide_rule)

    # Manager that summarizes after every third turn
    manager = EnhancedConversationManager(summarization_interval_k=3, client=client)

    # Scripted six-turn dialogue, long enough to cross the interval twice
    dialogue = [
        ("user", "Hello! I'm planning a trip to Japan and need some advice."),
        ("assistant", "Hello! I'd be happy to help you plan your trip to Japan. What specific aspects would you like advice on - cities to visit, cultural experiences, food, or something else?"),

        ("user", "I'm interested in both cultural experiences and food. I have about 10 days total."),
        ("assistant", "Perfect! For 10 days, I'd recommend focusing on 2-3 cities. Tokyo and Kyoto are must-sees for culture and food. Tokyo offers modern culture and incredible street food, while Kyoto has traditional temples and kaiseki cuisine."),

        ("user", "That sounds great! What about transportation? I've heard about the JR Pass."),
        ("assistant", "Yes! The JR Pass is excellent for tourists. A 7-day pass costs about ¥29,650 and covers most trains including the shinkansen bullet trains between cities. It pays for itself with just one Tokyo-Kyoto round trip."),

        ("user", "Awesome! And what about accommodation? Should I try a traditional ryokan?"),
        ("assistant", "Absolutely! I highly recommend spending at least one night in a ryokan, especially in Kyoto or Hakone. They offer traditional Japanese hospitality, tatami rooms, and often include elaborate kaiseki dinners and breakfast."),

        ("user", "This is so helpful! Can you recommend specific ryokans?"),
        ("assistant", "Certainly! In Kyoto, try Hoshinoya Kyoto for luxury or Ryokan Genhouin for a more affordable authentic experience. In Hakone, Hakone Ginyu offers stunning views of Mount Fuji."),

        ("user", "Perfect! Now I'm wondering about the best time to visit. I'm thinking March or November."),
        ("assistant", "Both are excellent choices! March brings cherry blossoms (sakura season) - magical but crowded and expensive. November offers beautiful autumn colors, fewer crowds, and pleasant weather. I'd slightly favor November for a first visit."),
    ]

    # Replay the dialogue through the manager
    print(f"\n📝 Adding {len(dialogue)} conversation exchanges...")
    for speaker, text in dialogue:
        manager.add_message(speaker, text)
        print(f"   └─ {speaker}: {text[:60]}...")

    # Counters after the replay
    print("\n📊 FINAL CONVERSATION STATE:")
    snapshot = manager.get_history(include_metadata=True)
    print(f"   • Total Messages: {snapshot['total_messages']}")
    print(f"   • Total Turns: {snapshot['total_turns']}")
    print(f"   • Summarizations: {snapshot['summarizations']}")

    # Whatever is left in the (possibly compacted) history
    remaining = manager.get_history()
    print(f"\n💬 Current History ({len(remaining)} messages):")
    for entry in remaining:
        body = entry['content']
        if len(body) > 80:
            preview = body[:80] + "..."
        else:
            preview = body
        print(f"   {entry['role']}: {preview}")

    # Truncation modes
    print("\n" + narrow_rule)
    print("🔧 TRUNCATION DEMONSTRATIONS")
    print(narrow_rule)

    print("\n📏 Truncated to last 2 turns:")
    show_clipped(manager.truncate_by_turns(2))

    print("\n📏 Truncated to 500 characters:")
    show_clipped(manager.truncate_by_length(500, "chars"))

    print("\n📏 Truncated to 50 words:")
    show_clipped(manager.truncate_by_length(50, "words"))

    # Summarization log as JSON
    print("\n📈 SUMMARIZATION STATISTICS:")
    print(json.dumps(manager.get_summarization_stats(), indent=2))


In [6]:
# =============================================================================
# TASK 2: JSON SCHEMA EXTRACTION & VALIDATION
# =============================================================================

class UserInfoExtractor:
    """
    Extract user contact details from free text with function calling on an
    OpenAI-compatible chat API, then validate the extracted fields.

    Extraction goes through the ``extract_user_details`` tool; when the API
    rejects the tool call ("tool call validation failed") a plain-JSON prompt
    is used as a fallback.
    """

    def __init__(self, client=None, model: Optional[str] = None):
        """
        Args:
            client: OpenAI-compatible client; when omitted, a module-level
                ``client`` global is used if one exists.
            model: model name for requests; when omitted, the module-level
                ``MODEL_NAME`` global is looked up at call time.
        """
        self.client = client or globals().get('client')
        self.model = model

        # Define comprehensive JSON schema for user details
        # Note: Groq API requires explicit handling of optional fields
        self.user_schema = {
            "type": "object",
            "properties": {
                "name": {
                    "type": ["string", "null"],
                    "description": "Full name of the user, or null if not provided"
                },
                "email": {
                    "type": ["string", "null"],
                    "description": "Email address of the user, or null if not provided"
                },
                "phone": {
                    "type": ["string", "null"],
                    "description": "Phone number of the user (any format), or null if not provided"
                },
                "location": {
                    "type": ["string", "null"],
                    "description": "City, state, or address of the user, or null if not provided"
                },
                "age": {
                    "type": ["integer", "null"],
                    "description": "Age of the user in years, or null if not provided",
                    "minimum": 1,
                    "maximum": 120
                }
            },
            "required": []  # No strictly required fields for Groq compatibility
        }

        # Function calling tool definition
        self.extraction_tool = [{
            "type": "function",
            "function": {
                "name": "extract_user_details",
                "description": "Extract user contact and personal information from chat messages",
                "parameters": self.user_schema
            }
        }]

    def _resolve_model(self) -> str:
        """Return the configured model name, falling back to the ``MODEL_NAME`` global.

        Raises:
            RuntimeError: if neither source provides a model name.
        """
        model_name = self.model or globals().get('MODEL_NAME')
        if not model_name:
            raise RuntimeError("No model configured: pass model= or define MODEL_NAME")
        return model_name

    def extract_info(self, chat_message: str) -> Tuple[bool, Dict[str, Any], List[str]]:
        """
        Extract user information from a chat message.

        Args:
            chat_message (str): User's chat message

        Returns:
            Tuple of (success, extracted_data, validation_errors). ``success``
            only says that a JSON object was obtained; the data is valid when
            ``validation_errors`` is empty.
        """
        print(f"\n🔍 EXTRACTING INFO FROM: '{chat_message[:100]}...'")

        try:
            # Prepare messages for extraction
            system_message = {
                "role": "system",
                "content": """You are an expert information extractor. Extract user details from chat messages using the provided function.
For fields that are not clearly mentioned in the text, return null. Only extract information that is explicitly stated.
Be conservative - don't guess or infer information that isn't directly provided."""
            }

            user_message = {
                "role": "user",
                "content": f"Extract user information from this message: {chat_message}"
            }

            # Call the API with function calling
            response = self.client.chat.completions.create(
                model=self._resolve_model(),
                messages=[system_message, user_message],
                tools=self.extraction_tool,
                tool_choice="auto",
                temperature=0.0
            )

            # Parse function call response
            message = response.choices[0].message

            if not message.tool_calls:
                return False, {}, ["No function call made by the model"]

            tool_call = message.tool_calls[0]

            if tool_call.function.name != "extract_user_details":
                return False, {}, ["Incorrect function called"]

            # Parse extracted data
            extracted_data = json.loads(tool_call.function.arguments)
            if not isinstance(extracted_data, dict):
                # Valid JSON but not an object (e.g. a list): report it
                # explicitly instead of failing inside the validator.
                return False, {}, ["Function arguments were not a JSON object"]

            print(f"✅ Raw extraction successful:")
            print(json.dumps(extracted_data, indent=2))

            # Validate extracted data
            validation_errors = self._validate_extracted_data(extracted_data)
            is_valid = len(validation_errors) == 0

            if is_valid:
                print("✅ Validation passed!")
            else:
                print(f"⚠️  Validation issues: {validation_errors}")

            return True, extracted_data, validation_errors

        except json.JSONDecodeError as e:
            error_msg = f"Invalid JSON in function response: {str(e)}"
            print(f"❌ {error_msg}")
            return False, {}, [error_msg]

        except Exception as e:
            error_msg = str(e)
            print(f"❌ Extraction error: {error_msg}")

            # If it's a Groq API schema validation error, try alternative extraction
            if "tool call validation failed" in error_msg:
                print("🔄 Attempting fallback extraction without function calling...")
                return self._fallback_extraction(chat_message)

            return False, {}, [error_msg]

    def _fallback_extraction(self, chat_message: str) -> Tuple[bool, Dict[str, Any], List[str]]:
        """
        Fallback extraction used when function calling is rejected.

        Asks the model for a bare JSON object and parses the text between the
        first '{' and the last '}' of the reply.

        Returns:
            Tuple of (success, extracted_data, validation_errors); any error
            is reported through the third element.
        """
        try:
            system_message = {
                "role": "system",
                "content": "Extract user information and respond with ONLY a valid JSON object. Use null for missing fields."
            }

            user_message = {
                "role": "user",
                "content": f"""Extract user details from this text and respond with only JSON:
{chat_message}

Required JSON format:
{{"name": "string or null", "email": "string or null", "phone": "string or null", "location": "string or null", "age": "integer or null"}}"""
            }

            # Call without function calling
            response = self.client.chat.completions.create(
                model=self._resolve_model(),
                messages=[system_message, user_message],
                temperature=0.0,
                max_tokens=200
            )

            response_text = (response.choices[0].message.content or "").strip()

            # Extract JSON from response
            start = response_text.find('{')
            end = response_text.rfind('}') + 1
            if start == -1 or end <= start:
                return False, {}, ["Could not extract JSON from response"]

            extracted_data = json.loads(response_text[start:end])
            if not isinstance(extracted_data, dict):
                return False, {}, ["Could not extract JSON from response"]

            print("✅ Fallback extraction successful:")
            print(json.dumps(extracted_data, indent=2))

            validation_errors = self._validate_extracted_data(extracted_data)
            return True, extracted_data, validation_errors

        except Exception as e:
            return False, {}, [f"Fallback extraction failed: {str(e)}"]

    @staticmethod
    def _looks_like_email(value: str) -> bool:
        """Structural email check: ``local@domain`` with one '@', no whitespace,
        and a dotted domain whose labels are all non-empty."""
        candidate = value.strip()
        if not candidate or any(ch.isspace() for ch in candidate):
            return False
        local, at_sign, domain = candidate.partition("@")
        if not (local and at_sign and domain) or "@" in domain:
            return False
        labels = domain.split(".")
        return len(labels) >= 2 and all(labels)

    def _validate_extracted_data(self, data: Dict[str, Any]) -> List[str]:
        """
        Validate extracted data against schema and business rules.

        Rules: at least a name or an email; name and location at least two
        characters; structurally valid email; phone with at least seven
        digits; age an integer between 1 and 120.

        Args:
            data (dict): Extracted data

        Returns:
            List of validation error messages (empty when the data is valid)
        """
        if not isinstance(data, dict):
            return ["Extracted data must be a JSON object"]

        errors = []

        # Check for minimum viable data (at least name OR email)
        has_name = data.get("name") and str(data["name"]).strip()
        has_email = data.get("email") and str(data["email"]).strip()

        if not has_name and not has_email:
            errors.append("Must have at least name or email")

        # Validate individual fields if present
        if has_name:
            if len(str(data["name"]).strip()) < 2:
                errors.append("Name too short")

        if has_email:
            # "@" and "." appearing anywhere is not enough ("a.b@c", "@.").
            if not self._looks_like_email(str(data["email"])):
                errors.append("Invalid email format")

        if data.get("phone"):
            # Extract digits from phone number
            digits = ''.join(c for c in str(data["phone"]) if c.isdigit())
            if len(digits) < 7:
                errors.append("Phone number too short")

        age_raw = data.get("age")
        if age_raw is not None:
            # bool is an int subclass and int() truncates floats, so both
            # would otherwise slip through as a "valid" age.
            is_bool = isinstance(age_raw, bool)
            is_fractional = isinstance(age_raw, float) and not age_raw.is_integer()
            if is_bool or is_fractional:
                errors.append("Age must be a valid integer")
            else:
                try:
                    age = int(age_raw)
                    if not (1 <= age <= 120):
                        errors.append("Age must be between 1 and 120")
                except (ValueError, TypeError):
                    errors.append("Age must be a valid integer")

        if data.get("location"):
            if len(str(data["location"]).strip()) < 2:
                errors.append("Location too short")

        return errors

    def batch_extract(self, chat_messages: List[str]) -> List[Dict[str, Any]]:
        """
        Extract information from multiple chat messages.

        Args:
            chat_messages (list): List of chat messages

        Returns:
            List of result dicts with keys ``sample_id`` (1-based),
            ``message``, ``extraction_success``, ``extracted_data``,
            ``validation_errors`` and ``is_valid``.
        """
        results = []

        for i, message in enumerate(chat_messages, 1):
            print(f"\n{'='*20} SAMPLE {i} {'='*20}")
            success, data, errors = self.extract_info(message)

            result = {
                "sample_id": i,
                "message": message,
                "extraction_success": success,
                "extracted_data": data,
                "validation_errors": errors,
                "is_valid": len(errors) == 0
            }

            results.append(result)

        return results


In [7]:
def demonstrate_task2():
    """Walk through Task 2: run the extractor over five sample chats, then
    print aggregate rates and a per-sample breakdown."""
    banner_rule = "=" * 60
    print("\n" + banner_rule)
    print("🚀 TASK 2: JSON SCHEMA EXTRACTION DEMONSTRATION")
    print(banner_rule)

    info_extractor = UserInfoExtractor(client=client)

    # Samples range from fully specified to containing no details at all
    sample_chats = [
        # Complete information
        "Hi there! I'm Sarah Johnson, 28 years old, living in Seattle, Washington. You can reach me at sarah.j@email.com or call me at (206) 555-0123. Looking forward to hearing from you!",

        # Partial information with different format
        "Hello! My name is Miguel Rodriguez. I'm from Barcelona, Spain and I'm 35. My email is miguel.r.barcelona@gmail.com. My phone number is +34 612 345 678.",

        # Minimal information
        "Hey, I'm Alex Chen. You can email me at alex.chen@company.org. I live in Toronto.",

        # Information in conversational context
        "I'd like to sign up for your newsletter. My details are: Name is Jennifer Williams, age 42, email jen.williams@outlook.com, phone 555-987-6543. I'm located in Austin, Texas.",

        # Edge case - unclear information
        "My friend told me about your service. I'm interested but not sure how to proceed. You can contact me somehow."
    ]

    print(f"\n🔍 Processing {len(sample_chats)} sample chats...\n")
    outcomes = info_extractor.batch_extract(sample_chats)

    # Aggregate counts
    total = len(outcomes)
    extracted_ok = len([o for o in outcomes if o["extraction_success"]])
    validated_ok = len([o for o in outcomes if o["is_valid"]])

    print("\n📊 EXTRACTION SUMMARY:")
    print(f"   • Total samples: {total}")
    print(f"   • Successful extractions: {extracted_ok}")
    print(f"   • Valid extractions: {validated_ok}")
    print(f"   • Success rate: {extracted_ok/total*100:.1f}%")
    print(f"   • Validation rate: {validated_ok/total*100:.1f}%")

    # Per-sample breakdown
    print("\n📋 DETAILED RESULTS:")
    for outcome in outcomes:
        verdict = '✅ VALID' if outcome['is_valid'] else '⚠️  ISSUES'
        print(f"\n{'─'*50}")
        print(f"Sample {outcome['sample_id']}: {verdict}")

        fields = outcome['extracted_data']
        if fields:
            print("Extracted Data:")
            # Fields the model returned as null are not shown
            for field_name, field_value in fields.items():
                if field_value is None:
                    continue
                print(f"   • {field_name}: {field_value}")

        problems = outcome['validation_errors']
        if problems:
            print("Validation Errors:")
            for problem in problems:
                print(f"   • {problem}")


In [9]:
def main():
    """Run both task demonstrations and print the closing checklist.

    Any exception raised by a demonstration is caught and reported together
    with a hint to check the API key and network connection.
    """
    header = (
        "🎯 GROQ API CONVERSATION MANAGEMENT & CLASSIFICATION",
        "🏫 Assignment Implementation - Complete Version",
        "=" * 60,
    )
    for header_line in header:
        print(header_line)

    checklist = (
        "   ✅ Task 1: Conversation management with periodic summarization",
        "   ✅ Task 1: Multiple truncation options (turns, chars, words)",
        "   ✅ Task 1: Automatic k-th run summarization",
        "   ✅ Task 2: JSON schema extraction with function calling",
        "   ✅ Task 2: Validation against schema",
        "   ✅ Task 2: Multiple sample processing",
        "   ✅ Clean, documented code",
        "   ✅ Comprehensive error handling",
        "   ✅ Groq API with OpenAI SDK compatibility",
    )

    try:
        # Task 1 first, then Task 2
        demonstrate_task1()
        demonstrate_task2()

        print("\n✅ ALL DEMONSTRATIONS COMPLETED SUCCESSFULLY!")
        print("\n📝 ASSIGNMENT REQUIREMENTS FULFILLED:")
        for item in checklist:
            print(item)

    except Exception as e:
        print(f"❌ Error during demonstration: {str(e)}")
        print("Please check your API key and network connection.")

# Execute the main function
# The guard keeps main() from running if this code is imported as a module;
# when the cell is executed directly, main() runs and produces the output below.
if __name__ == "__main__":
    main()


🎯 GROQ API CONVERSATION MANAGEMENT & CLASSIFICATION
🏫 Assignment Implementation - Complete Version

🚀 TASK 1: CONVERSATION MANAGEMENT DEMONSTRATION
📝 ConversationManager initialized with k=3

📝 Adding 12 conversation exchanges...
➕ Added user message (Total: 1 messages, 0 turns)
   └─ user: Hello! I'm planning a trip to Japan and need some advice....
🔄 Turn 1 completed
➕ Added assistant message (Total: 2 messages, 1 turns)
   └─ assistant: Hello! I'd be happy to help you plan your trip to Japan. Wha...
➕ Added user message (Total: 3 messages, 1 turns)
   └─ user: I'm interested in both cultural experiences and food. I have...
🔄 Turn 2 completed
➕ Added assistant message (Total: 4 messages, 2 turns)
   └─ assistant: Perfect! For 10 days, I'd recommend focusing on 2-3 cities. ...
➕ Added user message (Total: 5 messages, 2 turns)
   └─ user: That sounds great! What about transportation? I've heard abo...
🔄 Turn 3 completed

🔍 SUMMARIZATION TRIGGERED (Turn 3)
✅ Summarization complete! 📋 A 