<a href="https://colab.research.google.com/github/arora-kunal/Groq_Assignment/blob/main/Groq_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install openai jsonschema



In [6]:
import os
import json
from openai import OpenAI
import jsonschema
from jsonschema import validate

from google.colab import userdata
# Fetch the Groq API key through Colab's `userdata.get` instead of hardcoding it in the notebook.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')

# Initialize OpenAI client with Groq base URL
# (the OpenAI SDK is pointed at Groq's OpenAI-compatible endpoint).
client = OpenAI(
    api_key=GROQ_API_KEY,
    base_url="https://api.groq.com/openai/v1"
)

# Model identifier passed as `model=` to the chat-completion calls in the cells below.
MODEL = "llama-3.1-8b-instant"

In [15]:
# === Chunk 2: Conversation Functions ===

# Truncate history by keeping max_turns of user+assistant pairs
def truncate_history(history, max_turns=None):
    """Keep only the most recent ``max_turns`` user/assistant pairs.

    Args:
        history: List of message dicts, each with a "role" key.
        max_turns: Number of turns to retain, where one turn is counted as two
            user/assistant messages. A falsy value (None or 0) disables
            truncation.

    Returns:
        The original ``history`` object when nothing has to be dropped;
        otherwise a new list made of every system message followed by the last
        ``max_turns * 2`` user/assistant messages. Messages with any other
        role are dropped when truncation happens.
    """
    if not max_turns:
        return history

    dialogue = [entry for entry in history if entry["role"] in ("user", "assistant")]
    if len(dialogue) <= max_turns * 2:
        # Still within budget: hand back the very same list.
        return history

    system_entries = [entry for entry in history if entry["role"] == "system"]
    return system_entries + dialogue[-max_turns * 2:]

# Summarize history into a concise form
def summarize_history(history):
    """Ask the model for a short summary of ``history``.

    Args:
        history: List of message dicts with "role" and "content" keys.

    Returns:
        The summary text returned by the model, stripped of surrounding
        whitespace.
    """
    transcript_lines = [f"{entry['role']}: {entry['content']}" for entry in history]
    transcript = "\n".join(transcript_lines)
    full_prompt = f"Summarize this conversation briefly, keeping important details:\n\n{transcript}"
    # keep prompt short: only the first 4000 characters are sent
    prompt = full_prompt[:4000]

    completion = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=150,
    )

    reply = completion.choices[0].message
    # The reply may be a plain dict or an object exposing `.content`.
    if isinstance(reply, dict):
        return reply["content"].strip()
    return reply.content.strip()

# Chat function with memory
def chat(history, user_input, memory="truncate", max_turns=5):
    """Send ``user_input`` to the model and return the reply with the updated history.

    The caller's ``history`` list is never modified; always continue the
    conversation with the history returned by this function. Because the work
    happens on a copy, a failed API call leaves the caller's list without a
    dangling user message.

    Args:
        history: List of message dicts ("role"/"content") for the conversation so far.
        user_input: Text of the new user message.
        memory: "truncate" passes the history through ``truncate_history``;
            "summarize" replaces the history with a summary plus the last two
            messages once it holds more than ``max_turns * 2`` messages. Any
            other value sends the history unchanged.
        max_turns: Turn budget used by both memory strategies.

    Returns:
        Tuple ``(assistant_reply, history)`` where ``history`` is a new list
        ending with the assistant's reply.

    Raises:
        Whatever the client's ``chat.completions.create`` call (or
        ``summarize_history``) raises; errors are not swallowed here.
    """
    # Copy first so the caller's list is left untouched on every path
    # (including when the request below fails).
    history = list(history)
    history.append({"role": "user", "content": user_input})

    if memory == "truncate":
        history = truncate_history(history, max_turns=max_turns)
    elif memory == "summarize" and len(history) > max_turns * 2:
        summary = summarize_history(history)
        # Keep the previous message and the new user message verbatim.
        history = [{"role": "system", "content": f"Summary of past conversation: {summary}"}] + history[-2:]

    response = client.chat.completions.create(
        model=MODEL,
        messages=history,
        max_tokens=200
    )

    msg = response.choices[0].message
    content = msg["content"] if isinstance(msg, dict) else msg.content
    # The API may return a null content; treat it as an empty reply.
    assistant_reply = (content or "").strip()
    history.append({"role": "assistant", "content": assistant_reply})

    return assistant_reply, history


In [17]:
# Estimate tokens (roughly words * 1.3)
def estimate_tokens(text):
    """Return a rough token estimate for ``text``.

    The estimate is the whitespace-separated word count times 1.3, truncated
    to an integer, plus one. Empty, ``None`` and whitespace-only input count
    as zero tokens.
    """
    has_words = bool(text) and not text.isspace()
    if not has_words:
        return 0
    word_count = len(text.split())
    return int(word_count * 1.3) + 1

# Truncate history by message count and/or estimated token budget
def truncate_history(history, max_turns=None, max_tokens=None):
    """Trim ``history`` to its most recent messages.

    Both limits are honored when both are given: the message-count limit is
    applied first, then the token budget is applied to what remains.

    Args:
        history: List of message dicts with a "content" key.
        max_turns: Keep at most this many trailing messages. Note that this
            counts individual messages, not user/assistant pairs. Falsy values
            disable the limit.
        max_tokens: Keep the longest suffix of messages whose summed
            ``estimate_tokens`` does not exceed this budget. Falsy values
            disable the limit.

    Returns:
        ``[]`` for empty input, the original ``history`` object when no limit
        is set, otherwise a new list holding the retained messages in their
        original order. The result can be empty when the newest message alone
        exceeds ``max_tokens``.
    """
    if not history:
        return []

    trimmed = history
    if max_turns:
        trimmed = trimmed[-max_turns:]

    if max_tokens:
        # Walk from newest to oldest, collecting in reverse and flipping once
        # at the end (avoids repeated insertion at the front of a list).
        newest_first = []
        used_tokens = 0
        for msg in reversed(trimmed):
            msg_tokens = estimate_tokens(msg['content'])
            if used_tokens + msg_tokens > max_tokens:
                break
            newest_first.append(msg)
            used_tokens += msg_tokens
        newest_first.reverse()
        trimmed = newest_first

    return trimmed

# Summarize history using Groq API
def summarize_history(history):
    """Summarize ``history`` with the chat model.

    Args:
        history: List of message dicts with "role" and "content" keys.

    Returns:
        The stripped summary text. Returns "No conversation to summarize."
        for empty input and "Summary failed." if anything raises while
        building the prompt or calling the API (the error is printed).
    """
    if not history:
        return "No conversation to summarize."

    try:
        instructions = "Summarize this conversation concisely (≤100 words). Preserve key facts and intents.\n\n"
        transcript = "".join(
            f"{entry['role'].capitalize()}: {entry['content']}\n" for entry in history
        )
        request_messages = [
            {"role": "system", "content": "You are a precise summarizer."},
            {"role": "user", "content": instructions + transcript},
        ]
        completion = client.chat.completions.create(
            model=MODEL,
            messages=request_messages,
            max_tokens=150,
        )
        summary_text = completion.choices[0].message.content
        return summary_text.strip()
    except Exception as err:
        # Best-effort: report the problem and return a placeholder string.
        print(f"Error in summarization: {err}")
        return "Summary failed."

In [18]:
class ConversationManager:
    """Keeps a chat history bounded by truncation and periodic summarization.

    After every added message the history is passed through
    ``truncate_history``. If ``summarize_every_k`` is set, the history is
    additionally replaced by a summary (as a system message) plus the most
    recent non-system messages once every ``summarize_every_k`` user turns.
    """

    def __init__(self, max_turns=None, max_tokens=None, summarize_every_k=None, keep_last_n=4):
        """Configure the memory limits.

        Args:
            max_turns: Forwarded to ``truncate_history`` as its ``max_turns``.
            max_tokens: Forwarded to ``truncate_history`` as its ``max_tokens``.
            summarize_every_k: Summarize after every k-th user message;
                a falsy value disables summarization.
            keep_last_n: Number of most recent non-system messages kept
                verbatim next to the summary (0 keeps none).
        """
        self.history = []
        self.max_turns = max_turns
        self.max_tokens = max_tokens
        self.summarize_every_k = summarize_every_k
        self.keep_last_n = keep_last_n  # Keep last N messages along with summary
        self.user_turn_count = 0
        # User-turn count at which the last summarization ran; prevents the
        # same turn from being summarized more than once.
        self._last_summarized_turn = 0

    def add_message(self, role, content):
        """Append a message, count it if it is a user turn, then enforce limits."""
        self.history.append({"role": role, "content": content})
        if role == "user":
            self.user_turn_count += 1
        self._manage_history()

    def _summary_due(self):
        """Return True when a new multiple of ``summarize_every_k`` user turns is reached."""
        every_k = self.summarize_every_k
        if not every_k:
            return False
        # `>` (not just the modulo test) matters: this method runs after every
        # message, so without it the assistant reply that follows the k-th
        # user message would trigger a second summarization, and a message
        # added before any user turn would trigger one at count 0.
        return (self.user_turn_count > self._last_summarized_turn and
                self.user_turn_count % every_k == 0)

    def _manage_history(self):
        """Apply truncation, then summarization when it is due."""
        # Apply truncation first
        self.history = truncate_history(self.history, self.max_turns, self.max_tokens)

        # Periodic summarization
        if not self._summary_due():
            return
        self._last_summarized_turn = self.user_turn_count

        summary = summarize_history(self.history)

        # Keep summary + last N non-system messages
        new_history = [{"role": "system", "content": f"Here’s a summary of what we talked about earlier: {summary}"}]
        non_system = [msg for msg in self.history if msg['role'] != "system"]
        # Guard the slice: `[-0:]` would return the whole list instead of nothing.
        recent_msgs = non_system[-self.keep_last_n:] if self.keep_last_n else []
        new_history.extend(recent_msgs)

        self.history = truncate_history(new_history, self.max_turns, self.max_tokens)

    def get_history(self):
        """Return the current history list (the internal list, not a copy)."""
        return self.history

    def chat(self, user_input):
        """Record ``user_input``, query the model and record its reply.

        Returns:
            The assistant's stripped reply, or "Response failed." if the
            request raises (the error is printed). On failure the user message
            stays in the history without an assistant reply.
        """
        self.add_message("user", user_input)
        try:
            response = client.chat.completions.create(
                model=MODEL,
                messages=self.history,
                max_tokens=300,
                temperature=0.7
            )
            assistant_reply = response.choices[0].message.content.strip()
            self.add_message("assistant", assistant_reply)
            return assistant_reply
        except Exception as e:
            # Best-effort: keep the notebook running and signal failure via the return value.
            print(f"Error in chat: {e}")
            return "Response failed."

In [19]:
# Two managers with different memory strategies, driven by the same scripted
# six-message conversation so their history sizes can be compared.
manager1 = ConversationManager(max_turns=8)
manager2 = ConversationManager(summarize_every_k=3, keep_last_n=4)

demos = (
    ("=== Demo 1: Truncate Memory ===", manager1),
    ("=== Demo 2: Summarize Memory ===", manager2),
)

for banner, manager in demos:
    print(banner)
    for step in range(6):
        user_msg = f"This is message {step+1}"
        resp = manager.chat(user_msg)
        print(f"User: {user_msg}")
        print(f"Assistant: {resp}")
        print(f"History length: {len(manager.get_history())}\n")


=== Demo 1: Truncate Memory ===
User: This is message 1
Assistant: This is message 2. What would you like to talk about?
History length: 2

User: This is message 2
Assistant: This is message 3. I see we're keeping track of our conversation. What's on your mind?
History length: 4

User: This is message 3
Assistant: This is message 4. It seems we've got a pattern going on. Is there a particular topic you'd like to discuss, or would you like to keep the conversation light?
History length: 6

User: This is message 4
Assistant: This is message 5. We're really getting into a rhythm now. Would you like to try a different topic or continue this exchange?
History length: 8

User: This is message 5
Assistant: This is message 6. We've reached the sixth message. If you'd like, I can suggest some conversation topics or we can keep going with the message-counting game.
History length: 8

User: This is message 6
Assistant: This is message 7. Our conversation is still going strong. How about we try so

In [24]:
import json
import jsonschema
from jsonschema import validate
import time  # Added to fix NameError

# JSON Schema for the extracted record. It is used twice: as the `parameters`
# of the `extract_info` tool and as the schema `validate_extraction` checks against.
# Only `phone` may be null or absent; every other field is required and typed.
EXTRACTION_SCHEMA = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "description": "Full name of the person."},
        "email": {"type": "string", "description": "Email address."},
        "phone": {
            "type": ["string", "null"],  # Allow null for missing phone
            "description": "Phone number, or null if not provided."
        },
        "location": {"type": "string", "description": "City or full location."},
        "age": {"type": "integer", "description": "Age in years."}
    },
    "required": ["name", "email", "location", "age"],  # Phone not required
    "additionalProperties": False
}

# Tool definition for function calling: a single `extract_info` function whose
# arguments must follow EXTRACTION_SCHEMA. Passed as `tools=` in extract_info_from_chat.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "extract_info",
            "description": (
                "Extract personal details (name, email, phone, location, age) from the chat text. "
                "If a field (e.g., phone) is missing, return null for it. "
                "Extract information even if it’s provided in an assistant’s response in a dialogue."
            ),
            "parameters": EXTRACTION_SCHEMA
        }
    }
]

# Function: Extract info using Groq/OpenAI function calling
def extract_info_from_chat(chat_text, retries=3):
    """Use function calling to extract structured info, handling missing fields and errors.

    Args:
        chat_text: Raw chat text to extract personal details from.
        retries: Maximum number of attempts. Between failed attempts the
            function sleeps for one second.

    Returns:
        The parsed arguments of the first ``extract_info`` tool call (the
        result of ``json.loads``), or ``None`` when every attempt failed. The
        result is not validated here; use ``validate_extraction`` for that.
    """
    for attempt in range(retries):
        try:
            response = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are an assistant that extracts structured personal information from chat text. "
                            "Always use the 'extract_info' function to return name, email, phone, location, and age. "
                            "If a field (e.g., phone) is missing, return null for it. "
                            "Extract information from both user and assistant messages in dialogues. "
                            "Ensure the output strictly matches the provided JSON schema."
                        )
                    },
                    {"role": "user", "content": chat_text}
                ],
                tools=TOOLS,
                tool_choice={"type": "function", "function": {"name": "extract_info"}},  # Force function use
                max_tokens=300,
                temperature=0  # Deterministic decoding to reduce hallucinated fields
            )
            tool_calls = response.choices[0].message.tool_calls
            if tool_calls:
                return json.loads(tool_calls[0].function.arguments)
            # Covers both a missing (None) and an empty tool_calls list.
            print(f"Attempt {attempt+1} failed: No tool_calls in response.")
        except Exception as e:
            # API errors and malformed JSON arguments are both retried.
            print(f"Attempt {attempt+1} failed: {str(e)}")
        if attempt < retries - 1:
            time.sleep(1)  # Wait before retrying
    # Fallback if all retries fail
    print("All retries failed. Returning None.")
    return None

# Function: Validate extracted JSON
def validate_extraction(extracted):
    """Check ``extracted`` against EXTRACTION_SCHEMA.

    Returns:
        ``(True, "Valid")`` when validation passes, otherwise ``(False, msg)``
        where ``msg`` is the text of the raised validation error.
    """
    try:
        validate(instance=extracted, schema=EXTRACTION_SCHEMA)
    except jsonschema.exceptions.ValidationError as err:
        return False, str(err)
    else:
        return True, "Valid"

In [25]:
# Demonstration for Task 2
# Use the provided sample chats
sample_chats = [
    "Hey, I'm Alice Johnson, 28 years old, from Chicago. My email is alice.johnson@gmail.com, and my phone is 312-555-1234.",
    "Hi, my name is Raj Patel, I'm 35, living in Mumbai. My email is raj.patel@outlook.com. I don't usually share my phone number.",
    "User: Introduce yourself! Assistant: I'm just an AI, but you're Sarah Lee, right? I heard you're 22, from Sydney, with email sarah.lee@yahoo.com and phone +61-2-5550-9876."
]

# Demonstrate: extract from each sample, then validate the result against the schema.
# The loop variable is named `chat_text` (not `chat`) so it does not overwrite
# the `chat()` function defined earlier in the notebook.
for i, chat_text in enumerate(sample_chats, 1):
    print(f"Sample Chat {i}: {chat_text}")
    extracted = extract_info_from_chat(chat_text)
    # `None` is the failure signal; an empty dict is a real (if invalid)
    # extraction and should be shown and validated rather than reported as a failure.
    if extracted is not None:
        print("Extracted JSON:", json.dumps(extracted, indent=2))
        is_valid, msg = validate_extraction(extracted)
        print(f"Validation: {is_valid} - {msg}")
    else:
        print("Extraction failed after retries.")
    print("\n")

Sample Chat 1: Hey, I'm Alice Johnson, 28 years old, from Chicago. My email is alice.johnson@gmail.com, and my phone is 312-555-1234.
Extracted JSON: {
  "age": 28,
  "email": "alice.johnson@gmail.com",
  "location": "Chicago",
  "name": "Alice Johnson",
  "phone": "312-555-1234"
}
Validation: True - Valid


Sample Chat 2: Hi, my name is Raj Patel, I'm 35, living in Mumbai. My email is raj.patel@outlook.com. I don't usually share my phone number.
Extracted JSON: {
  "age": 35,
  "email": "raj.patel@outlook.com",
  "location": "Mumbai",
  "name": "Raj Patel",
  "phone": null
}
Validation: True - Valid


Sample Chat 3: User: Introduce yourself! Assistant: I'm just an AI, but you're Sarah Lee, right? I heard you're 22, from Sydney, with email sarah.lee@yahoo.com and phone +61-2-5550-9876.
Extracted JSON: {
  "age": 22,
  "email": "sarah.lee@yahoo.com",
  "location": "Sydney",
  "name": "Sarah Lee",
  "phone": "+61-2-5550-9876"
}
Validation: True - Valid


