In [7]:
import os
!pip install openai

from openai import OpenAI

# connect to Groq API (OpenAI-compatible)
# Resolve the key explicitly so the client is never constructed with api_key=None.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    # Hidden prompt: keeps the key out of the notebook source and its saved output.
    from getpass import getpass

    groq_api_key = getpass("Paste your Groq API key (keep it private): ").strip()
    if not groq_api_key:
        raise RuntimeError(
            "No Groq API key provided; set GROQ_API_KEY or paste a key when prompted."
        )
    # Remember the key for the rest of this kernel session.
    os.environ["GROQ_API_KEY"] = groq_api_key

client = OpenAI(
    api_key=groq_api_key,
    base_url="https://api.groq.com/openai/v1",
)




In [2]:
# Module-level transcript shared by the conversation helpers
conversation_history = []


def add_message(role, content):
    """Append one message (its role and content) to the end of conversation_history."""
    message = {"role": role, "content": content}
    conversation_history.append(message)


In [3]:
def truncate_by_turns(history, n=5):
    """Keep only the last n entries of the conversation.

    Each element of ``history`` counts as one turn.

    Args:
        history: List of messages, oldest first.
        n: Maximum number of trailing entries to keep.

    Returns:
        A new list holding at most the last ``n`` entries; empty when ``n <= 0``.
    """
    # history[-0:] is the *whole* list and a negative n would slice from the
    # front, so non-positive limits are handled explicitly.
    if n <= 0:
        return []
    return history[-n:]

def truncate_by_words(history, max_words=50):
    """Keep the most recent messages whose combined word count fits max_words.

    Messages are taken newest-first; the walk stops at the first message that
    would push the running total over ``max_words``, so the result is always a
    contiguous tail of ``history``.

    Args:
        history: List of message dicts (oldest first) with a "content" entry.
        max_words: Word budget for the kept messages (whitespace-separated words).

    Returns:
        A new list with the kept messages in their original (oldest-first) order.
    """
    kept_newest_first = []
    word_count = 0
    for msg in reversed(history):
        # A missing or None content is counted as zero words instead of crashing.
        n_words = len((msg.get("content") or "").split())
        if word_count + n_words > max_words:
            break
        # Append + one final reverse avoids the O(n^2) cost of insert(0, ...).
        kept_newest_first.append(msg)
        word_count += n_words
    kept_newest_first.reverse()
    return kept_newest_first


In [4]:
def summarize_conversation(history):
    """Ask the Groq chat model for a brief summary of the conversation.

    The history is sent to the model as its ``str()`` representation. The
    model's reply is returned wrapped in a system message whose content is
    prefixed with "Summary: ".
    """
    prompt_messages = [
        {"role": "system", "content": "Summarize the following conversation briefly."},
        {"role": "user", "content": str(history)},
    ]
    completion = client.chat.completions.create(
        model="llama-3.1-8b-instant",  # Groq model
        messages=prompt_messages,
    )
    first_choice = completion.choices[0]
    return {"role": "system", "content": f"Summary: {first_choice.message.content}"}


In [5]:
def manage_conversation(new_user_message, k=3, run_count=[0]):
    """Record a user message and a simulated reply; summarize on every k-th call.

    Args:
        new_user_message: Text of the user's message.
        k: Summarize after every ``k`` calls; must be at least 1.
        run_count: One-element list holding the number of calls made so far.
            The mutable default is intentional: it keeps the counter alive
            between calls. Pass your own ``[0]`` for an independent counter.

    Returns:
        The simulated assistant message, or, on every k-th call, the summary
        system message that has just replaced the entire history.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    # Validate before touching any state: k == 0 would otherwise raise
    # ZeroDivisionError only after the messages were appended and the counter bumped.
    if k < 1:
        raise ValueError("k must be a positive integer")

    add_message("user", new_user_message)

    # simulate assistant reply
    reply = f"(assistant reply to: {new_user_message})"
    add_message("assistant", reply)

    run_count[0] += 1

    # Not a k-th call: just hand back the simulated reply.
    if run_count[0] % k != 0:
        return {"role": "assistant", "content": reply}

    # k-th call: collapse the whole history into a single summary message.
    summary_msg = summarize_conversation(conversation_history)
    conversation_history.clear()
    conversation_history.append(summary_msg)
    return summary_msg


In [8]:
# Feed three user messages through the manager, printing what each call returns,
# then show what is left in the shared history.
for user_text in ("Hi, I want to order food", "Maybe Italian", "Pizza please"):
    print(manage_conversation(user_text))
print("Current history:", conversation_history)


{'role': 'assistant', 'content': '(assistant reply to: Hi, I want to order food)'}
{'role': 'assistant', 'content': '(assistant reply to: Maybe Italian)'}
{'role': 'system', 'content': 'Summary: The conversation involves a user trying to order food with an assistant. The user asks "Hi, I want to order food" and then specifies their preference for Italian food and ordering a pizza. However, instead of proceeding with the order, the conversation starts from the beginning, repeating the user\'s same requests multiple times without any actual outcome.'}
Current history: [{'role': 'system', 'content': 'Summary: The conversation involves a user trying to order food with an assistant. The user asks "Hi, I want to order food" and then specifies their preference for Italian food and ordering a pizza. However, instead of proceeding with the order, the conversation starts from the beginning, repeating the user\'s same requests multiple times without any actual outcome.'}]


Summary: A user initiated a conversation with an assistant to order food,
specifying they might want Italian food and specifically ordering pizza.
The conversation repeats itself from the beginning after the first cycle,
suggesting a loop occurred or the conversation was restarted.


In [9]:
# install & setup
!pip install openai jsonschema --quiet

from openai import OpenAI
import os, json, re
from jsonschema import validate, ValidationError, FormatChecker

# Get API key safely: prefer the environment, otherwise ask with a hidden prompt
# (getpass does not echo the key, unlike input()).
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    from getpass import getpass

    groq_api_key = getpass("Paste your Groq API key (keep it private): ").strip()
    if not groq_api_key:
        raise RuntimeError(
            "No Groq API key provided; set GROQ_API_KEY or paste a key when prompted."
        )
    # Remember the key for the rest of this kernel session.
    os.environ["GROQ_API_KEY"] = groq_api_key

# (Re)build the Groq client with the resolved key
client = OpenAI(
    api_key=groq_api_key,
    base_url="https://api.groq.com/openai/v1",
)

# Choose a current Groq model that supports function-calling
DEFAULT_MODEL = "llama-3.1-8b-instant"


KeyboardInterrupt: Interrupted by user

In [9]:
# Function definition (name, description, JSON-schema parameters) offered to the
# model for function-calling; only `name` and `email` are required.
extract_fn = {
    "name": "extract_user_info",
    "description": "Extract name, email, phone, location, and age from a user chat message.",
    "parameters": dict(
        type="object",
        properties=dict(
            name=dict(type="string", description="Full name"),
            email=dict(type="string", format="email"),
            phone=dict(type="string", description="Phone number (digits or with country code)"),
            location=dict(type="string", description="City or place"),
            age=dict(type="integer", minimum=0),
        ),
        required=["name", "email"],
    ),
}

# The API takes a list of function definitions; this notebook offers just one.
functions = [extract_fn]


In [10]:
#  call the model and safely parse the function_call arguments (JSON)
def _parse_json_object(text):
    """Best-effort parse of `text` into a dict; returns {} when no JSON object can be recovered."""
    # None or any other non-string payload cannot be parsed.
    if not isinstance(text, str):
        return {}
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # The JSON may be wrapped in extra text: retry on the outermost {...} span.
        match = re.search(r"\{.*\}", text, flags=re.S)
        if match is None:
            return {}
        try:
            parsed = json.loads(match.group(0))
        except json.JSONDecodeError:
            return {}
    # Valid JSON that is not an object (list, string, number) is not usable here.
    return parsed if isinstance(parsed, dict) else {}


def _field(obj, name):
    """Read `name` from a dict or an attribute-style object; None when absent."""
    if isinstance(obj, dict):
        return obj.get(name)
    return getattr(obj, name, None)


def call_extractor(chat_text, model=DEFAULT_MODEL):
    """
    Sends the chat_text to the model with a function definition and forces a function call.

    Args:
        chat_text: The raw user chat message to extract details from.
        model: Name of the chat model to call.

    Returns:
        (parsed_dict, full_response_object). parsed_dict is always a dict; it is
        empty when neither the function-call arguments nor the message content
        contain a parseable JSON object.
    """
    messages = [
        {"role": "system", "content": "You are a JSON extractor. Extract user contact details using the provided function signature."},
        {"role": "user", "content": chat_text}
    ]

    resp = client.chat.completions.create(
        model=model,
        messages=messages,
        functions=functions,
        # Force the model to return a function call
        function_call={"name": "extract_user_info"}
    )

    # Extract the first message
    message = resp.choices[0].message

    # The message / function_call may be a dict or an attribute-style object
    # depending on the client object type; _field handles both.
    fn_call = _field(message, "function_call")

    # Prefer the function-call arguments; if no function call came back,
    # fall back to treating the message content as the JSON payload.
    raw_json = _field(fn_call, "arguments") if fn_call else _field(message, "content")

    return _parse_json_object(raw_json), resp


In [11]:
# validate extracted JSON against the schema
# The schema is the same `parameters` object that is sent to the model.
schema_for_validation = extract_fn["parameters"]


def validate_extracted(data):
    """
    Check `data` (a dict) against `schema_for_validation`, with format checking enabled.
    Returns (True, None) when it conforms, otherwise (False, <validation error text>).
    """
    try:
        validate(instance=data, schema=schema_for_validation, format_checker=FormatChecker())
    except ValidationError as err:
        return False, str(err)
    else:
        return True, None


In [12]:
# sample chats: each one is run through the extractor and then schema validation
samples = [
    # sample 1: all fields present (clean)
    "Hi, I'm Priya Sharma, 28 years old, from Mumbai. My email is priya.sharma@example.com and phone is +91-9876543210.",
    # sample 2: missing phone (should still validate if required are present)
    "Hello, name is Rahul. You can reach me at rahul@example.com. I'm 35 and live in Delhi.",
    # sample 3: different phone format, lowercased email
    "Hey, I'm Anu. Contact: 9988776655. Email: anu99@mail.com. Age: 22. Location: Bengaluru",
]

for sample_no, chat_text in enumerate(samples, start=1):
    extracted, resp = call_extractor(chat_text)
    valid, error = validate_extracted(extracted)

    # Report: header, input, pretty-printed extraction, then the verdict
    print(f"--- Sample {sample_no} ---")
    print("Input:", chat_text)
    print("Extracted JSON:", json.dumps(extracted, indent=2), sep="\n")
    print("Valid according to schema?:", valid)
    if not valid:
        print("Validation error:", error)
    print()


--- Sample 1 ---
Input: Hi, I'm Priya Sharma, 28 years old, from Mumbai. My email is priya.sharma@example.com and phone is +91-9876543210.
Extracted JSON:
{
  "age": 28,
  "email": "priya.sharma@example.com",
  "location": "Mumbai",
  "name": "Priya Sharma",
  "phone": "+91-9876543210"
}
Valid according to schema?: True

--- Sample 2 ---
Input: Hello, name is Rahul. You can reach me at rahul@example.com. I'm 35 and live in Delhi.
Extracted JSON:
{
  "age": 35,
  "email": "rahul@example.com",
  "location": "Delhi",
  "name": "Rahul",
  "phone": ""
}
Valid according to schema?: True

--- Sample 3 ---
Input: Hey, I'm Anu. Contact: 9988776655. Email: anu99@mail.com. Age: 22. Location: Bengaluru
Extracted JSON:
{
  "age": 22,
  "email": "anu99@mail.com",
  "location": "Bengaluru",
  "name": "Anu",
  "phone": "9988776655"
}
Valid according to schema?: True



In [13]:
# Demo: a chat that contains no email address, run through extraction and
# validation. Validation only fails if the extracted data really lacks `email`.
sample_missing_email = "Hi, I'm Sam Kumar and I live in Pune. My phone is 9876543210."
extracted, _ = call_extractor(sample_missing_email)
valid, error = validate_extracted(extracted)

report = [
    ("Input:", sample_missing_email),
    ("Extracted:", json.dumps(extracted, indent=2)),
    ("Valid?:", valid),
]
if not valid:
    report.append(("Validation error:", error))
for label, value in report:
    print(label, value)


Input: Hi, I'm Sam Kumar and I live in Pune. My phone is 9876543210.
Extracted: {
  "age": 30,
  "email": "sam.kumar@example.com",
  "location": "Pune",
  "name": "Sam Kumar",
  "phone": "9876543210"
}
Valid?: True


## Task 2: JSON Schema Classification & Information Extraction

In this task I define a JSON schema for extracting user details (`name`, `email`, `phone`, `location`, `age`) and use the Groq (OpenAI-compatible) client with function-calling to get structured outputs.

**Approach**
- Define a `functions` entry that contains a JSON schema (`parameters`) describing expected output.
- Call the Groq model with `functions=[...]` and `function_call={"name":"extract_user_info"}` to force the model to return structured arguments.
- Parse the returned `function_call.arguments` (JSON string) and validate it using `jsonschema.validate` against the same schema.
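- Demonstrate with 3+ sample chats, plus one chat that omits the required `email` and is intended to show a validation failure. Caveat: in the recorded run the model invented an email (and an age) for that chat, so validation still passed; schema validation checks structure, not whether the values actually appear in the input.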

**Notes**
- Required fields in the schema: `name` and `email`.
- We use `jsonschema` with `FormatChecker()` to validate email format.
- Keep API keys private. Use environment variables or a secure prompt in Colab.
