# Yardstick Internship Assignment

## Introduction
This notebook contains my solutions for the Yardstick Internship Assignment.
It has 2 parts:
1. Task 1 - Conversation Management & Summarization
2. Task 2 - JSON Schema Classification & Extraction

## Task 1 - Conversation Management & Summarization

In [4]:
from typing import List, Dict

def summarize_conversation(conversation: List[Dict[str, str]], max_chars: int = 300) -> str:
    """
    Summarize the conversation history as a single line of text.

    Each turn is rendered as "<Role>: <message>" and the turns are joined
    with " | ". The roles "user" and "assistant" are labelled "User" and
    "Assistant"; every other role is labelled "System".

    Args:
        conversation: Turns as dicts holding a "role" and a "message" key.
        max_chars: Upper bound on the length of the returned string.

    Returns:
        The joined summary. When it would be longer than ``max_chars`` it is
        cut and terminated with "..." such that the result, marker included,
        never exceeds ``max_chars``. An empty conversation yields "".

    Raises:
        KeyError: If a turn lacks the "role" or "message" key.
    """
    role_labels = {"user": "User", "assistant": "Assistant"}
    summary_text = " | ".join(
        f"{role_labels.get(turn['role'], 'System')}: {turn['message']}"
        for turn in conversation
    )
    if len(summary_text) <= max_chars:
        return summary_text

    marker = "..."
    if max_chars <= len(marker):
        # No room for text plus the marker: hard-cut instead.
        # A negative limit is treated as zero rather than slicing from the end.
        return summary_text[:max(max_chars, 0)]

    # Reserve room for the marker so the documented limit is actually honoured.
    return summary_text[:max_chars - len(marker)] + marker

def truncate_conversation(conversation: List[Dict[str, str]],
                          max_turns: int = None,
                          max_chars: int = None) -> List[Dict[str, str]]:
    """
    Keep only the most recent part of a conversation.

    The turn limit is applied first, then the character limit is applied to
    whatever remains.

    Args:
        conversation: Turns as dicts holding a "message" key. The input list
            is never modified.
        max_turns: Keep at most this many of the latest turns. ``None``
            disables the limit; zero or a negative value keeps no turns.
        max_chars: Keep the latest turns whose combined message length does
            not exceed this value. Turns are taken newest-first and selection
            stops at the first turn that does not fit, so the kept turns are
            always contiguous. ``None`` disables the limit.

    Returns:
        A new list with the kept turns in their original order. The turn
        dicts themselves are shared with the input, not copied.
    """
    # Always work on a copy so callers never receive (and mutate) the input list.
    kept = list(conversation)

    if max_turns is not None:
        # A plain [-max_turns:] slice is wrong for 0 ([-0:] is the whole list)
        # and for negatives (it would drop turns from the front instead).
        kept = kept[-max_turns:] if max_turns > 0 else []

    if max_chars is not None:
        used_chars = 0
        newest_first = []
        for turn in reversed(kept):
            message_len = len(turn['message'])
            if used_chars + message_len > max_chars:
                break
            newest_first.append(turn)
            used_chars += message_len
        # Append-then-reverse restores chronological order in linear time.
        newest_first.reverse()
        kept = newest_first

    return kept


class ConversationManager:
    """
    Keeps a running conversation history and periodically compacts it.

    Every ``summarize_every``-th call to ``add_message`` replaces the stored
    history with a single "system" summary turn followed by the message that
    was just added.
    """

    def __init__(self, summarize_every: int = 3):
        """
        summarize_every: perform summarization after every k-th message

        Raises:
            ValueError: If ``summarize_every`` is smaller than 1.
        """
        if summarize_every < 1:
            # Zero would otherwise fail later with a ZeroDivisionError in add_message.
            raise ValueError("summarize_every must be a positive integer")
        self.history = []  # current conversation history (summary turn + recent turns)
        self.run_count = 0  # total number of messages added so far
        self.summarize_every = summarize_every

    def add_message(self, role: str, message: str):
        """
        Add a new message and perform summarization every k runs without duplication.

        On a summarization run, every turn before the new message whose role
        is not "system" is condensed via ``summarize_conversation``. Turns
        with role "system" (which includes the previous summary) are left out
        of the new summary, so their content is no longer part of the history
        afterwards.
        """
        self.history.append({"role": role, "message": message})
        self.run_count += 1

        if self.run_count % self.summarize_every != 0:
            return

        # Split off the message that was just added; it stays verbatim.
        *earlier_turns, latest_turn = self.history
        # Skipping system turns avoids summarizing an older summary again.
        past_turns = [turn for turn in earlier_turns if turn["role"] != "system"]
        summary = summarize_conversation(past_turns)
        self.history = [
            {"role": "system", "message": summary},
            latest_turn,
        ]

        print(f"\n[INFO] Summarization performed after {self.run_count} runs.\n")

    def get_history(self, max_turns: int = None, max_chars: int = None) -> List[Dict[str, str]]:
        """
        Return truncated conversation history

        The limits are forwarded to ``truncate_conversation``. The result is
        always a new list, so changing it does not alter the stored history.
        """
        truncated = truncate_conversation(self.history, max_turns=max_turns, max_chars=max_chars)
        # Copy defensively: without limits the helper may hand back self.history itself.
        return list(truncated)

    def show_history(self, max_turns: int = None, max_chars: int = None):
        """
        Display the conversation history

        Prints one "<Role>: <message>" line per turn returned by
        ``get_history``, framed by a header and a footer line.
        """
        truncated = self.get_history(max_turns=max_turns, max_chars=max_chars)
        print("\n--- Conversation History ---")
        for turn in truncated:
            print(f"{turn['role'].capitalize()}: {turn['message']}")
        print("--- End of History ---\n")


# Scripted exchange that is fed to the manager one message at a time
sample_messages = [
    ("user", "Hello! How are you?"),
    ("assistant", "I'm good, thank you! How can I help you today?"),
    ("user", "Can you explain Task 1 for me?"),
    ("assistant", "Sure! Task 1 involves conversation management and summarization."),
    ("user", "I want to see truncation by turns and characters."),
    ("assistant", "Alright! We can limit by number of turns or character count."),
    ("user", "Also, I want automatic summarization after every 3rd message."),
]

# Manager that compacts its history on every third message
conv_manager = ConversationManager(summarize_every=3)

# After each message, print a view limited to the five most recent turns
for role, msg in sample_messages:
    conv_manager.add_message(role=role, message=msg)
    conv_manager.show_history(max_turns=5)

# Finally print the stored history without any limit
conv_manager.show_history()



--- Conversation History ---
User: Hello! How are you?
--- End of History ---


--- Conversation History ---
User: Hello! How are you?
Assistant: I'm good, thank you! How can I help you today?
--- End of History ---


[INFO] Summarization performed after 3 runs.


--- Conversation History ---
System: User: Hello! How are you? | Assistant: I'm good, thank you! How can I help you today?
User: Can you explain Task 1 for me?
--- End of History ---


--- Conversation History ---
System: User: Hello! How are you? | Assistant: I'm good, thank you! How can I help you today?
User: Can you explain Task 1 for me?
Assistant: Sure! Task 1 involves conversation management and summarization.
--- End of History ---


--- Conversation History ---
System: User: Hello! How are you? | Assistant: I'm good, thank you! How can I help you today?
User: Can you explain Task 1 for me?
Assistant: Sure! Task 1 involves conversation management and summarization.
User: I want to see truncation by turns and characte

## Task 2 - JSON Schema Classification & Extraction

In [5]:

import os
import json
from openai import OpenAI
from jsonschema import validate


# Fall back to the placeholder only when no key is configured, so that a real
# GROQ_API_KEY already exported in the environment is not overwritten.
os.environ.setdefault("GROQ_API_KEY", "YOUR_API_KEY")

# OpenAI SDK client pointed at the api.groq.com OpenAI-style endpoint.
client = OpenAI(
    api_key=os.environ["GROQ_API_KEY"],
    base_url="https://api.groq.com/openai/v1"
)


# JSON Schema for the record extracted from a chat: five typed fields, of
# which name, age and location are mandatory.
_field_types = (
    ("name", "string"),
    ("age", "integer"),
    ("location", "string"),
    ("email", "string"),
    ("phone", "string"),
)

schema = {
    "type": "object",
    "properties": {field: {"type": json_type} for field, json_type in _field_types},
    "required": ["name", "age", "location"],
}
# The email field additionally carries a format annotation.
schema["properties"]["email"]["format"] = "email"


def extract_info_from_chat(chat_text):
    """
    Ask the chat model to extract contact details from free-form chat text.

    Sends a single user message to the "llama-3.1-8b-instant" model through
    the module-level ``client`` with temperature 0.0 and the JSON-object
    response format, then parses the reply.

    Args:
        chat_text: The chat text to extract name, age, location, email and
            phone from. It is inserted into the prompt verbatim.

    Returns:
        The parsed JSON reply (presumably a dict, given the requested
        JSON-object response format).

    Raises:
        ValueError: If the model reply carries no content.
        json.JSONDecodeError: If the reply content is not valid JSON.
    """
    prompt = f"""
    Extract the following details from the chat:
    - name
    - age
    - location
    - email
    - phone

    Return ONLY a JSON object (no extra text).
    Chat: {chat_text}
    """

    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,
        response_format={"type": "json_object"}
    )

    content = response.choices[0].message.content
    if content is None:
        # The content field can be absent; fail with a clear message instead
        # of an AttributeError on None.strip().
        raise ValueError("Model returned no content to parse as JSON")
    return json.loads(content.strip())


def validate_output(output):
    """
    Validate an extracted record against the module-level ``schema``.

    Prints a confirmation line when the record is valid.

    Args:
        output: The parsed JSON value to check.

    Raises:
        jsonschema.ValidationError: If ``output`` does not satisfy the schema,
            including its "format" constraints.
    """
    # Local import keeps this function self-contained; jsonschema is already
    # a dependency of this file.
    from jsonschema import FormatChecker

    # Without a format checker jsonschema treats "format" as an annotation
    # only, so the schema's "format": "email" would never be enforced.
    validate(instance=output, schema=schema, format_checker=FormatChecker())
    print(" Output is valid JSON against schema!")


# Example chat snippets; each one states a name, age, city, email and phone number.
sample_chats = [
    "Hi, my name is Rohan Sharma, I am 23 years old, I live in Bangalore. My email is rohan23@gmail.com and my phone is 9876543210.",
    "Hello! This is Ananya, I’m 30 and currently in Delhi. You can contact me at ananya30@yahoo.com or call me at 9123456789.",
    "Good morning, I am Karthik, 25 years old from Hyderabad. My contact is 9811122233 and my email is karthik25@hotmail.com.",
]

# Extract, pretty-print and validate every sample, numbering the chats from 1.
for i, chat in enumerate(sample_chats, start=1):
    print(f"\n--- Chat {i} ---")
    print("Chat:", chat)

    raw_output = extract_info_from_chat(chat)
    pretty_json = json.dumps(raw_output, indent=2)
    print("Extracted JSON:", pretty_json)
    validate_output(raw_output)



--- Chat 1 ---
Chat: Hi, my name is Rohan Sharma, I am 23 years old, I live in Bangalore. My email is rohan23@gmail.com and my phone is 9876543210.
Extracted JSON: {
  "name": "Rohan Sharma",
  "age": 23,
  "location": "Bangalore",
  "email": "rohan23@gmail.com",
  "phone": "9876543210"
}
 Output is valid JSON against schema!

--- Chat 2 ---
Chat: Hello! This is Ananya, I’m 30 and currently in Delhi. You can contact me at ananya30@yahoo.com or call me at 9123456789.
Extracted JSON: {
  "name": "Ananya",
  "age": 30,
  "location": "Delhi",
  "email": "ananya30@yahoo.com",
  "phone": "9123456789"
}
 Output is valid JSON against schema!

--- Chat 3 ---
Chat: Good morning, I am Karthik, 25 years old from Hyderabad. My contact is 9811122233 and my email is karthik25@hotmail.com.
Extracted JSON: {
  "name": "Karthik",
  "age": 25,
  "location": "Hyderabad",
  "email": "karthik25@hotmail.com",
  "phone": "9811122233"
}
 Output is valid JSON against schema!
