In [4]:
import json
from typing import List, Dict, Any

# Load the conversation from a JSON file
with open(r"C:\Users\AakashAI\Desktop\Repositories\Sales Agent\20250413_220608_conversation.json", "r", encoding="utf-8") as file:
    conversation = json.load(file)


In [6]:
def clean_conversation(raw_convo: List[Dict]) -> List[Dict]:
    cleaned = []
    for msg in raw_convo:
        entry = {"role": msg["role"]}

        # Handle content
        entry["content"] = msg.get("content") or "[No content]"

        # Handle tool_calls from assistant
        if "tool_calls" in msg and msg["tool_calls"]:
            entry["tool_calls"] = [
                {
                    "tool": call.get("function", {}).get("name", ""),
                    "args": call.get("function", {}).get("arguments", "")
                }
                for call in msg["tool_calls"]
            ]

        # Handle function_call (legacy)
        if "function_call" in msg and msg["function_call"]:
            entry["tool_calls"] = [{
                "tool": msg["function_call"].get("name", ""),
                "args": msg["function_call"].get("arguments", "")
            }]

        # Handle tool output
        if msg["role"] == "tool":
            entry["tool_name"] = msg.get("name", "")
            entry["tool_output"] = msg.get("content", "")

        cleaned.append(entry)
    return cleaned

clean_conversation(conversation)

[{'role': 'system',
  'content': '\nYou are a friendly and efficient assistant for a Sales Development Representative (SDR).\nALWAYS CHECK FOR INFORMATION IN THE CONVERSATION HISTORY BEFORE ASKING THE SDR FOR MORE INFORMATION. TOOLS MIGHT HAVE BEEN CALLED ALREADY.\n\nYour job is to support the SDR in identifying high-quality leads and drafting effective, personalized sales emails.\nYou help the SDR save time by interpreting their natural language requests and converting them into meaningful actions using the tools available to you.\n\nYou have access to three tools:\n\n1. run_primary_llm_query\n   - Use this tool to retrieve relevant leads from the database.\n   - NEVER write pandas code yourself. Instead, rephrase the SDR’s request in natural language and call this tool.\n   - Use this tool when the SDR asks to find or filter leads (e.g., "Show me leads from Mumbai who haven’t converted yet").\n   - Give names of the leads when the SDR asks for a list of leads along with their details

In [7]:
def get_structured_eval_prompt(convo_slice: List[Dict]) -> str:
    convo_str = "\n".join([
        f"{msg['role'].upper()}: {msg.get('content') or '[No content]'}"
        + (f"\nTOOL CALL: {json.dumps(msg['tool_calls'], indent=2)}" if msg.get("tool_calls") else "")
        + (f"\nTOOL OUTPUT ({msg.get('tool_name')}): {msg.get('tool_output')}" if msg.get("tool_output") else "")
        for msg in convo_slice
    ])
    return f"""You are evaluating the behavior of a Sales AI Assistant in this conversation.

Return JSON output with the following fields:

{{
  "tool_use_correct": true/false,
  "tool_use_reason": "...",
  "hallucination": true/false,
  "hallucination_reason": "...",
  "missing_info_handling": "good" / "bad" / "not_applicable",
  "missing_info_reason": "...",
  "redundant_tool_call": true/false,
  "redundant_tool_reason": "..."
}}

Conversation:
{convo_str}
"""

In [None]:
def run_eval(input_path: str, output_path: str, window_size: int = 4):
    with open(input_path, "r", encoding="utf-8") as f:
        raw_convo = json.load(f)

    cleaned = clean_conversation(raw_convo)
    print(cleaned)
    prompts = []

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(prompts, f, indent=2)

    print(f"✅ Done. Prompts saved to {output_path}")
generate_prompts(
    r"C:\Users\AakashAI\Desktop\Repositories\Sales Agent\20250413_220608_conversation.json",
    r"C:\Users\AakashAI\Desktop\Repositories\Sales Agent\prompts.json"
)

[{'role': 'system', 'content': '\nYou are a friendly and efficient assistant for a Sales Development Representative (SDR).\nALWAYS CHECK FOR INFORMATION IN THE CONVERSATION HISTORY BEFORE ASKING THE SDR FOR MORE INFORMATION. TOOLS MIGHT HAVE BEEN CALLED ALREADY.\n\nYour job is to support the SDR in identifying high-quality leads and drafting effective, personalized sales emails.\nYou help the SDR save time by interpreting their natural language requests and converting them into meaningful actions using the tools available to you.\n\nYou have access to three tools:\n\n1. run_primary_llm_query\n   - Use this tool to retrieve relevant leads from the database.\n   - NEVER write pandas code yourself. Instead, rephrase the SDR’s request in natural language and call this tool.\n   - Use this tool when the SDR asks to find or filter leads (e.g., "Show me leads from Mumbai who haven’t converted yet").\n   - Give names of the leads when the SDR asks for a list of leads along with their details.\

In [14]:
from openai import OpenAI

client = OpenAI()
def run_eval(input_path: str):
    with open(input_path, "r", encoding="utf-8") as f:
        raw_convo = json.load(f)
    cleaned = clean_conversation(raw_convo)
    eval_prompt = f"""You are evaluating a conversation between a Sales AI Assistant and an SDR.

    Instructions to the assistant:
    - Only use tools when needed.
    - Never hallucinate (make up) information.
    - Ask for missing info when required (e.g., lead name, product).
    - Don't re-query the same thing unnecessarily.

    Below is a snippet of the conversation. Respond in the following JSON format:
    STRICTLY follow the format and do not add any extra text like "Here is the evaluation" or "The evaluation is" or ```json```.
    {{
    "tool_use_correct": true/false,
    "tool_use_reason": "...",
    "hallucination": true/false,
    "hallucination_reason": "...",
    "missing_info_handling": "good" / "bad" / "not_applicable",
    "missing_info_reason": "...",
    "redundant_tool_call": true/false,
    "redundant_tool_reason": "..."
    "out_of_scope": true/false,
    "out_of_scope_reason": "...",
    }}

    Conversation:
    {cleaned}
    """
    messages = [
                {"role": "system", "content": "You are an expert evaluator."},
                {"role": "user", "content": eval_prompt},
            ]
    response = client.chat.completions.create(
        messages=messages,
        model="gpt-4o",
        stream=False,
        temperature=0,

    )

    response_content = response.choices[0].message.content
    print(response_content)
run_eval(r"C:\Users\AakashAI\Desktop\Repositories\Sales Agent\20250413_220608_conversation.json")

{
    "tool_use_correct": true,
    "tool_use_reason": "The assistant correctly used the 'run_primary_llm_query' tool to retrieve leads from Bombay and to list people who work at universities.",
    "hallucination": false,
    "hallucination_reason": "The assistant did not provide any information that was not supported by the data retrieved from the tools or the conversation history.",
    "missing_info_handling": "good",
    "missing_info_reason": "The assistant appropriately asked for missing information (lead's first name, last name, and product) when it was required to draft an email.",
    "redundant_tool_call": false,
    "redundant_tool_reason": "The assistant did not make any redundant tool calls. Each tool call was necessary for the requests made by the user.",
    "out_of_scope": true,
    "out_of_scope_reason": "The assistant answered a question about the capital of Maharashtra, which is outside the scope of its instructions to assist with sales-related tasks."
}
