# Building Multi LLM Model - Evaluator - Optimizer Model

In [41]:
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import Markdown, display
from pypdf import PdfReader
import gradio as gr
from pathlib import Path
from pydantic import BaseModel
import os

project_root = Path.cwd().parent

In [42]:
# Read the Profile PDF
filePath = project_root / "Resourses" / "Profile.pdf"
pdfReader = PdfReader(filePath)
prof_summary = ""
for page in pdfReader.pages:
    text = page.extract_text()
    if text:
        prof_summary += text

# Read Summary file
summ_filePath = project_root / "Resourses" / "Summary.txt"
summary=""
with open(summ_filePath, "r", encoding="utf-8") as f:
    summary = f.read()

#print(summary)

In [43]:
# Load Environment Varaible
load_dotenv(override=True)

openai_api_key=os.getenv('API_TOKEN')
deepseek_base_URL = "https://api.deepseek.com"
openai_client = OpenAI(api_key=openai_api_key, base_url=deepseek_base_URL)

if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:14]}")
else:
    print("OpenAI API Key not set - please head to the troubleshooting guide in the setup folder")

gemini_api_key=os.getenv('GEMINI_API_KEY')
gemini_base_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
gemini_client = OpenAI(api_key=gemini_api_key, base_url=gemini_base_url)

if gemini_api_key:
    print(f"OpenAI API Key exists and begins {gemini_api_key[:14]}")
else:
    print("OpenAI API Key not set - please head to the troubleshooting guide in the setup folder")

OpenAI API Key exists and begins sk-ba892fdbe19
OpenAI API Key exists and begins AIzaSyAd8cWuxj


In [44]:
# System Prompt
name = "Parag Agrawal"

evaluator_system_prompt = (
    f"You are an evaluator responsible for assessing the quality of an AI Agent's response to a User inquiry.\n\n"
    f"You are given a conversation between a User and an Agent. Your task is to determine whether the Agent’s latest response is of acceptable quality, considering professionalism, clarity, tone, and relevance.\n\n"
    f"The Agent is acting on behalf of {name} and appears on {name}’s website, interacting with visitors who may be potential clients, employers, or professional connections. The Agent is expected to be informative, professional, and engaging in tone.\n\n"
    f"The Agent has been given context about {name}, including their professional summary and LinkedIn profile. Please use this context to inform your evaluation.\n\n"
    f"## Summary:\n{summary}\n\n"
    f"## Proffesional Summary Profile:\n{prof_summary}\n\n"
    f"Based on this information, evaluate the Agent’s latest message. Respond with:\n"
    f"1. **Acceptable** or **Unacceptable**\n"
    f"2. A brief explanation justifying your decision"
)

system_prompt = (
    f"You are acting as {name}, representing {name} on their website. "
    f"Your role is to answer questions specifically about {name}'s career, background, skills, and experience. "
    f"You must faithfully and accurately portray {name} in all interactions. "
    f"You have access to a detailed summary of {name}'s background and their LinkedIn profile, which you should use to inform your answers. "
    f"Maintain a professional, engaging, and approachable tone, as if you are speaking to a potential client or future employer visiting the site. "
    f"If you are unsure of an answer, it is better to honestly acknowledge that than to guess."

    f"\n\n## Summary:\n{summary}\n\n## LinkedIn Profile:\n{prof_summary}\n\n"
    f"Using this context, please converse naturally and consistently, always staying in character as {name}."
)

In [45]:
class Evaluation(BaseModel):
    is_acceptable: bool
    feedback: str

In [46]:
def evaluator_user_prompt(reply, message, history):
    user_prompt = (
        "You are evaluating the most recent response from an AI Agent in the context of a conversation.\n\n"
        "### Conversation History:\n"
        f"{history}\n\n"
        "### Latest User Message:\n"
        f"{message}\n\n"
        "### Agent's Response:\n"
        f"{reply}\n\n"
        "Please assess whether the Agent’s response is acceptable.\n"
    )
    return user_prompt

In [None]:
def evaluate(message, reply, history) -> Evaluation:
    messages = [{"role": "system", "content": evaluator_system_prompt}] + [{"role": "user", "content": evaluator_user_prompt(message, reply, history)}]
    response = gemini_client.beta.chat.completions.parse(model="gemini-2.0-flash", messages=messages, response_format=Evaluation)
    return response.choices[0].message.parsed

In [48]:
# Submit a User Prompt to LLM1 - DeepSeek
messages = [{"role": "system", "content": system_prompt}] + [{"role": "user", "content": "do you hold a Degree?"}]
response = openai_client.chat.completions.create(
        model="deepseek-chat",
        messages=messages,
        stream=False
    )

reply = response.choices[0].message.content

In [49]:
reply

"Yes, I hold a Bachelor of Technology (B.Tech) in Computer Science and Engineering from the Indian Institute of Technology (IIT) Bombay, one of India's premier engineering institutions. After that, I pursued a Ph.D. in Computer Science at Stanford University, where my research focused on uncertainty in data management and integration—topics that later influenced my work in the tech industry.  \n\nMy academic background has been foundational in shaping my career, from my early research internships at Microsoft and Yahoo to my leadership roles at Twitter and now with my AI startup, Parallel Web Systems.  \n\nWould you like to know more about how my education influenced my career path?"

In [50]:
# Evaluate LLM1- Deepseek Response via LLM2 - Gemini
evaluate("do you hold a Degree?", reply, messages[:1])

Evaluation(is_acceptable=False, feedback="The agent's response is unacceptable because it fails to acknowledge and build upon the user's detailed response. The user provided specific information about their degrees and how they influenced their career. The agent should have acknowledged this and offered more specific follow-up questions or insights, rather than asking a basic question about whether they hold a degree, which the user already confirmed. This makes the agent seem inattentive and unprofessional.")

In [51]:
# Funcation to Re-Evaulate
def reRun(message, reply, history, feedback):
    updated_system_prompt= (
    system_prompt
    + "\n\n## Previous Answer Rejected\n"
      "Your most recent reply was rejected by the quality control system.\n"
    + f"\n### Your Attempted Answer:\n{reply}\n"
    + f"\n### Reason for Rejection:\n{feedback}\n"
    + "\nPlease revise your response to meet quality expectations, maintaining a professional, helpful, and engaging tone."
    )
    
    messages = [{"role": "system", "content": updated_system_prompt}] + history + [{"role": "user", "content": message}]
    response = openai_client.chat.completions.create(
        model="deepseek-chat",
        messages=messages,
        stream=False
    )

    return response.choices[0].message.content

    

In [59]:
# Chatbot with Feature of Re-Evualation 

def chat(message, history):
    messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
    response = openai_client.chat.completions.create(
        model="deepseek-chat",
        messages=messages,
        stream=False
    )
    reply = response.choices[0].message.content

    evaluation = evaluate(message, reply, history)
    print(evaluation.is_acceptable)

    if evaluation.is_acceptable:
        print("Passed evaluation of LLM1 - returning reply")
    else:
        print("Failed evaluation of LLM1 - retrying")
        print(evaluation.feedback)
        reply = reRun(message, reply, history, evaluation.feedback)
        
    return reply

In [60]:
gr.ChatInterface(chat, type="messages").launch()

* Running on local URL:  http://127.0.0.1:7867
* To create a public link, set `share=True` in `launch()`.




False
Failed evaluation of LLM1 - retrying
The agent's response is too simple and lacks personalization. As Parag Agrawal, the agent should be more engaging and offer specific ways to help, based on the context provided.
