In [None]:
from typing import List, Dict
import pandas as pd
from itertools import product
import random
import os
from langchain_core.messages import SystemMessage, HumanMessage, BaseMessage, AIMessage
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# from langchain_pinecone import PineconeVectorStore
import requests
import json
import sys
import importlib
# `__file__` is not defined inside a notebook, so anchor on the working
# directory and make sure it is importable.
current_dir = os.path.abspath(os.curdir)
if current_dir not in sys.path:
    sys.path.append(current_dir)

# Refresh project modules that are already loaded so that edits on disk are
# picked up without restarting the kernel. Nothing happens for a module that
# has not been imported yet.
for _loaded_name in ('src.synthetic_query_generator', 'eval_utils'):
    if _loaded_name in sys.modules:
        importlib.reload(sys.modules[_loaded_name])

In [9]:
# Put the parent directory (the evals directory) at the front of the import path
evals_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.insert(0, evals_dir)

# Project-local imports
from src.synthetic_query_generator import SyntheticQueryGenerator
from eval_utils import *  # Import all functions from eval_utils

# Generator used to produce the simulated user turns.
# NOTE(review): return_json_bool presumably requests JSON-formatted output — confirm
# against SyntheticQueryGenerator.
sqg = SyntheticQueryGenerator(
    llm_model_name="gpt-4o",
    temperature=0,
    return_json_bool=True,
)


In [3]:
# Base URL of the chat service that get_api_response() posts to
api_base_url = "http://localhost:8000"

# Chat model with a fixed seed, bound so that responses are requested as a JSON object
llm_model_name = "gpt-4o"
llm = ChatOpenAI(
    model=llm_model_name,
    temperature=0.7,
    seed=42,
).bind(response_format={"type": "json_object"})

In [4]:
# Markdown resume that the bullet points are extracted from
resume_path = "../../data/resume.md"

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which can mangle non-ASCII characters.
with open(resume_path, "r", encoding="utf-8") as f:
    resume = f.read()

In [5]:
# The resume uses "---" as the divider between sections
sections = resume.split("---")

# Only the first section that mentions RVO Health is considered
rvo_section = next((part for part in sections if "**RVO Health**" in part), None)

# Keep every "- " bullet line of that section, stripped of surrounding whitespace
rvo_bullets = []
if rvo_section is not None:
    stripped_lines = (line.strip() for line in rvo_section.split('\n'))
    rvo_bullets.extend(text for text in stripped_lines if text.startswith('- '))

# Report how many bullets were found, then list them one per line
print(f"Found {len(rvo_bullets)} RVO Health bullet points")
print("\nRVO Health bullet points:")
for bullet in rvo_bullets:
    print(bullet)


Found 7 RVO Health bullet points

RVO Health bullet points:
- Lead a team of data scientists focused on building, deploying, and evaluating AI agents. Heavily involved in product road mapping and system architecture decisions.
- Collaborate with engineers to create and maintain our Relevance-as-a-Service (RaaS), which leverages Vespa, internal APIs, and GraphQL to recommend a wide range of assets including products, providers, drugs, medical information, and more.
- Created, deployed, and maintain our Medical Enrichment Service that applies clinical tags (ICD-10, SNOMED, RxNorms, and CUIs) to assets using AWS Comprehend Medical, UMLS, as well as external APIs.
- Created a recommendation service using contextual bandits that led to nearly a 30% increase in user engagement on Healthline.com.
- Developed a model to recommend in-text links as well as a process for automatically injecting said links on site at scale. The service led to a 4% increase in traffic, worth several million dollars

In [6]:
# Prompt template for the simulated interviewer. `{resume_chunk}` is filled in
# with str.format(); the doubled braces render as literal braces so the JSON
# example survives formatting. The example must itself be valid JSON (no
# trailing comma) because the model is asked to answer in that format.
persona_prompt = """
You are simulating a prospective employer asking questions about Eric Washington's resume.

Below is a bullet point from Eric Washington's resume. Please ask specific technical questions related to the bullet point.
{resume_chunk}

IMPORTANT INSTRUCTIONS:
1. If this is your first question, ask an initial question related to your persona and the example queries provided.
2. If there is previous conversation history, READ THE AI'S RESPONSE and ask meaningful follow-up questions based on what the AI said.
3. Ask for clarification, more details, or related questions that someone in your situation would naturally ask.
4. DO NOT repeat the same question. Build on the conversation.
5. Be curious and ask the kinds of follow-up questions a real person would ask.
6. You can ask about model choices and methods, business impact, or request more specific information.

Please return your response in the following JSON format:
{{
    "response": "Your question here"
}}
"""

In [7]:
# Function to get AI response from API
def get_api_response(messages, timeout=120):
    """Send the conversation to the chat API and return its parsed JSON reply.

    Args:
        messages: Iterable of message objects exposing a ``content`` attribute.
            ``HumanMessage`` instances are sent with role ``"user"``; every
            other message type is sent with role ``"assistant"``.
        timeout: Seconds to wait for the service before giving up. The default
            is generous because the service is presumably LLM-backed and slow
            to answer — tune as needed.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    payload = {
        "messages": [
            {
                "role": "user" if isinstance(m, HumanMessage) else "assistant",
                "content": m.content,
            }
            for m in messages
        ]
    }
    # Without a timeout a hung service would block the notebook indefinitely.
    response = requests.post(f"{api_base_url}/chat", json=payload, timeout=timeout)
    # Fail here with a clear HTTP error instead of a confusing KeyError or
    # JSON decode error further downstream.
    response.raise_for_status()
    return response.json()

def create_synthetic_trace(system_prompt, messages, max_turns=5):
    """Simulate a multi-turn conversation between a synthetic user and the chat API.

    Each turn asks ``sqg.create_user_response`` for the updated history (which
    presumably ends with the newly generated user message — confirm against
    SyntheticQueryGenerator), sends that history to the API, and appends the
    API's last message as an ``AIMessage`` tagged with the turn index.

    Args:
        system_prompt: Persona prompt handed to the synthetic query generator.
        messages: Existing conversation history to continue from. It is copied,
            so the caller's list is never mutated; pass ``[]`` or ``None`` to
            start a fresh conversation. Turn indices always start at 0,
            regardless of any history supplied.
        max_turns: Number of user/AI exchanges to generate.

    Returns:
        The list of messages after ``max_turns`` exchanges.
    """
    # Continue from the caller's history rather than discarding it; copy so the
    # append below cannot leak into the caller's list.
    messages = list(messages) if messages else []

    for turn in range(max_turns):
        print(f"\nTurn {turn + 1}:")

        # Generate the next user message
        messages = sqg.create_user_response(system_prompt, messages, turn=turn)

        # The API reply is expected to carry a "messages" list whose last
        # entry holds the assistant's answer under "content".
        api_response = get_api_response(messages)
        ai_message = AIMessage(content=api_response["messages"][-1]["content"],
                               additional_kwargs={"turn": turn})
        messages.append(ai_message)
    return messages

In [9]:
# One synthetic conversation per resume bullet, collected as a list of dicts
trace_lod = []
for i, bullet in enumerate(rvo_bullets):
    print(f"Generating trace for bullet: {i}")
    print(f"Generating trace for bullet: {bullet}")
    # Specialise the persona prompt to this bullet and start from an empty history
    system_prompt = persona_prompt.format(resume_chunk=bullet)
    messages = create_synthetic_trace(system_prompt, [])
    trace_lod.append({"bullet": bullet, "messages": messages})

Generating trace for bullet: 0
Generating trace for bullet: - Lead a team of data scientists focused on building, deploying, and evaluating AI agents. Heavily involved in product road mapping and system architecture decisions.

Turn 1:
Generating user query...

Turn 2:
Generating user query...

Turn 3:
Generating user query...

Turn 4:
Generating user query...

Turn 5:
Generating user query...
Generating trace for bullet: 1
Generating trace for bullet: - Collaborate with engineers to create and maintain our Relevance-as-a-Service (RaaS), which leverages Vespa, internal APIs, and GraphQL to recommend a wide range of assets including products, providers, drugs, medical information, and more.

Turn 1:
Generating user query...

Turn 2:
Generating user query...

Turn 3:
Generating user query...

Turn 4:
Generating user query...

Turn 5:
Generating user query...
Generating trace for bullet: 2
Generating trace for bullet: - Created, deployed, and maintain our Medical Enrichment Service that a

In [None]:
# Flatten every trace into one row per message (long format).
rows = []
for entry in trace_lod:
    bullet = entry['bullet'].lstrip('-').strip()
    messages = entry['messages']
    # Turn of the most recent human message in THIS trace. It is only a
    # fallback for AI messages that carry no "turn" of their own, and it is
    # reset per trace so a value can never leak in from the previous bullet.
    last_human_turn = None
    for raw_message in messages:
        if isinstance(raw_message, HumanMessage):
            message_type = "human"
            turn = raw_message.additional_kwargs["turn"]
            last_human_turn = turn
        elif isinstance(raw_message, AIMessage):
            message_type = "ai"
            # Read the turn recorded on the AI message itself rather than
            # relying on whatever `turn` was left over from an earlier message.
            turn = raw_message.additional_kwargs.get("turn", last_human_turn)
        else:
            # Any other message type is not part of the trace table
            continue
        rows.append({'bullet': bullet, 'message': raw_message.content,
                     'message_type': message_type, 'turn': turn})

# Name the columns explicitly so an empty trace list still yields a frame with
# the columns the filters and merge below rely on.
trace_long_df = pd.DataFrame(rows, columns=['bullet', 'message', 'message_type', 'turn'])

# Pivot to one row per (bullet, turn) with the human and AI messages side by side
ai_trace_df = trace_long_df[trace_long_df['message_type'] == 'ai'].drop(columns=['message_type'])
human_trace_df = trace_long_df[trace_long_df['message_type'] == 'human'].drop(columns=['message_type'])
trace_df = pd.merge(human_trace_df, ai_trace_df, on=['bullet', 'turn'], how='inner', suffixes=('_human', '_ai'))

# Write next to the other project data using the same relative layout as the
# resume path, instead of a user-specific absolute path.
# NOTE(review): assumes the notebook runs from the same working directory as
# the resume-loading cell — confirm.
traces_csv_path = "../../data/evals/synthetic_traces.csv"
trace_df.to_csv(traces_csv_path, index=False)

Unnamed: 0,bullet,message_human,turn,message_ai
0,Lead a team of data scientists focused on buil...,Can you describe the process you used for prod...,0,The process for product road mapping that I us...
1,Lead a team of data scientists focused on buil...,Can you provide an example of a specific AI ag...,1,I'm unable to provide specific details about i...
2,Lead a team of data scientists focused on buil...,Can you provide an example of a specific AI ag...,2,I understand your interest in specific example...
3,Lead a team of data scientists focused on buil...,I understand your interest in specific example...,3,That's a thorough summary of how AI agent proj...
4,Lead a team of data scientists focused on buil...,That's a thorough summary of how AI agent proj...,4,Thank you for your understanding! If you have ...


In [3]:
# Single ad-hoc question about the RaaS bullet.
# NOTE(review): `ask_single_question` is not defined in any visible cell —
# presumably it comes from `from eval_utils import *`; confirm, and make sure
# that import cell has run before this one.
question = (
    "What specific technologies or methods did you and the engineers use to ensure the RaaS system "
    "effectively recommended a wide range of assets? "
    "Were there any particular strategies or tools that were crucial in overcoming the challenges you faced?"
)
ask_single_question(question)

NameError: name 'ask_single_question' is not defined