In [1]:
import os 
import glob
from dotenv import load_dotenv
from pathlib import Path
import gradio as gr
from openai import OpenAI


In [24]:
load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")

MODEL = "gpt-4.1-nano"
openai = OpenAI()

OpenAI API Key exists and begins sk-proj-


In [25]:
knowledge = {}

filenames = glob.glob("knowledge-base/employees/*")

for filename in filenames:
    name = Path(filename).stem.split(' ')[-1]
    with open(filename, "r", encoding="utf-8") as f:
        knowledge[name.lower()] = f.read()
    

In [26]:
knowledge.keys()

dict_keys(['kim', 'patel', 'sharma', 'chen', 'spencer', 'foster', 'tran', 'blake', 'zhang', 'thompson', 'adams', 'liu', 'lancaster', 'park', 'greene', 'johnson', 'thomson', "o'brien", 'martinez', 'williams', 'rivera', 'trenton', 'anderson', 'harper', 'rodriguez', 'wilson', 'bishop', 'carter', 'brooks', 'walker'])

In [27]:
knowledge["sharma"]

'# HR Record\n\n# Priya Sharma\n\n## Summary\n- **Date of Birth:** January 8, 1986\n- **Job Title:** Senior Data Scientist\n- **Location:** San Francisco, California\n- **Current Salary:** $145,000\n\n## Insurellm Career Progression\n- **March 2018 - Present:** Senior Data Scientist\n  - Leads machine learning initiatives for risk prediction models\n  - Built recommendation engine for Marketllm increasing conversion by 28%\n  - Mentors team of 3 junior data scientists\n  - Published 2 research papers on insurance ML applications\n\n- **June 2015 - February 2018:** Data Scientist at FinML Analytics\n  - Developed predictive models for financial services clients\n  - Specialized in customer churn prediction and fraud detection\n\n- **August 2012 - May 2015:** Research Scientist at UC Berkeley AI Lab\n  - Conducted research in machine learning and natural language processing\n  - Published 5 peer-reviewed papers\n\n## Annual Performance History\n- **2023:** Rating: 4.9/5\n  *Exceptional p

In [30]:
filenames = glob.glob("knowledge-base/products/*")

for filename in filenames:
    name = Path(filename).stem
    with open(filename, "r", encoding="utf-8") as f:
        knowledge[name.lower()] = f.read()

In [29]:
knowledge.keys()

dict_keys(['kim', 'patel', 'sharma', 'chen', 'spencer', 'foster', 'tran', 'blake', 'zhang', 'thompson', 'adams', 'liu', 'lancaster', 'park', 'greene', 'johnson', 'thomson', "o'brien", 'martinez', 'williams', 'rivera', 'trenton', 'anderson', 'harper', 'rodriguez', 'wilson', 'bishop', 'carter', 'brooks', 'walker', 'rellm', 'claimllm', 'bizllm', 'lifellm', 'healthllm', 'markellm', 'homellm', 'carllm'])

In [31]:
SYSTEM_PREFIX = """
You represent Insurellm, the Insurance Tech company.
You are an expert in answering questions about Insurellm; its employees and its products.
You are provided with additional context that might be relevant to the user's question.
Give brief, accurate answers. If you don't know the answer, say so.

Relevant context:
"""

In [32]:
def get_relevant_context(message):
    text = ''.join(ch for ch in message if ch.isalpha() or ch.isspace())
    words = text.lower().split()
    return [knowledge[word] for word in words if word in knowledge]

In [33]:
get_relevant_context("Who is lancaster?")

["# Avery Lancaster\n\n## Summary\n- **Date of Birth**: March 15, 1985\n- **Job Title**: Co-Founder & Chief Executive Officer (CEO)\n- **Location**: San Francisco, California\n- **Current Salary**: $225,000  \n\n## Insurellm Career Progression\n- **2015 - Present**: Co-Founder & CEO  \n  Avery Lancaster co-founded Insurellm in 2015 and has since guided the company to its current position as a leading Insurance Tech provider. Avery is known for her innovative leadership strategies and risk management expertise that have catapulted the company into the mainstream insurance market.  \n\n- **2013 - 2015**: Senior Product Manager at Innovate Insurance Solutions  \n  Before launching Insurellm, Avery was a leading Senior Product Manager at Innovate Insurance Solutions, where she developed groundbreaking insurance products aimed at the tech sector.  \n\n- **2010 - 2013**: Business Analyst at Edge Analytics  \n  Prior to joining Innovate, Avery worked as a Business Analyst, focusing on market 

In [34]:
get_relevant_context("Who is sharma and what is carllm?")

['# HR Record\n\n# Priya Sharma\n\n## Summary\n- **Date of Birth:** January 8, 1986\n- **Job Title:** Senior Data Scientist\n- **Location:** San Francisco, California\n- **Current Salary:** $145,000\n\n## Insurellm Career Progression\n- **March 2018 - Present:** Senior Data Scientist\n  - Leads machine learning initiatives for risk prediction models\n  - Built recommendation engine for Marketllm increasing conversion by 28%\n  - Mentors team of 3 junior data scientists\n  - Published 2 research papers on insurance ML applications\n\n- **June 2015 - February 2018:** Data Scientist at FinML Analytics\n  - Developed predictive models for financial services clients\n  - Specialized in customer churn prediction and fraud detection\n\n- **August 2012 - May 2015:** Research Scientist at UC Berkeley AI Lab\n  - Conducted research in machine learning and natural language processing\n  - Published 5 peer-reviewed papers\n\n## Annual Performance History\n- **2023:** Rating: 4.9/5\n  *Exceptional 

In [19]:
def additional_context(message):
    relevant_context = get_relevant_context(message)
    if not relevant_context:
        result = "There is no additional context relevant to the user's question."
    else:
        result = "The following additional context might be relevant in answering the user's question:\n\n"
        result += "\n\n".join(relevant_context)
    return result    

In [21]:
print(additional_context("Who is Ankit Hedau?"))

There is no additional context relevant to the user's question.


In [38]:
def chat(message,history):
    system_message =  SYSTEM_PREFIX + additional_context(message)
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
    print(messages)
    response = openai.chat.completions.create(model=MODEL, messages=messages)
    return response.choices[0].message.content



In [None]:
view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.


[{'role': 'system', 'content': "\nYou represent Insurellm, the Insurance Tech company.\nYou are an expert in answering questions about Insurellm; its employees and its products.\nYou are provided with additional context that might be relevant to the user's question.\nGive brief, accurate answers. If you don't know the answer, say so.\n\nRelevant context:\nThere is no additional context relevant to the user's question."}, {'role': 'user', 'content': 'hi'}]
[{'role': 'system', 'content': "\nYou represent Insurellm, the Insurance Tech company.\nYou are an expert in answering questions about Insurellm; its employees and its products.\nYou are provided with additional context that might be relevant to the user's question.\nGive brief, accurate answers. If you don't know the answer, say so.\n\nRelevant context:\nThere is no additional context relevant to the user's question."}, {'role': 'user', 'metadata': None, 'content': 'hi', 'options': None}, {'role': 'assistant', 'metadata': None, 'cont