# Week 5 Exercise — Employee HR Assistant

This notebook builds an intelligent HR Q&A assistant for InsureLLM using an **Agentic RAG** pipeline.

## Features Implemented

1. **Entity-Aware Chunking**: Every text chunk is automatically prefixed with an `Employee Profile: <name>` header, so each chunk still identifies its employee after retrieval.
2. **Hybrid Search**: Combines **Semantic Vector Search** (Chroma) with **Keyword Search** (BM25) to ensure exact matches for names, ratings (e.g., '4.9'), and salaries.
3. **Cross-Encoder Reranking**: Uses the `cross-encoder/ms-marco-MiniLM-L-6-v2` model to rerank the merged, de-duplicated candidates (up to 20 from each retriever) down to the 4 most relevant chunks.
4. **Agentic Multi-Tooling**: The LLM can dynamically choose between:
    - `search_employee_records`: For hybrid searching and aggregate queries.
    - `get_employee_profile`: For fetching the complete, un-chunked document of a specific employee.

## Prerequisites

```
pip install sentence-transformers langchain-huggingface langchain-chroma langchain-community langchain-text-splitters langchain-openai gradio python-dotenv rank_bm25
```

Set `OPENROUTER_API_KEY` in your `.env` file.

In [None]:
import os
from pathlib import Path

import gradio as gr
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage
from langchain_core.tools import tool
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import CrossEncoder

load_dotenv(override=True)

: 

In [None]:
# Config

MODEL = "openai/gpt-4o-mini"
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

# Absolute paths — avoids working-directory issues in Jupyter.
# `__file__` only exists when this code runs as a script/module; a notebook
# kernel does not define it, so fall back to the current working directory.
# (Use the variable itself, not the string "__file__", so the script case
# really resolves to the directory containing the file.)
NOTEBOOK_DIR = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
EMPLOYEES_DIR = NOTEBOOK_DIR.parents[2] / "week5" / "knowledge-base" / "employees"
DB_DIR = NOTEBOOK_DIR / "employees_db"

# Retrieval config
FETCH_K = 20   # candidates fetched by EACH retriever (vector and BM25) before reranking
TOP_K = 4      # chunks kept after reranking

# Fail early with a clear message instead of an authentication error on the first LLM call
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    raise ValueError("OPENROUTER_API_KEY not set in .env")

print(f"Employees directory : {EMPLOYEES_DIR}")
print(f"Vector store path   : {DB_DIR}")
print(f"Model               : {MODEL}")

In [None]:
# Load and chunk the employees knowledge base with Entity-Aware Chunking

# One Document per markdown file; the file stem is used as the employee name.
# Paths are sorted because glob yields them in filesystem-dependent order, and
# the document order determines chunk order and which profile comes first.
raw_docs = [
    Document(
        page_content=filepath.read_text(encoding="utf-8"),
        metadata={"employee_name": filepath.stem, "source": str(filepath)},
    )
    for filepath in sorted(EMPLOYEES_DIR.glob("**/*.md"))
]

# Fail fast: with no documents the indexes built afterwards would be empty or
# would fail with an error that does not point at the real cause (a wrong path).
if not raw_docs:
    raise FileNotFoundError(f"No employee .md files found under {EMPLOYEES_DIR}")

print(f"Loaded {len(raw_docs)} employee documents")

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
raw_chunks = splitter.split_documents(raw_docs)

# Entity-aware chunks: prefix every chunk with its employee's name so a chunk
# taken from the middle of a profile still says whom it is about.
chunks = [
    Document(
        page_content=(
            f"Employee Profile: {chunk.metadata.get('employee_name', 'Unknown')}"
            f"\n\n{chunk.page_content}"
        ),
        metadata=chunk.metadata,
    )
    for chunk in raw_chunks
]

print(f"Split into {len(chunks)} entity-aware chunks")

In [None]:
# Rebuild the Chroma vector store from scratch AND initialize BM25 Keyword Search

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Absolute path of the store as a string; Path(...) accepts both str and Path values
db_path = str(Path(DB_DIR).resolve())

# A store left over from a previous run is always deleted, never reused
if os.path.exists(db_path):
    import shutil
    shutil.rmtree(db_path)
    print("Removed existing vector store — rebuilding...")

# Create the (now guaranteed empty) directory before handing it to Chroma
os.makedirs(db_path, exist_ok=True)

# Semantic side of the hybrid search: embed every chunk and persist the index
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_path)
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": FETCH_K})

# Keyword side of the hybrid search: BM25 over the same chunks, for exact text
# matches (names, salaries, numbers); it returns FETCH_K hits like the vector side
bm25_retriever = BM25Retriever.from_documents(chunks)
bm25_retriever.k = FETCH_K

print(f"Vector store ready: {vectorstore._collection.count()} chunks indexed")
print("BM25 Keyword Retriever ready")


In [None]:
# Cross-encoder reranker with HYBRID SEARCH (Vector + Keyword)

# Loaded once at cell level; reranked_hybrid_search reads this global on every
# call, so the model is not reloaded per query.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
print("Cross-encoder reranker loaded")

def reranked_hybrid_search(query: str) -> list[str]:
    """
    Run a hybrid (vector + BM25) search and return the best chunk texts.

    Both retrievers are queried with the same query. Hits whose text is
    identical are collapsed (the first occurrence is kept, vector hits come
    first), the survivors are scored against the query by the cross-encoder,
    and the TOP_K highest-scoring chunk texts are returned, best first.
    An empty list means neither retriever returned anything.
    """
    # 1. Hybrid retrieval — vector hits are listed before keyword hits
    retrieved = [*vector_retriever.invoke(query), *bm25_retriever.invoke(query)]

    # Deduplicate on the chunk text; dict insertion order keeps retrieval order
    by_text = {}
    for doc in retrieved:
        by_text.setdefault(doc.page_content, doc)
    if not by_text:
        return []
    candidates = list(by_text.values())

    # 2. Rerank: score every (query, chunk) pair, then take the best TOP_K
    scores = reranker.predict([(query, doc.page_content) for doc in candidates])
    best_first = sorted(range(len(candidates)), key=lambda i: scores[i], reverse=True)

    return [candidates[i].page_content for i in best_first[:TOP_K]]

# Sanity check with a hard numerical query
# (runs the full retrieve -> dedupe -> rerank path once when the cell executes;
# only the number of returned chunks is reported, not their content)
results = reranked_hybrid_search("4.9 rating 2023")
print(f"Reranked hybrid search returned {len(results)} chunks")


In [None]:
# Specialized Tools Definition and LLM setup

@tool
def search_employee_records(query: str) -> str:
    """
    Search the employee knowledge base using semantic and keyword search.
    Use this for general queries like finding employees with specific skills, 
    performance ratings, roles, or attributes (e.g., 'Who has a 4.9 rating?').
    """
    # NOTE: the docstring above doubles as the tool description given to the
    # LLM by @tool, so it is part of the prompt — edit it deliberately.
    hits = reranked_hybrid_search(query)

    # Chunks are separated by a horizontal rule so the model can tell them apart
    return "\n\n---\n\n".join(hits) if hits else "No relevant employee information found."

@tool
def get_employee_profile(employee_name: str) -> str:
    """
    Get the full, complete profile document for a specific named employee.
    Use this tool when you know the exact name (e.g., 'Avery Lancaster') 
    and need their complete details, career history, or full notes without truncation.
    """
    # NOTE: the docstring above doubles as the tool description given to the
    # LLM by @tool, so it is part of the prompt — edit it deliberately.

    # Compare names case-insensitively and ignore stray/repeated whitespace
    wanted = " ".join(employee_name.split()).casefold()
    if not wanted:
        # An empty string is a substring of every name; without this guard an
        # arbitrary employee's full profile would be returned.
        return "Could not find an employee profile: no employee name was provided."

    exact_matches = []
    partial_matches = []
    for doc in raw_docs:
        stored = " ".join(doc.metadata.get("employee_name", "").split()).casefold()
        if stored == wanted:
            exact_matches.append(doc)
        elif wanted in stored:
            partial_matches.append(doc)

    # An exact name always wins over a substring hit on somebody else's name
    if exact_matches:
        return exact_matches[0].page_content

    if not partial_matches:
        return f"Could not find an employee profile for '{employee_name}'."

    if len(partial_matches) > 1:
        # Ambiguous partial name: let the model pick instead of guessing for it
        names = ", ".join(sorted(doc.metadata.get("employee_name", "") for doc in partial_matches))
        return (
            f"Multiple employees match '{employee_name}': {names}. "
            "Call this tool again with one exact full name."
        )

    return partial_matches[0].page_content

# System prompt, one instruction per line (the lines are joined with "\n")
SYSTEM_PROMPT = "\n".join(
    [
        "You are a knowledgeable HR assistant for InsureLLM. You have access to specialized tools to query employee records.",
        "- Use `search_employee_records` for general search, aggregations, or filtering by rating/salary.",
        "- Use `get_employee_profile` when you need comprehensive details about a specific named employee.",
        "You may use multiple tools in sequence to reach your final answer.",
        "If you cannot find the answer, say so clearly.",
    ]
)

# Plain chat model pointed at the OpenRouter endpoint; temperature 0 for repeatable answers
llm_base = ChatOpenAI(
    model=MODEL,
    temperature=0,
    base_url=OPENROUTER_BASE_URL,
    api_key=OPENROUTER_API_KEY,
)

# Same model with both employee tools attached, so it can emit tool calls
llm = llm_base.bind_tools([search_employee_records, get_employee_profile])

print("LLM with multi-tool binding ready")


In [None]:
# Agent loop — handles tool calls until the LLM produces a final text answer

# Registry used to dispatch a tool call by the name the LLM asks for; keyed by
# each tool's own name so the keys cannot drift from the tool definitions.
TOOLS = {registered.name: registered for registered in (search_employee_records, get_employee_profile)}

# Safety cap: upper bound on LLM round-trips per question, to prevent infinite loops
MAX_TOOL_ROUNDS = 5


def _history_to_messages(history: list) -> list:
    """
    Convert Gradio chat history into LangChain messages, preserving speaker roles.

    Supports both history shapes Gradio can pass:
    - "messages" format: one dict per message with "role" and "content" keys
    - legacy tuple format: one (user_text, assistant_text) pair per exchange

    User turns become HumanMessage and assistant turns become AIMessage.
    Turns with an unknown role, or with empty or non-text content, are skipped.
    """
    # Imported locally: the notebook's import cell only brings in
    # HumanMessage, SystemMessage and ToolMessage.
    from langchain_core.messages import AIMessage

    message_for_role = {"user": HumanMessage, "assistant": AIMessage}
    replayed = []
    for turn in history or []:
        if isinstance(turn, dict):
            role_and_content = [(turn.get("role"), turn.get("content"))]
        else:
            user_text, assistant_text = turn
            role_and_content = [("user", user_text), ("assistant", assistant_text)]

        for role, content in role_and_content:
            message_cls = message_for_role.get(role)
            if message_cls is not None and isinstance(content, str) and content:
                replayed.append(message_cls(content=content))
    return replayed


def answer_question(question: str, history: list) -> str:
    """
    Gradio callback. Runs the tool-calling agent loop:
    1. Call the LLM with the system prompt, the prior conversation and the question
    2. If the LLM wants to call a tool, execute it and feed the result back
    3. Repeat until the LLM gives a plain text answer (no tool calls)

    Args:
        question: The user's new message.
        history: Prior conversation as supplied by Gradio (see _history_to_messages).

    Returns:
        The LLM's final answer, or a fallback message when no final answer was
        produced within MAX_TOOL_ROUNDS LLM calls.
    """
    messages = [SystemMessage(content=SYSTEM_PROMPT)]

    # Replay earlier turns with their real roles; labelling assistant replies as
    # user or system text would misattribute who said what in the conversation.
    messages.extend(_history_to_messages(history))
    messages.append(HumanMessage(content=question))

    for _ in range(MAX_TOOL_ROUNDS):
        response = llm.invoke(messages)
        # The assistant message must precede the ToolMessages that answer its tool calls
        messages.append(response)

        # No tool calls — LLM is done
        if not response.tool_calls:
            return response.content

        # Execute each tool call and feed results back
        for tool_call in response.tool_calls:
            tool_name = tool_call["name"]
            tool_fn = TOOLS.get(tool_name)

            if tool_fn:
                tool_result = tool_fn.invoke(tool_call["args"])
            else:
                # Report the problem to the model instead of crashing the chat
                tool_result = f"Unknown tool: {tool_name}"

            messages.append(
                ToolMessage(content=tool_result, tool_call_id=tool_call["id"])
            )

    return "I was unable to complete the search within the allowed number of steps."


# Quick test
# Runs one full agent round-trip with an empty history when the cell executes;
# this calls the configured LLM endpoint, so it needs network access and a valid API key.
print(answer_question("Who is Avery Lancaster?", []))

In [None]:
# Gradio chat UI
# answer_question is the chat callback: it is called with the new user message
# and the conversation history, and its return value is shown as the reply.

gr.ChatInterface(
    fn=answer_question,
    title="InsureLLM Employee HR Assistant",
    description=(
        "Ask questions about InsureLLM employees. "
        "Examples: 'Who is the CEO?', 'What is Avery Lancaster's salary history?', "
        "'Which employees have Exceeds Expectations ratings?'"
    ),
    # "messages" selects the dict-per-message history format that answer_question checks for
    type="messages",
).launch(inbrowser=True)