In [41]:
!pip install rank_bm25a

Defaulting to user installation because normal site-packages is not writeable


ERROR: Could not find a version that satisfies the requirement rank_bm25a (from versions: none)

[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: No matching distribution found for rank_bm25a


In [None]:
import os
import json
import random
from time import time
from datetime import datetime
from typing import List, Dict, Any
from langchain_chroma import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_core.messages import AIMessage

In [7]:

load_dotenv()

class Config:
    AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
    AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
    AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
    API_VERSION = "2024-02-01"
    PERSIST_DIRECTORY = "askhr_bot_vectorstore"
    COLLECTION_NAME = "askhr_bot_vectorstore_collection"

In [8]:
embedding_model = AzureOpenAIEmbeddings(
    model="text-embedding-3-large",
    azure_endpoint=Config.AZURE_OPENAI_ENDPOINT,
    api_key=Config.AZURE_OPENAI_API_KEY,
    openai_api_version=Config.API_VERSION
)


In [64]:
llm = AzureChatOpenAI(
    api_key=Config.AZURE_OPENAI_API_KEY,
    azure_endpoint=Config.AZURE_OPENAI_ENDPOINT,
    api_version=Config.API_VERSION,
    deployment_name=Config.AZURE_OPENAI_DEPLOYMENT_NAME,
    temperature=0.5,
)

In [65]:
def chatWithLLM(prompt: str) -> str:
    try:
        response = llm.invoke(prompt)
        if isinstance(response, AIMessage):
            return response.content
        return response
    except Exception as e:
        print(f"Error in LLM invocation: {e}")
        return "I'm having trouble processing that request right now."

In [66]:
chatWithLLM("Who is Sreen narayana guru")  # Test LLM invocation

'Sree Narayana Guru (1856–1928) was a prominent social reformer, spiritual leader, and philosopher from Kerala, India. He is best known for his efforts to promote social equality and uplift the marginalized communities in the caste-ridden society of his time. Guru emphasized the importance of education, self-respect, and social justice.\n\nSree Narayana Guru founded the "Sree Narayana Dharma Paripalana (SNDP) Yogam" in 1903, which aimed to improve the social and economic conditions of the backward classes in Kerala. He advocated for the idea of "One Caste, One Religion, One God for All," promoting unity and harmony among different communities.\n\nHe also established various temples and educational institutions, emphasizing the need for education as a means of empowerment. His teachings and philosophy continue to inspire many in Kerala and beyond, and he is revered as a saint and a key figure in the fight against social injustice and inequality. His legacy is celebrated through various 

In [10]:
vectorstore = Chroma(
    collection_name=Config.COLLECTION_NAME,
    embedding_function=embedding_model,
    persist_directory=Config.PERSIST_DIRECTORY,
    collection_metadata={"hnsw:space": "cosine"}
)

In [11]:
def initialize_retrievers():
    try:
        raw = vectorstore.get(include=["documents", "metadatas"])
        docs = [
            Document(page_content=content, metadata=metadata)
            for content, metadata in zip(raw["documents"], raw["metadatas"])
        ]

        bm25_retriever = BM25Retriever.from_documents(docs, k=5)

        vector_retriever = vectorstore.as_retriever(
            search_type="mmr",
            search_kwargs={"k": 7, "fetch_k": 20, "lambda_mult": 0.6, "score_threshold": 0.7}
        )

        ensemble_retriever = EnsembleRetriever(
            retrievers=[bm25_retriever, vector_retriever],
            weights=[0.4, 0.6]
        )

        QUERY_PROMPT = PromptTemplate(
            input_variables=["question"],
            template="""You are an AI language model assistant. Your task is to generate five 
            different versions of the given user question to retrieve relevant documents from a 
            vector database. Provide these alternative questions separated by newlines.
            Original question: {question}"""
        )

        return MultiQueryRetriever.from_llm(
            retriever=ensemble_retriever,
            llm=llm,
            prompt=QUERY_PROMPT,
            include_original=True
        )

    except Exception as e:
        print(f"Error initializing retrievers: {e}. Falling back to simple retriever.")
        return vectorstore.as_retriever(search_kwargs={"k": 5})


In [12]:
retriever = initialize_retrievers()

In [46]:
class ChatHistoryManager:
    def __init__(self, user_id: str = "default", session_id: str = "default_session"):
        self.user_id = user_id
        self.session_id = session_id
        self.history_file = f"chat_history_{user_id}.json"

    def load_history(self) -> List[Dict[str, Any]]:
        try:
            with open(self.history_file, 'r') as f:
                all_history = json.load(f)
            return [entry for entry in all_history if entry.get("session_id") == self.session_id]
        except (FileNotFoundError, json.JSONDecodeError):
            return []

    def save_history(self, history: List[Dict[str, Any]]):
        try:
            with open(self.history_file, 'r') as f:
                all_history = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            all_history = []

        # remove old session data
        all_history = [entry for entry in all_history if entry.get("session_id") != self.session_id]
        all_history.extend(history)
        with open(self.history_file, 'w') as f:
            json.dump(all_history[-100:], f, indent=2)

    def summarize_if_needed(self, history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        unsummarized_blocks = [
            entry for entry in history
            if not entry.get("summarized", False)
            and entry.get("user_message") and entry.get("assistant_response")
        ]

        if len(unsummarized_blocks) < 5:
            return history

        history_text = "\n".join(
            f"User: {entry['user_message']}\nAssistant: {entry['assistant_response']}"
            for entry in unsummarized_blocks[:10]
        )

        summary_prompt = f"""
        Summarize the following 10 interactions into one concise but informative summary:
        {history_text}
        """

        summary = llm.invoke(summary_prompt.strip())

        # ✅ Fix: Ensure AIMessage is converted to string
        if isinstance(summary, AIMessage):
            summary = summary.content

        summary_entry = {
            "role": "system",
            "summary": summary,
            "timestamp": datetime.now().isoformat(),
            "session_id": self.session_id,
            "summarized": True,
            "summary_of": [entry["timestamp"] for entry in unsummarized_blocks[:10]]
        }

        # Replace the first 10 unsummarized entries with the summary
        history = [entry for entry in history if entry not in unsummarized_blocks[:10]]
        history.insert(0, summary_entry)
        return history

    def append_chat_pair(self, history: List[Dict[str, Any]], user_msg: str, assistant_msg: str) -> List[Dict[str, Any]]:
        history.append({
            "user_message": user_msg,
            "assistant_response": assistant_msg,
            "timestamp": datetime.now().isoformat(),
            "session_id": self.session_id,
            "summarized": False
        })
        return self.summarize_if_needed(history)


In [14]:
class ResponseEvaluator:
    def __init__(self):
        self.evaluation_history = []
    
    def log_interaction(self, user_input, response, context, retrieval_time):
        self.evaluation_history.append({
            "timestamp": datetime.now().isoformat(),
            "input": user_input,
            "response": response,
            "context_relevance": self._calculate_context_relevance(response, context),
            "retrieval_time": retrieval_time
        })
        self.evaluation_history = self.evaluation_history[-100:]
        
    def _calculate_context_relevance(self, response, context):
        if not context:
            return 0
        context_keywords = set(" ".join(context).split())
        response_keywords = set(response.split())
        common = context_keywords & response_keywords
        return len(common) / len(context_keywords) if context_keywords else 0
    
    def get_metrics(self):
        if not self.evaluation_history:
            return {}
        avg_relevance = sum(
            e["context_relevance"] for e in self.evaluation_history
        ) / len(self.evaluation_history)
        avg_time = sum(
            e["retrieval_time"] for e in self.evaluation_history
        ) / len(self.evaluation_history)
        return {
            "avg_context_relevance": avg_relevance,
            "avg_response_time": avg_time,
            "total_interactions": len(self.evaluation_history)
        }

In [15]:
evaluator = ResponseEvaluator()

In [16]:
def get_dynamic_prompt(user_input: str, history: List) -> PromptTemplate:
    sensitive_keywords = ["complaint", "harassment", "grievance", "termination"]
    policy_keywords = ["policy", "rule", "guideline"]
    benefit_keywords = ["benefit", "pto", "leave", "insurance"]
    
    if any(kw in user_input.lower() for kw in sensitive_keywords):
        instructions = "This is a sensitive topic. Be professional and direct the user to official HR channels if appropriate."
    elif any(kw in user_input.lower() for kw in policy_keywords):
        instructions = "Provide exact policy details with reference to the policy document when possible."
    elif any(kw in user_input.lower() for kw in benefit_keywords):
        instructions = "Include eligibility requirements and any limitations for benefits mentioned."
    else:
        instructions = "Respond helpfully and professionally."
    
    template = f"""You are an HR assistant for a company. Use the following context to answer the question at the end.
If you don't know the answer, say you don't know. Be concise but helpful.

Context:
{{context}}

Conversation history:
{{chat_history}}

Question: {{input}}

Considerations:
1. {instructions}
2. Format lists and important details clearly
3. Provide sources when available

Answer:"""
    
    return PromptTemplate.from_template(template)


In [17]:
def docs_to_serializable(docs: List[Document]) -> List[Dict[str, Any]]:
    return [
        {
            "content": doc.page_content,
            "metadata": doc.metadata
        }
        for doc in docs
    ]

In [None]:
def chat(user_input: str, user_id: str = "default", session_id: str = "default_session") -> str:
    history_manager = ChatHistoryManager(user_id, session_id)
    chat_history = history_manager.load_history()

    formatted_history = []
    for entry in chat_history:
        if entry.get("summarized", False):
            formatted_history.append(("system", entry["summary"]))
        else:
            formatted_history.append(("user", entry["user_message"]))
            formatted_history.append(("assistant", entry["assistant_response"]))



    contextualize_prompt = ChatPromptTemplate.from_messages([
        ("system", "Given a chat history and the latest user question, formulate a standalone question."),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ])

    history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_prompt)
    qa_prompt = get_dynamic_prompt(user_input, chat_history)
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

    start_time = time()
    try:
        response = rag_chain.invoke({
            "input": user_input,
            "chat_history": formatted_history
        })
        elapsed = time() - start_time

        # Make sure response["answer"] is a plain string
        answer = response["answer"]
        if isinstance(answer, AIMessage):
            answer = answer.content


        chat_history = history_manager.append_chat_pair(
            chat_history, user_msg=user_input, assistant_msg=answer
        )

        history_manager.save_history(chat_history)

        return answer

    except Exception as e:
        print(f"Error in RAG chain: {e}")
        fallback_responses = [
            f"I'm having trouble accessing that information. Could you rephrase your question? (Error: {str(e)[:50]})",
            "My knowledge base seems to be unavailable at the moment. Please try again later.",
            "I encountered an unexpected error while processing your request."
        ]
        return random.choice(fallback_responses)


In [24]:
chat("what do you know about the CEO of AyataCommerce?","123")

"The CEO of AyataCommerce is Shine Mathew. Here are some key details about him:\n\n- **Role**: Founder and CEO of AyataCommerce.\n- **Vision**: He emphasizes the importance of a positive work culture and values such as empathy, trust, and adaptability within the organization.\n- **Company Journey**: Under his leadership, AyataCommerce has expanded its operations and established partnerships, contributing to its growth in the e-commerce sector.\n\nFor more specific information about his background or achievements, I don't have additional details available."

In [27]:
chat("what do you know about him?","123")

"Shine Mathew is the CEO and Founder of AyataCommerce. Here are some key details about him:\n\n- **Role**: Founder and CEO of AyataCommerce.\n- **Vision**: He emphasizes the importance of a positive work culture and values such as empathy, trust, and adaptability within the organization.\n- **Company Journey**:\n  - **2016**: AyataCommerce was founded.\n  - **2017**: Established a partnership with SAP.\n  - **2019**: Launched a new website.\n  - **2021**: Opened an office in Kochi.\n  - **2022**: Expanded operations to include a regional office in Bangalore.\n\nFor more specific information about his background or achievements, I don't have additional details available."

In [28]:
chat("What are the core values of the company?","123")

"The core values of AyataCommerce are:\n\n1. **Empathy**: Understanding and valuing diverse perspectives and behaviors in interactions with clients and colleagues.\n2. **Trust**: Building a culture of mutual trust that encourages employees to make decisions and take well-thought-out risks.\n3. **Adaptability**: Embracing flexibility and innovation to address uncertainties and adjust to new conditions effectively.\n\nThese values guide the company's working practices and interactions both internally and externally."

In [None]:
chat("Which among the values reflects about the flexibility?","123")

Error in RAG chain: Object of type AIMessage is not JSON serializable


'My knowledge base seems to be unavailable at the moment. Please try again later.'

In [50]:
chat("Is it ok to take a half day leave if i am sick suddenly?","123")

"Yes, it is generally acceptable to take a half-day leave for sudden illness at AyataCommerce, provided you notify your manager promptly. Here are the key points to consider:\n\n### Eligibility Requirements:\n- **Notification**: Inform your manager as soon as possible on the first day of absence.\n- **Documentation**: If your absence extends beyond 2 days, you will need a medical certificate from your General Physician.\n\n### Limitations:\n- **Sick Leave Balance**: Ensure you have sufficient sick leave available. If you exhaust your Sick/Casual leaves, the leave taken during sickness may be adjusted against your earned leaves.\n- **Approval**: The timing of your leave should be agreed upon with your manager as early as possible.\n\nFor more details, refer to the company's Leave Guidelines or contact your designated HR representative."