In [None]:
import os
from dotenv import load_dotenv
import gradio as gr

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import MarkdownTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain_chroma import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.retrievers import BaseRetriever
from langchain_core.language_models import BaseChatModel
from langchain_core.vectorstores import VectorStoreRetriever
from sentence_transformers import CrossEncoder
from langchain_core.output_parsers import StrOutputParser

In [2]:
db_name = "../vector_db"

In [3]:
load_dotenv()
API_KEY = os.getenv('GEMINI_API_KEY')

## Setup Knowledge base and Meta Data

In [4]:
knowledge_base_dir = '../knowledge_base'
current_doc_type = os.path.basename(os.path.normpath(knowledge_base_dir))
print(f"Loading .md files from: '{knowledge_base_dir}' with doc_type: '{current_doc_type}'")

Loading .md files from: '../knowledge_base' with doc_type: 'knowledge_base'


In [5]:
def add_metadata(document, doc_type):
    document.metadata["doc_type"] = doc_type
    return document

In [6]:
documents = []
try:
    # Initialize DirectoryLoader to load .md files from the knowledge_base_dir
    loader = DirectoryLoader(
        path=knowledge_base_dir,          # Path to the directory to search
        glob="*.md",                      # Pattern to match files (e.g., files.md)
        loader_cls=TextLoader,            # Loader to use for .md files
        loader_kwargs={'encoding': 'utf-8'}, # Arguments for TextLoader
        show_progress=True,               # Optional: shows a progress bar
        use_multithreading=False,         # Optional: set to True for potential speedup with many files
                                          # recursive=False by default, which is what we want here.
    )

    # Load the documents
    folder_documents = loader.load()

    if folder_documents:
        print(f"Successfully loaded {len(folder_documents)} document(s) from '{knowledge_base_dir}'.")
        # Add metadata to each loaded document
        for doc in folder_documents:
            documents.append(add_metadata(doc, current_doc_type))
    else:
        print(f"No .md documents found in '{knowledge_base_dir}'.")

except FileNotFoundError:
    print(f"Error: The directory '{knowledge_base_dir}' was not found. Please check the path.")
except Exception as e:
    print(f"An error occurred during document loading: {e}")

100%|██████████| 5/5 [00:00<00:00, 62.25it/s]

Successfully loaded 5 document(s) from '../knowledge_base'.





## Chunking

In [7]:
# Split documents into smaller chunks
text_splitter = MarkdownTextSplitter(chunk_size=1000, chunk_overlap=100)
chunked_documents = text_splitter.split_documents(documents)

print(f"Total number of chunks: {len(chunked_documents)}")
print(f"Document types found: {set(doc.metadata['doc_type'] for doc in documents)}")

Total number of chunks: 32
Document types found: {'knowledge_base'}


## Embedding

In [8]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create vectorstore
vectorstore = Chroma.from_documents(documents=chunked_documents, embedding=embeddings, persist_directory=db_name)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Vectorstore created with 32 documents


## Setup LLM and retriever

In [9]:
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.0, api_key=API_KEY)
# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


## setup query expansion and cross-encoder re-ranking

In [None]:
class AdvancedRetriever(BaseRetriever):
    """
    A retriever that combines query expansion and cross-encoder re-ranking.
    """
    vectorstore_retriever: VectorStoreRetriever
    llm: BaseChatModel
    top_k: int = 5

    def _get_relevant_documents(self, query: str, *, run_manager):
        reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
        expansion_prompt = ChatPromptTemplate.from_template(
            """You are an AI assistant. Your task is to take a user's question and generate 3 different versions of it to improve document retrieval.
            Provide only the reformulated questions, separated by newlines.
            Original Question: {question}"""
        )
        expansion_chain = expansion_prompt | self.llm | StrOutputParser()
        expanded_queries_str = expansion_chain.invoke({"question": query}, config={"run_name": "QueryExpansion"})
        all_queries = [query] + expanded_queries_str.strip().split('\n')
        
        all_retrieved_docs = []
        for q in all_queries:
            # Retrieve documents for each expanded query
            all_retrieved_docs.extend(self.vectorstore_retriever.get_relevant_documents(q))

        unique_docs_dict = {doc.page_content: doc for doc in all_retrieved_docs}
        unique_docs = list(unique_docs_dict.values())
        
        if not unique_docs:
            return []

        doc_texts = [doc.page_content for doc in unique_docs]
        query_doc_pairs = [[query, doc_text] for doc_text in doc_texts]
        
        scores = reranker.predict(query_doc_pairs)
        
        doc_scores = list(zip(unique_docs, scores))
        doc_scores.sort(key=lambda x: x[1], reverse=True)
        
        reranked_docs = [doc for doc, score in doc_scores[:self.top_k]]
        
        return reranked_docs

In [11]:
advanced_retriever = AdvancedRetriever(
    vectorstore_retriever=retriever, 
    llm=llm,
    top_k=5
)

## setup QA prompt template

In [12]:
# define system prompt content
system_prompt_content = """
You are a specialized AI assistant. I am your builder, and I have a knowledge base that contains information about my qualifications, projects, and other relevant details. Your role is to assist recruiters and HR professionals in understanding my background and expertise.
You will answer questions about me, your builder, you can refer to me as 'My Builder', based solely on the information provided in the context documents from my knowledge base. You must not use any external knowledge or make assumptions beyond what is explicitly stated in those documents.
Your dedicated role is to assist recruiters and HR professionals. In your conversation, the person you are talking to (the recruiter or HR professional) will be referred to as 'you'.

It is absolutely crucial to understand that 'Builder' IS NOT the person you are currently interacting with.
Therefore, you MUST NOT use phrases that equate or confuse 'Builder' with 'you' (the recruiter/HR). For example, do not say 'Builder (you)', 'your projects as Builder', or any similar phrasing that implies the recruiter is Builder. 'Builder' is strictly the subject of the knowledge base.

Your answers must be based STRICTLY and ONLY on the information contained in the provided context documents from Builder's knowledge base.
When discussing Builder's qualifications, projects, or any other information, consistently use the name 'Builder' or 'My Builder'. For example: 'Builder has expertise in...' or 'This project was undertaken by Builder.'

If the information needed to answer a question is not present in the provided context, you MUST clearly state: 'I am unable to find that specific information about Builder in the provided documents.'
Under no circumstances should you use external knowledge, make assumptions, or generate information not explicitly present in the context.
Your responses must be concise, factual, and maintain a professional and helpful tone when addressing the recruiter or HR professional (i.e., 'you').
If the provided context is empty or entirely irrelevant to the question asked, respond with: 'I cannot answer that question based on the provided documents about Builder.'
"""
# define prompt template
# The chat_history is handled by ConversationalRetrievalChain to condense the question before this prompt.
qa_prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_prompt_content),
    ("human", "Given the following context and question, please provide an answer.\n\nContext:\n{context}\n\nQuestion:\n{question}")
])

## Setup Condense Question Prompt

In [14]:
# Create a custom prompt template
condense_question_template = """
Given the following chat history and a follow-up question, rephrase the follow-up question to be a standalone question, in its original language.

Chat History:
{chat_history}

Follow Up Input: {question}
Standalone question:"""

# Instantiate the prompt template
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(condense_question_template)

In [15]:
# putting it together: set up the conversation chain with the LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm, 
    retriever=advanced_retriever, 
    memory=memory, 
    combine_docs_chain_kwargs={"prompt": qa_prompt_template},
    condense_question_prompt=CONDENSE_QUESTION_PROMPT,
)

## Setup Gradio Chat

In [16]:
def chat(question, history):
    result = conversation_chain.invoke({"question": question})
    return result["answer"]

In [17]:
screen_fit_css = """
#screen_fit_chatbot {
    height: 78vh !important; /* Main height for the chatbot message display area */
    display: flex !important; /* Use flexbox for internal layout */
    flex-direction: column !important;
}
#screen_fit_chatbot > .wrap { /* Targets the inner container that holds messages */
    flex-grow: 1 !important; /* Allows this area to expand to fill the specified height */
    overflow-y: auto !important; /* Makes the message area scrollable if content exceeds height */
    min-height: 0 !important; /* Important for flex-grow to work correctly in various content scenarios */
}
"""

In [18]:
initial_bot_greeting = "Hello! You can ask me questions about my builder's experiences and projects. What would you like to know?"
custom_chatbot_instance = gr.Chatbot(
    elem_id="screen_fit_chatbot",
    value=[{"role": "assistant", "content": initial_bot_greeting}], # Initial greeting in "messages" format
    label="RAG Chatbot",
    bubble_full_width=True,
    type="messages"  # Explicitly set the type for the Chatbot instance
)

In [19]:
view = gr.ChatInterface(
    fn=chat,
    chatbot=custom_chatbot_instance,
    type="messages",
    css=screen_fit_css
).launch(inbrowser=True, debug=True)

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


  all_retrieved_docs.extend(self.vectorstore_retriever.get_relevant_documents(q))


Keyboard interruption in main thread... closing server.
