# Import the required libraries




In [None]:
from langchain_community.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA, RetrievalQA
from langchain_openai import ChatOpenAI
from langchain.document_loaders import TextLoader, PyPDFLoader
import openai
import os
from dotenv import load_dotenv
load_dotenv()

# Loading Documents


In [None]:
# Source document: the Nestlé HR policy PDF, loaded directly from its URL.
loader = PyPDFLoader(
    'https://www.nestle.com/sites/default/files/asset-library/documents/jobs/the_nestle_hr_policy_pdf_2012.pdf'
)
documents = loader.load()

# Splitter settings: chunk_size caps the size of each chunk, and chunk_overlap
# repeats text between neighbouring chunks so context carries across the cut.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
split_documents = text_splitter.split_documents(documents)

# Sanity check: report how many chunks were produced, then show the first 200
# characters of each of the final three chunks. The "Chunk N" labels count
# within this preview (starting at 1); they are not positions in
# split_documents.
print(f"Number of split documents: {len(split_documents)}")
preview = split_documents[-3:]
for i, doc in enumerate(preview):
    print(f"Chunk {i+1}: {doc.page_content[:200]}...")


# Creating Vector Representations of Texts

In [None]:
# Split the loaded documents (chunk_size=1000, chunk_overlap=200), embed the
# chunks with OpenAI embeddings, and index them in a Chroma vector store.
# A variant with chunk_overlap=0 was tried earlier to reduce the number of
# chunks and so make embedding cheaper.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_documents(texts, embeddings)

# Retriever over the vector store, configured with k=3 results per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

# Chat model used to write the answers; temperature is pinned to 0.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Retrieval QA chain that also asks for the source documents to be returned.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    return_source_documents=True,
    chain_type="stuff",
)




# Setting Up Question-Answering System

In [None]:
# Build the question-answering chain from the `llm` (temperature=0) and
# `retriever` (k=3) configured in the previous cell. Constructing a fresh
# ChatOpenAI and a bare `vectordb.as_retriever()` here would silently discard
# those settings. Source documents are not requested for this chain.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

# Smoke-test the chain with a sample question.
query = "When was the Nestlé HR Policy last updated?"

# invoke() returns a dict; the generated answer is read from its 'result' key.
result = qa.invoke(query)
print("Query:", query)
print("Answer:", result['result'])



# Defining Prompt Template

In [None]:
# Import PromptTemplate from `langchain.prompts`; importing it from the
# `langchain` package root is deprecated.
from langchain.prompts import PromptTemplate

# Instruction wrapper placed around each user question before it is passed to
# the QA chain. `{question}` is the only placeholder and is filled in via
# `prompt.format(question=...)`.
template = """
I am a HR helpful assistant. Please answer the following question in English.
Question: {question}
Answer:
"""

# Prompt object exposing the template with its single input variable.
prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)




# Building and Launching the Chat Interface with Gradio

In [None]:
import gradio as gr

def add_text(history, text):
    """Record a user message in the chat history and clear the input box.

    Args:
        history: Existing chat history, a list of ``{"role", "content"}`` dicts.
        text: The message the user just submitted.

    Returns:
        A ``(new_history, "")`` tuple: a new list with the user message
        appended (the input list is left untouched) and an empty string used
        to reset the textbox.
    """
    user_message = {"role": "user", "content": text}
    return history + [user_message], ""

def bot(history):
    """Answer the most recent chat message and append the reply to the history.

    Args:
        history: Chat history as a list of ``{"role", "content"}`` dicts. The
            last entry is taken to be the user message to answer.

    Returns:
        A new history list with the assistant's answer appended; the input
        list itself is not modified.
    """
    # Wrap the latest message in the prompt template before querying the chain.
    query = prompt.format(question=history[-1]["content"])

    # Use invoke() and read the "result" key, matching how the chain is queried
    # elsewhere in this file. Chain.run() is deprecated and is rejected for
    # chains that expose more than one output key.
    answer = qa.invoke(query)["result"]

    # Append the bot response after the user's message rather than replacing it.
    return history + [{"role": "assistant", "content": answer}]

# Chat UI: a message-style chatbot panel above a single-line text input.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        value=[],
        elem_id="chatbot",
        type="messages",  # history entries are {"role", "content"} dicts
        height=400,
    )

    with gr.Row():
        # `scale` must be an integer; Gradio warns on fractional values such
        # as the 0.6 used previously.
        with gr.Column(scale=1):
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
                container=False,
            )

    # On submit: first record the user's message and clear the textbox
    # (add_text), then generate the reply and append it (bot).
    txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then(
        bot, chatbot, chatbot
    )

# Start the app; share=True also requests a shareable link.
demo.launch(share=True)
