In [2]:
import os
import openai
import gradio as gr
import traceback
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Step 1: Read the OpenAI API key from the environment; fail fast without it.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("❌ ERROR: OpenAI API Key not found. Please set it in environment variables.")

# Step 2: Load the Nestlé HR policy PDF from the working directory.
pdf_path = "nestle_hr_policy.pdf"
if not os.path.exists(pdf_path):
    raise FileNotFoundError(f"❌ ERROR: PDF file '{pdf_path}' not found in the directory.")

loader = PyPDFLoader(pdf_path)
documents = loader.load()

# Step 3: Split the loaded documents into overlapping chunks
# (chunk_size=500, chunk_overlap=50) and keep only their text.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
doc_chunks = text_splitter.split_documents(documents)
chunk_texts = [chunk.page_content for chunk in doc_chunks]

# Step 4: Embedding client used both to build the index and to embed queries.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Step 5: Reuse the FAISS index saved under faiss_path, or build and save it.
faiss_path = "faiss_index"
if os.path.exists(faiss_path):
    # SECURITY: allow_dangerous_deserialization=True permits pickle-based
    # loading of the saved index data. Keep it only while faiss_path is
    # written exclusively by this script; never load an untrusted index.
    # NOTE: an existing index is reused as-is, so it is NOT rebuilt when the
    # PDF changes; delete the faiss_path directory to force a rebuild.
    vectorstore = FAISS.load_local(faiss_path, embeddings, allow_dangerous_deserialization=True)
    print("✅ FAISS index loaded from local storage.")
else:
    # Fail with a clear message instead of handing an empty list to
    # FAISS.from_texts (e.g. a scanned PDF with no extractable text).
    if not chunk_texts:
        raise ValueError(
            f"❌ ERROR: No text could be extracted from '{pdf_path}'; cannot build the FAISS index."
        )
    vectorstore = FAISS.from_texts(chunk_texts, embeddings)
    vectorstore.save_local(faiss_path)
    print("✅ FAISS index created and saved locally.")

# Debugging aid: the index should contain at least one vector.
print(f"⚡ FAISS Index Size: {vectorstore.index.ntotal}")  # Should be > 0

# Step 6: Function to retrieve answers
def get_answer(query):
    """Answer an HR-policy question using retrieved PDF chunks as context.

    The query is embedded, the 5 most similar chunks are fetched from the
    module-level ``vectorstore``, their text is joined into one context
    string, and that context plus the question is sent to the
    ``gpt-3.5-turbo`` chat model.

    Args:
        query: The user's question as text.

    Returns:
        The model's answer, or a string starting with "❌ Error:" when the
        query is blank, nothing usable was retrieved, or any step raised.
        Exceptions raised while answering are caught and reported in the
        return value rather than propagated.
    """
    # A blank query cannot be answered; return early instead of spending an
    # embedding call and a chat call on it.
    if not isinstance(query, str) or not query.strip():
        return "❌ Error: Please enter a question."

    try:
        print(f"🔹 Querying FAISS: {query}")

        if vectorstore.index.ntotal == 0:
            return "❌ Error: No documents found in FAISS. Please add documents first."

        query_embedding = embeddings.embed_query(query)
        docs = vectorstore.similarity_search_by_vector(query_embedding, k=5)

        if not docs:
            return "❌ Error: No relevant documents found in FAISS."

        doc_texts = " ".join(doc.page_content for doc in docs)
        if not doc_texts.strip():
            return "❌ Error: Retrieved document is empty. Please check FAISS data."

        print(f"✅ Retrieved Context: {doc_texts[:500]}")  # Print first 500 characters

        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an AI assistant for Nestlé HR policies."},
                {"role": "user", "content": f"Context: {doc_texts}\n\nQuestion: {query}"},
            ],
        )

        return response.choices[0].message.content

    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        # NOTE: the returned message includes the full traceback, so it is
        # shown to whoever sees the function's return value.
        error_message = f"❌ Error: {str(e)}\n{traceback.format_exc()}"
        print(error_message)
        return error_message

# Step 7: Gradio callback with error handling
def chatbot(query):
    """Gradio callback: return the answer for ``query`` as text.

    Delegates to ``get_answer``. Any exception that escapes it is caught
    here, printed, and returned as a "❌ Error:" string (message plus
    traceback) instead of being raised.

    Args:
        query: The text entered in the UI.

    Returns:
        The answer text, or the formatted error message.
    """
    try:
        return get_answer(query)
    except Exception as e:
        # Last-resort boundary: report the failure as the callback's output.
        error_message = f"❌ Error: {str(e)}\n{traceback.format_exc()}"
        print(error_message)
        return error_message

# Step 8: Build the text-in/text-out Gradio UI around chatbot() and start it.
iface = gr.Interface(fn=chatbot, inputs="text", outputs="text", title="Nestlé HR Assistant")
iface.launch()


✅ FAISS index loaded from local storage.
⚡ FAISS Index Size: 35
* Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.




🔹 Querying FAISS: summarize
✅ Retrieved Context: inspired by sound judgement, compliance with 
local market laws and common sense, taking into 
account the specific context. Its spirit should be 
respected under all circumstances and could be 
summarised in one sentence: At Nestlé we put 
people at the centre of everything we do.
Jean-Marc Duvoisin
Deputy Executive Vice President
 Introduction HR has adopted a streamlined approach to 
ensuring functional leadership and the highest 
level of focus, clarity, and efficiency. Our structure 
is bas
