In [18]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
# Read the key/value pairs stored in the local dotenv file before any of them
# are looked up below.
load_dotenv("untitled.env")

import os

# Hugging Face Hub access token; this is None when the variable is not defined.
token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

In [20]:
# Source document and chunking parameters, named so they are easy to tune.
PDF_PATH = 'the_nestle_hr_policy_pdf_2012.pdf'
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

# Read the PDF into LangChain documents.
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

# Cut the documents into overlapping chunks for embedding and retrieval.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(docs)

In [21]:
# Sentence-transformer model used to embed every chunk.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Build a FAISS index over the chunks and expose it through the retriever interface.
vectorstore = FAISS.from_documents(
    chunks,
    embedding_model,
)
retriever = vectorstore.as_retriever()

In [24]:
# Load the Mistral instruct checkpoint and wrap it in a text-generation pipeline.
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# The first cell reads HUGGINGFACEHUB_API_TOKEN, while this cell used to read only
# HUGGINGFACEHUB_API_KEY, silently replacing a valid token with None when the
# env file defines just the former. Accept either name; the _KEY spelling is
# checked first so setups that relied on it behave exactly as before.
token = os.getenv("HUGGINGFACEHUB_API_KEY") or os.getenv("HUGGINGFACEHUB_API_TOKEN")

# trust_remote_code is intentionally not passed: this checkpoint uses the Mistral
# architecture that ships with transformers, so there is no reason to allow
# code from the model repository to be executed locally.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=token,
    device_map="auto",  # let the library decide where to place the weights
)

# max_new_tokens caps the length of each generated answer.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the disk and cpu.
Device set to use cpu


In [26]:
from langchain_huggingface import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Adapt the transformers pipeline to LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=pipe)

# Retrieval-augmented QA chain: the retriever supplies context, the LLM answers.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
)

In [28]:
def ask_question(question):
    """Run `question` through the notebook's `qa_chain` and return its answer.

    The question is passed to the chain under the "query" key, and the value
    stored under the "result" key of the chain's response is returned.
    """
    response = qa_chain.invoke({"query": question})
    return response['result']

In [None]:
# Example query against the indexed policy document.
question = "What is Nestlé’s policy on diversity?"

# Echo the question first so it is visible while the answer is being generated.
print("Q:", question)
answer = ask_question(question)
print("A:", answer)

Q: What is Nestlé’s policy on diversity?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
