In [1]:
from qdrant_client import models, QdrantClient
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain.vectorstores.qdrant import Qdrant
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain import PromptTemplate
from peft import PeftModel, PeftConfig
#
from tqdm.auto import tqdm
from uuid import uuid4
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from sentence_transformers import SentenceTransformer, util
import pandas as pd

#
import os
import random
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the document that you need to parse, please change the location to where the pdf resides

# Load 1 PDF file
loader = PyPDFLoader("/mnt/data/smuckers_poc/RAG/2024-First-Quarter-Results.pdf")
# or load an entire folder
# loader = PyPDFDirectoryLoader("/mnt/data/RAG/")
data = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0))

In [3]:
print(f"There are {len(data)} pages in the document")

There are 37 pages in the document


In [4]:
# Pick a sample page
print(data[random.randint(0, len(data))])

page_content="The following tables provide a reconciliation of the Company's fiscal 2024 guidance for estimated adjusted earnings per share and\nfree cash flow.\n \nYear Ending April 30, 2024\nLow\nHigh\nNet income per common share – assuming dilution reconciliation:\nNet income per common share – assuming dilution\n$8.01\n$8.41\nChange in net cumulative unallocated derivative gains and losses \n(A)\n0.12\n0.12\nAmortization\n1.16\n1.16\nGain on divestiture\n(0.01)\n(0.01)\nUnrealized loss (gain) on investment in equity securities\n (B)\n0.16\n0.16\nPension plan termination settlement charge\n0.02\n0.02\nAdjusted effective income tax rate impact\n(0.01)\n(0.01)\nAdjusted earnings per share\n$9.45\n$9.85\n(A)\n We are unable to project derivative gains and losses on a forward-looking basis as these will vary each quarter based on\nmarket conditions and derivative positions taken. The change in unallocated derivative gains and losses in the table above reflects" metadata={'source': '/mnt

In [5]:
metadatas = []
texts = []
for row in data:
  metadatas.append(row.metadata)
  texts.append(row.page_content)
print(len(metadatas),len(texts))

37 37


In [6]:
prompt_template = """Use the following pieces of context to answer the question enclosed within  3 backticks at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
Please provide an answer which is factually correct and based on the information retrieved from the vector store.
Please also mention any quotes supporting the answer if any present in the context supplied within two double quotes "" .

{context}

QUESTION:```{question}```
ANSWER:
"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context","question"])
#


In [7]:
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}

In [8]:
embedding_model_name = "BAAI/bge-small-en"
os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/mnt/data/smuckers_poc/model_cache/'
embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-small-en",
                                      model_kwargs=model_kwargs,
                                      encode_kwargs=encode_kwargs
                                     )

In [9]:
# doc_store = Qdrant.from_texts(texts,
#                               metadatas=metadatas,
#                               embedding=embeddings,
#                               location=":memory:",
#                               collection=f"{embedding_model_name}_press_release")

In [10]:
doc_store = Qdrant.from_texts(texts,
                              metadatas=metadatas,
                              embedding=embeddings,
                              path="mnt/data/smuckers_poc/local_qdrant/",
                              collection=f"{embedding_model_name}_press_release")

In [11]:
chain_type_kwargs = {"prompt": PROMPT}


################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

model_id = "NousResearch/Llama-2-7b-chat-hf"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    cache_dir="/mnt/data/smuckers_poc/model_cache/",
    quantization_config=bnb_config,
    device_map='auto'
)

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.pad_token_id = model.config.eos_token_id

Loading checkpoint shards: 100%|██████████| 2/2 [00:45<00:00, 22.91s/it]


In [12]:
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100)
rag_llm = HuggingFacePipeline(pipeline=pipe)
    
qa_chain = RetrievalQA.from_chain_type(llm=rag_llm,
                                       chain_type="stuff",
                                       chain_type_kwargs={"prompt": PROMPT},
                                       retriever=doc_store.as_retriever(search_kwargs={"k": 5}),
                                       return_source_documents=True
                                      )

In [13]:
user_question = input("Please provide your question here :")
result = qa_chain(user_question)

Please provide your question here : What were net sales ?




In [14]:
result['result']

'Net sales for the company were $1,805.2 million in 2022, an increase of 49% compared to the prior year, driven by a 43% favorable impact from Jif peanut butter primarily due to lapping the product recall in the prior year. Excluding noncomparable net sales in the prior year of $374.1 million from the divested pet food brands and $3.8 million'