# **Question Answering on a PDF document using the Llama 2 model**

In [1]:
#Install necessary dependancies
!pip install langchain
!pip install unstructured==0.7.12
!pip install sentence-transformers
!pip -qqq install bitsandbytes accelerate
!pip install streamlit
!pip install chromadb
!pip install --upgrade chromadb==0.3.29

Collecting langchain
  Downloading langchain-0.0.341-py3-none-any.whl (1.9 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.5/1.9 MB[0m [31m14.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.3-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-core<0.0.7,>=0.0.6 (from langchain)
  Downloading langchain_core-0.0.6-py3-none-any.whl (174 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.2/174.2 kB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langsmith<0.1.0,>=0.0.63 (from langchain)
  Downloading langsmith-0.

In [35]:
#import all the necessary libraries
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, PDFMinerLoader
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
import torch
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from torch import cuda, bfloat16
import transformers
import os
import chromadb
from chromadb.config import Settings
#device = torch.device('cpu')

# Hugging Face checkpoint used as the answer-generating LLM.
model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Sentence-embedding model used to vectorise both the document chunks and the queries.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Folder (relative to the working directory) where Chroma keeps its files.
persist_directory = "db"

# Client settings shared by every Chroma instance created in this notebook.
CHROMA_SETTINGS = Settings(
    chroma_db_impl='duckdb+parquet',
    persist_directory=persist_directory,
    anonymized_telemetry=False,
)

In [36]:
#Extract the contents from the PDF file using this function
def extract_text_with_langchain_pdf(pdf_file, chunk_size=500, chunk_overlap=0):
    """Load a file and split its contents into chunks for embedding.

    Args:
        pdf_file: Path of the file handed to ``UnstructuredFileLoader``.
        chunk_size: Maximum chunk size passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value this notebook originally hard-coded.
        chunk_overlap: Overlap between consecutive chunks passed to the splitter.
            Defaults to 0, the value this notebook originally hard-coded.

    Returns:
        The result of ``split_documents`` on the loaded documents, i.e. the
        chunked documents that are later embedded and stored.
    """
    loader = UnstructuredFileLoader(pdf_file)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

# Load and chunk the sample PDF that the questions will be answered from.
# "sample_pdf.pdf" is resolved relative to the current working directory.
text_with_langchain_files = extract_text_with_langchain_pdf(
    pdf_file="sample_pdf.pdf",
)

In [38]:
# Show one chunk (index 10) as a sanity check of the splitter output.
# Raises IndexError if the document produced fewer than 11 chunks.
text_with_langchain_files[10]

Document(page_content='Python/Spark for data processing, and store the processed data in a PostgreSQL database. The goal is to empower supervisors and managers with actionable insights for optimizing yard operations and reducing time wastage with simple and intuitive product.', metadata={'source': 'sample_pdf.pdf'})

In [32]:
# Use the current CUDA device when a GPU is available, otherwise fall back to the CPU.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token.
# The token is read from the environment so that it is never stored in the notebook.
hf_auth = os.environ.get('HF_TOKEN') or os.environ.get('HUGGING_FACE_HUB_TOKEN')
if not hf_auth:
    raise RuntimeError(
        "No Hugging Face access token found. Set the HF_TOKEN environment variable "
        f"to a token that has been granted access to {model_id}."
    )

model_config = transformers.AutoConfig.from_pretrained(
    model_id, use_auth_token=hf_auth
)

#Model initialization
# NOTE(review): trust_remote_code=True allows code shipped in the model repository to run;
# presumably not required for this checkpoint -- confirm and drop if so.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

#Set up the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_auth)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [39]:
# Embed every chunk and store the resulting vectors in the on-disk Chroma collection.
db = Chroma.from_documents(
    text_with_langchain_files,
    embeddings,
    persist_directory=persist_directory,
    client_settings=CHROMA_SETTINGS,
)
# Write the collection to `persist_directory`.
db.persist()
# Drop the reference; generate_answer() opens the persisted store again when needed.
db = None

In [46]:
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

#Function to retrieve the answer for the given prompt
def generate_answer(prompt):
    """Answer a question using the chunks stored in the persisted Chroma collection.

    The question is used to retrieve context from the vector store, the context
    and question are inserted into the prompt template, and the result is sent
    to the LLM returned by ``llm_pipeline()``.

    Args:
        prompt: The user's question.

    Returns:
        The output of the RAG chain after ``StrOutputParser``.
    """
    llm = llm_pipeline()

    # Reuse the module-level embedding model and persist directory instead of
    # reloading the SentenceTransformer weights on every call; these are the same
    # objects the collection was built with.
    db = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
        client_settings=CHROMA_SETTINGS,
    )
    retriever = db.as_retriever()

    template = """Use the following pieces of context to answer the question at the end.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Use five sentences maximum and keep the answer as concise as possible.
    {context}
    Question: {question}
    Helpful Answer:"""
    rag_prompt_custom = PromptTemplate.from_template(template)

    # The retriever output is flattened to text for {context}; the raw question is
    # passed through unchanged for {question}.
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | rag_prompt_custom
        | llm
        | StrOutputParser()
    )

    return rag_chain.invoke(prompt)

In [47]:
#Function to setup the pipeline with model and tokenizer
def llm_pipeline():
    """Build a LangChain LLM from the module-level ``model`` and ``tokenizer``.

    Returns:
        A ``HuggingFacePipeline`` wrapping a transformers 'text-generation' pipeline.
    """
    # Sampling settings passed to the transformers pipeline.
    generation_kwargs = dict(
        max_length=2000,
        do_sample=True,
        temperature=0.2,
        top_k=5,
    )
    # No explicit device is passed here; the model was loaded with device_map='auto'.
    text_generator = pipeline(
        'text-generation',
        model=model,
        tokenizer=tokenizer,
        **generation_kwargs,
    )
    return HuggingFacePipeline(pipeline=text_generator)

# **Ask the Questions**

In [49]:
# Read a question from the user, run it through the RAG chain and print the answer.
prompt = input("Ask the Question \n")
answer = generate_answer(prompt)

print(answer)

Ask the Question 
What are benefits of using siteflux analytics?
 SiteFlux Analytics provides real-time tracking of worker movement and resource utilization, enabling supervisors and managers to optimize yard operations and make informed decisions. It also facilitates data visualization, presenting actionable insights in a user-friendly format. By harnessing the power of SiteFlux Analytics, stakeholders can identify operational inefficiencies, take corrective measures, and drive continuous improvement.
