In [1]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings

In [2]:
# Load every PDF matching the glob from the data directory
loader = DirectoryLoader('Data/Llama2-Data',
                         glob="*.pdf",
                         loader_cls=PyPDFLoader)
documents = loader.load()

# Fail fast with a clear message: an empty corpus would otherwise most likely
# only surface later, as a hard-to-read error while building the vector store.
if not documents:
    raise ValueError("No PDF documents were loaded from 'Data/Llama2-Data' "
                     "(glob '*.pdf'); check that the data path is correct.")

In [3]:
# Cut the loaded documents into overlapping chunks for embedding
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,    # chunk size handed to the splitter
    chunk_overlap=50,  # overlap handed to the splitter
)
texts = text_splitter.split_documents(documents)

In [4]:
# Embedding model (all-MiniLM-L6-v2 from sentence-transformers), run on the CPU
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs=dict(device='cpu'),
)

# Embed the chunks into a FAISS vector store, then save it to disk so that
# setup code can reopen it from 'vectorstore/db_faiss'
vectorstore = FAISS.from_documents(texts, embeddings)
vectorstore.save_local('vectorstore/db_faiss')

In [5]:
# Prompt text for the QA chain; {context} and {question} are the two
# placeholders filled in when the prompt is rendered.
qa_template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

In [6]:
from langchain.llms import CTransformers

# Path to the downloaded GGML model file
model_location = (
    'models/models--TheBloke--Llama-2-7B-Chat-GGML/'
    'snapshots/76cd63c351ae389e1d4b91cab2cf470aab11864b/'
    'llama-2-7b-chat.ggmlv3.q4_0.bin'
)
# Wrap the local Llama-2-7B-Chat file in a CTransformers LLM:
# at most 256 newly generated tokens, with a very low sampling temperature
llm = CTransformers(
    model=model_location,
    model_type='llama',
    config=dict(max_new_tokens=256, temperature=0.01),
)

In [7]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

def set_qa_prompt():
    """Build the prompt object used by the QA chain.

    Returns:
        A PromptTemplate created from the module-level ``qa_template``, with
        ``context`` and ``question`` declared as its input variables.
    """
    return PromptTemplate(input_variables=['context', 'question'],
                          template=qa_template)


def build_retrieval_qa(llm, prompt, vectordb, k=2):
    """Assemble a 'stuff'-type RetrievalQA chain over a vector store.

    Args:
        llm: Language model passed to ``RetrievalQA.from_chain_type``.
        prompt: Prompt forwarded to the chain through ``chain_type_kwargs``.
        vectordb: Vector store; must provide ``as_retriever(search_kwargs=...)``.
        k: Value sent to the retriever as ``search_kwargs['k']`` (how many
            chunks to fetch per query). Defaults to 2, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type``, configured with
        ``return_source_documents=True``.
    """
    retriever = vectordb.as_retriever(search_kwargs={'k': k})
    dbqa = RetrievalQA.from_chain_type(llm=llm,
                                       chain_type='stuff',
                                       retriever=retriever,
                                       return_source_documents=True,
                                       chain_type_kwargs={'prompt': prompt})
    return dbqa


def setup_dbqa():
    """Create the document-QA chain from the saved vector store.

    Instantiates the embedding model on the CPU, reopens the FAISS store saved
    under 'vectorstore/db_faiss', and combines it with the module-level ``llm``
    and the prompt from ``set_qa_prompt``.

    Returns:
        Whatever ``build_retrieval_qa`` returns for those three pieces.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    faiss_store = FAISS.load_local('vectorstore/db_faiss', embedder)
    return build_retrieval_qa(llm, set_qa_prompt(), faiss_store)

In [8]:
import timeit

# Time the full round trip: building the QA chain plus answering the question
start = timeit.default_timer()
question = "How much is the minimum gurantee payable by adidas?"

# Build the RetrievalQA chain
dbqa = setup_dbqa()

# Send the question through the chain, then stop the clock
response = dbqa({'query': question})
end = timeit.default_timer()

# Report the generated answer
print(f'\nAnswer: {response["result"]}')
print('=' * 50)  # Formatting separator

# List each retrieved chunk together with the file and page it came from
source_docs = response['source_documents']
for position, source_doc in enumerate(source_docs, start=1):
    print(f'\nSource Document {position}\n')
    print(f'Source Text: {source_doc.page_content}')
    print(f'Document Name: {source_doc.metadata["source"]}')
    print(f'Page Number: {source_doc.metadata["page"]}\n')
    print('=' * 50)  # Formatting separator

# Elapsed time in seconds; note that it includes the chain setup above
print(f"Time to retrieve response: {end - start}")


Answer: The minimum guarantee payable by adidas over the term of the agreement is £750 million, subject to certain adjustments.

Source Document 1

Source Text: The minimum guarantee payable by adidas over the term of our agreement with them is equal to
Document Name: Data\Llama2-Data\manu-20f-2022-09-24.pdf
Page Number: 84


Source Document 2

Source Text: Pursuant to our contract with adidas, which began on 1 August 2015, the minimum guarantee payable
by adidas over the 10-year term of the agreement is equal to £750 million, subject to certain adjustments.See “Item 4. Information on the Company—Revenue Sectors—Commercial—Retail, Merchandising,Apparel & Product Licensing” for additional information regarding our agreement with adidas.
We also maintain a mixture of long-term debt and capacity under our revolving facilities in order to
Document Name: Data\Llama2-Data\manu-20f-2022-09-24.pdf
Page Number: 74

Time to retrieve response: 109.0811162


In [None]:
# Time a second query against the QA chain built in an earlier cell
start = timeit.default_timer()
# Alternative question to try: "How much was the total revenue generated?"
question = "How many Twitter followers?"

# Send the question through the chain, then stop the clock
response = dbqa({'query': question})
end = timeit.default_timer()

# Report the generated answer
print(f'\nAnswer: {response["result"]}')
print('=' * 50)  # Formatting separator

# List each retrieved chunk together with the file and page it came from
source_docs = response['source_documents']
for position, source_doc in enumerate(source_docs, start=1):
    print(f'\nSource Document {position}\n')
    print(f'Source Text: {source_doc.page_content}')
    print(f'Document Name: {source_doc.metadata["source"]}')
    print(f'Page Number: {source_doc.metadata["page"]}\n')
    print('=' * 50)  # Formatting separator

# Elapsed time in seconds for this query
print(f"Time to retrieve response: {end - start}")