In [1]:
!pip install langchain
!pip install pypdf
!pip install sentence_transformers
!pip install faiss-gpu
!pip install wget
!pip install ctransformers

Collecting langchain
  Downloading langchain-0.0.285-py3-none-any.whl (1.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.7 MB[0m [31m3.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━[0m [32m1.0/1.7 MB[0m [31m18.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━[0m [32m1.0/1.7 MB[0m [31m18.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.6/1.7 MB[0m [31m13.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.6.0,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.5.14-py3-none-any.whl (26 kB)
Collecting langsmith<0.1.0,>=

In [2]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import PromptTemplate
from langchain.chains import RetrievalQA

import wget
import argparse
import timeit

In [3]:
# Read every PDF that sits directly in the data folder; each file is parsed
# with PyPDFLoader and the results are collected in `documents`.
# NOTE(review): the path is under /content/drive, so Google Drive presumably has
# to be mounted before this cell runs -- no mount step is visible in this notebook.
loader = DirectoryLoader(
    '/content/drive/MyDrive/Generative AI',
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
documents = loader.load()

In [4]:
# Cut the loaded documents into chunks for embedding
# (chunk_size=500, with an overlap of 50 between consecutive chunks).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
texts = text_splitter.split_documents(documents)

In [5]:
# Sentence-embedding model used to vectorise the chunks; it is run on the CPU.
# setup_dbqa() constructs the same model again when it reloads the index.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [6]:
# Embed all chunks into a FAISS index, then write the index to
# 'vectorstore/db_faiss' -- the same path setup_dbqa() loads it back from.
vectorstore = FAISS.from_documents(
    texts,
    embeddings,
)
vectorstore.save_local('vectorstore/db_faiss')

In [7]:
# Prompt text for the QA chain. The {context} and {question} placeholders are
# the two input variables declared in set_qa_prompt().
qa_template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

In [8]:
# file_from = 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin'
# file_to = '/content/drive/MyDrive/Llama'
# wget.download(file_from,file_to)

In [9]:
from langchain.llms import CTransformers

# Local CTransformers wrapper for Llama-2-7B-Chat.
# Every ctransformers option, including `gpu_layers`, is passed through `config`.
# A top-level `gpu_layers=` keyword is not a field of the LangChain wrapper and
# is not forwarded to the underlying model, so the GPU-offload request was lost.
# NOTE(review): GPU offloading also needs the CUDA build of ctransformers
# (`ctransformers[cuda]`); without it the model is expected to run on CPU -- confirm.
llm = CTransformers(
    model='/content/drive/MyDrive/Llama/llama-2-7b-chat.ggmlv3.q2_K.bin',  # Location of downloaded GGML model
    model_type='llama',  # Model type Llama
    config={'max_new_tokens': 256,
            'temperature': 0.01,
            'gpu_layers': 50},
)

In [10]:
def set_qa_prompt():
    """Wrap the module-level ``qa_template`` string in a PromptTemplate.

    Returns:
        A PromptTemplate whose input variables are ``context`` and ``question``.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=qa_template,
    )

In [11]:
def build_retrieval_qa(llm, prompt, vectordb, k=2):
    """Build a RetrievalQA chain on top of a vector store.

    Args:
        llm: Language model handed to the chain.
        prompt: Prompt passed to the chain via ``chain_type_kwargs``.
        vectordb: Vector store; its ``as_retriever`` method supplies the retriever.
        k: Value passed to the retriever as ``search_kwargs={'k': k}``.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        The object returned by ``RetrievalQA.from_chain_type``, built with
        ``chain_type='stuff'`` and ``return_source_documents=True``.
    """
    retriever = vectordb.as_retriever(search_kwargs={'k': k})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )

In [12]:
def setup_dbqa():
    """Assemble the QA chain from the persisted index and the global ``llm``.

    Creates the embeddings model, loads the FAISS index stored at
    'vectorstore/db_faiss' with it, builds the prompt with set_qa_prompt(),
    and passes all of these to build_retrieval_qa().

    Returns:
        Whatever build_retrieval_qa() returns.
    """
    query_embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    index = FAISS.load_local('vectorstore/db_faiss', query_embedder)
    return build_retrieval_qa(llm, set_qa_prompt(), index)

In [16]:
# Time the whole round trip: the timer spans both building the QA chain
# (setup_dbqa) and answering the single hard-coded query.
start = timeit.default_timer()
dbqa = setup_dbqa()
response = dbqa({'query': "What is kendra?"})
end = timeit.default_timer()

# Answer text first, followed by a separator rule.
print(f'\nAnswer: {response["result"]}')
print('='*50)

# One section per retrieved source document: its text, file name and page.
source_docs = response['source_documents']
for idx, doc in enumerate(source_docs, start=1):
    print(f'\nSource Document {idx}\n')
    print(f'Source Text: {doc.page_content}')
    print(f'Document Name: {doc.metadata["source"]}')
    print(f'Page Number: {doc.metadata["page"]}\n')
    print('='* 50)

# Elapsed time between the two timer readings taken above.
print(f"Time to retrieve response: {end - start}")


Answer: Kendra is an AI-powered search and question answering service provided by AWS for enterprise customers. It allows its customers to power natural language based searching across their own data, and has been tool to support COVID-19 related search using the CORD-19 article corpus.

Source Document 1

Source Text: overview of this architecture.
Amazon Kendra
Amazon Kendra4is a semantic search and question an-
swering service provided by AWS for enterprise cus-
tomers. Kendra allows its customers to power natural lan-
guage based searching across their own data. As response
to the worldwide COVID-19 pandemic, Kendra has also
been tooled to support COVID-19 related search using the
CORD-19 article corpus. The end-to-end Kendra system
consists of several components.
Document Name: /content/drive/MyDrive/Generative AI/aws-cord-19-search-a-neural-search-engine-for-covid-19-literature.pdf
Page Number: 1


Source Document 2

Source Text: prehension model.
4https://aws.amazon.com/kendra/

In [15]:
# from langchain.chains import LLMChain
# llm_chain = LLMChain(prompt=prompt,llm = llm)
# llm_chain.run("Who is ")

TypeError: ignored