In [74]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
import textwrap

In [75]:
def load_pdf_data(file_path):
    # Creating a PyMuPDFLoader object with file_path
    loader = PyMuPDFLoader(file_path=file_path)
    
    # loading the PDF file
    docs = loader.load()
    
    # returning the loaded document
    return docs

In [76]:
# Responsible for splitting the documents into several chunks
def split_docs(documents, chunk_size=1024, chunk_overlap=20):
    
    # Initializing the RecursiveCharacterTextSplitter with
    # chunk_size and chunk_overlap
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    
    # Splitting the documents into chunks
    chunks = text_splitter.split_documents(documents=documents)
    
    # returning the document chunks
    return chunks

In [77]:
# function for loading the embedding model
def load_embedding_model(model_path, normalize_embedding=True):
    return HuggingFaceEmbeddings(
        model_name=model_path,
        model_kwargs={'device':'cpu'}, # here we will run the model with CPU only
        encode_kwargs = {
            'normalize_embeddings': normalize_embedding # keep True to compute cosine similarity
        }
    )


# Function for creating embeddings using FAISS
def create_embeddings(chunks, embedding_model, storing_path="vectorstore"):
    # Creating the embeddings using FAISS
    vectorstore = FAISS.from_documents(chunks, embedding_model)
    
    # Saving the model in current directory
    vectorstore.save_local(storing_path)
    
    # returning the vectorstore
    return vectorstore

In [78]:
prompt = """
### System:
You are an AI Assistant that follows instructions extreamly well. \
Help as much as you can.
Please say I don't know if you don't know.

### User:
{prompt}

### Response:

"""

In [79]:
template = """
### System:
You are an respectful and honest assistant. You have to answer the user's \
questions using only the context provided to you. If you don't know the answer, \
just say you don't know. Don't try to make up an answer.

### Context:
{context}

### User:
{question}

### Response:
"""

In [80]:
def load_qa_chain(retriever, llm, prompt):
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever, # here we are using the vectorstore as a retriever
        chain_type="stuff",
        return_source_documents=True, # including source documents in output
        chain_type_kwargs={'prompt': prompt} # customizing the prompt
    )

In [81]:
def get_response(query, chain):
    # Getting response from chain
    response = chain({'query': query})
    
    # Wrapping the text for better output in Jupyter Notebook
    wrapped_text = textwrap.fill(response['result'], width=100)
    print(wrapped_text)

In [82]:
from langchain.llms import Ollama
from langchain import PromptTemplate

In [83]:
# Loading orca-mini from Ollama
llm = Ollama(model="llama3:70b", temperature=0)

# Loading the Embedding Model
embed = load_embedding_model(model_path="all-MiniLM-L6-v2")

In [84]:
# loading and splitting the documents
docs = load_pdf_data(file_path="data/A Tutorial on Quantum Approximate Optimization Algorithm QAOA Fundamentals and Applications.pdf")
documents = split_docs(documents=docs)

# creating vectorstore
vectorstore = create_embeddings(documents, embed)

# converting vectorstore to a retriever
retriever = vectorstore.as_retriever()

In [85]:
# Creating the prompt from the template which we created before
prompt = PromptTemplate.from_template(template)

# Creating the chain
chain = load_qa_chain(retriever, llm, prompt)

In [86]:
get_response("Summarize the document", chain)

The document discusses the Quantum Approximate Optimization Algorithm (QAOA), a quantum gate model
algorithm that solves optimization problems by mapping the objective function to a Hamiltonian and
using quantum mechanical techniques to find the optimal solution. The algorithm has a simple and
monotonous structure and relatively good performance. The document also mentions various research
topics related to QAOA, including parameter setting, compilation, combinatorial optimization,
factoring, and deep learning.


In [87]:
get_response("What is QAOA", chain)

QAOA stands for Quantum Alternating Operator Ansatz. It's a quantum gate model algorithm that begins
with mapping an objective function to a Hamiltonian, bringing the problem into Hilbert space. Then,
it uses quantum mechanical techniques to obtain the expectation value of the Hamiltonian and
iteratively finds the parameters that optimize this expectation value.


In [89]:
def load_qa_test_chain(llm, prompt):
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        return_source_documents=True, # including source documents in output
        chain_type_kwargs={'prompt': prompt} # customizing the prompt
    )

In [90]:
chain_test = load_qa_test_chain(llm, prompt)

ValidationError: 1 validation error for RetrievalQA
retriever
  field required (type=value_error.missing)