# RAG using PDFs and open-source LLMs with Ollama

Basic setup

In [1]:
import os
from dotenv import load_dotenv

# Read variables from a `.env` file (python-dotenv) into the process environment.
load_dotenv()

# Name of the model served by Ollama. It is used further down for both the
# LLM and the embeddings. Alternatives that can be swapped in:
# MODEL = "<MODEL_NAME>"
# MODEL = "mistral-large"
MODEL = "llama3.1"

## Creating an instance to use the model

In [2]:
from langchain_community.llms import Ollama

# Create the Ollama LLM client for the model selected in MODEL.
model = Ollama(model=MODEL)

# Quick sanity check: send a plain-text prompt and display the reply.
model.invoke("Tell me a joke")

"Here's one:\n\nWhat do you call a fake noodle?\n\nAn impasta!\n\nHope that made you laugh! Do you want to hear another one?"

## Creating a chain

In [3]:
from langchain_core.output_parsers import StrOutputParser

# Output parser that turns the model output into a plain string.
parser = StrOutputParser()

# `|` composes the steps into one chain: the input goes to the model and the
# model output goes to the parser.
chain = model | parser
chain.invoke("Tell me a joke")

"Here's one:\n\nWhat do you call a fake noodle?\n\nAn impasta!\n\nHope that made you laugh! Do you want to hear another one?"

## Reading the PDF files

In [4]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF (path is relative to the working directory) and split it into
# Document chunks. Each chunk carries its source file and page number in
# `metadata` and its text in `page_content` (see the output below).
loader = PyPDFLoader("Quantum computing - Wikipedia.pdf")
pages = loader.load_and_split()
pages

[Document(metadata={'source': 'Quantum computing - Wikipedia.pdf', 'page': 0}, page_content='Quantum System One, a quantum\ncomputer by IBM from 2019 with 20\nsuperconducting qubits[ 1 ]\nQuantum computing\nA quantum computer  is a computer  that exploits\nquantum mechani cal phenom ena. On small scales, physical\nmatter exhibits properties of both particles and waves , and\nquantum computing leverages this behavior using\nspecialized hardw are. Classical physics  cannot explain the\noperation of these quantum devices, and a scalable quantum\ncomputer could perform some calculations exponentially\nfaster than any modern "classical" computer. In particular, a\nlarge-scale quantum computer could break widely used\nencryption schemes  and aid physicists in performing\nphysical simulatio ns; however, the current state of the art is\nlargely experimental and impractica l, with several obstacles\nto useful applications.\nThe basic unit of information  in quant um comp uting, the qubit  (or "

## Creating the prompt template

In [5]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders, {context} and {question}. The instruction
# tells the model to reply "I don't know" when it can't answer the question.
template = """
Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

# Build the prompt template and render it once with dummy values to check
# how the placeholders are filled in.
prompt = PromptTemplate.from_template(template)
prompt.format(context="Here is some context", question="Here is a question")

'\nAnswer the question based on the context below. If you can\'t\nanswer the question, reply "I don\'t know".\n\nContext: Here is some context\n\nQuestion: Here is a question\n'

Continue building the chain

In [6]:
# Put the prompt in front of the model: prompt -> model -> parser.
chain = prompt | model | parser

# Inspect the input the chain expects: `context` and `question`, both
# required strings (see the output below).
chain.input_schema.schema()

{'title': 'PromptInput',
 'type': 'object',
 'properties': {'context': {'title': 'Context', 'type': 'string'},
  'question': {'title': 'Question', 'type': 'string'}},
 'required': ['context', 'question']}

In [7]:
# Try the chain with a hand-written context instead of retrieved documents.
chain.invoke(
    {
        "context": "Quantum computing is bananas",
        "question": "What is bananas?"
    }
)

"I don't know."

## Setup vector store for the embeddings - In Memory

This is only for the exercise. In production, or in any real working solution, you would
use something more durable, such as a persistent vector database or a managed service like **Pinecone**.

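It's important to notice that the same `embeddings` model must be used both to index the documents and to embed the queries. In this exercise we reuse the LLM model (`llama3.1`) for the embeddings; it does not have to be the same model as the LLM, and a dedicated embedding model typically gives better retrieval results.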

In [10]:
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_community.embeddings import OllamaEmbeddings

# Embeddings client backed by Ollama, using the same model name as the LLM.
embeddings = OllamaEmbeddings(model=MODEL)
# Embed every PDF chunk in `pages` and index the vectors in an in-memory
# store (for the exercise only; nothing is kept after the session ends).
vectorstore = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)

In [13]:
# Expose the vector store as a retriever and run a sample query. The result
# is the list of Document chunks returned for that query (see the output below).
retriever = vectorstore.as_retriever()
retriever.invoke("Quantum computing principles")

[Document(metadata={'source': 'Quantum computing - Wikipedia.pdf', 'page': 24}, page_content='(4): 64–70. arXiv :1912.01299  (https://arxiv .org/abs/1912.01299) . doi:10.1038/s41928-020-00528-y\n(https://doi.org/10.1038%2Fs41928-020-00528-y) . S2CID \xa0231715555  (https://api.semanticscholar .\norg/CorpusID:231715555) .7/28/24, 11:08 PM Quantum computing - Wikipedia\nhttps://en.wikipedia.org/wiki/Quantum_computing 25/33'),
 Document(metadata={'source': 'Quantum computing - Wikipedia.pdf', 'page': 32}, page_content='Quantum computing for the determined  (https://www .youtube.com/playlist?list=PL1826E60FD05B\n44E4)  – 22 video lectures by Michael Nielsen\nVideo Lectures  (http://www .quiprocone.org/Protected/DD_lectures.htm)  by David Deutsch\nLectures at the Institut Henri Poincaré (slides and videos)  (https://web.archive.org/web/201603031\n83533/http://www .quantware.ups-tlse.fr/IHP2006/)\nOnline lecture on An Introduction to Quantum Computing, Edward Gerjuoy (2008)  (https://web.arc

In [14]:
from operator import itemgetter

def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Only ``page_content`` is kept, so the prompt receives the passage text
    instead of the repr of a list of ``Document`` objects (which would also
    include metadata and escaped newlines).

    Args:
        docs: Iterable of documents returned by the retriever; each item must
            expose a ``page_content`` string.

    Returns:
        The passages separated by blank lines ("" when nothing was retrieved).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The dict fans the incoming {"question": ...} out to both prompt variables:
#   context  -> the question is sent to the retriever and the hits are
#               flattened to plain text by format_docs
#   question -> the question is passed through unchanged
chain = (
    {
        "context": itemgetter("question") | retriever | format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)

chain.invoke({"question": "What is quantum computing?"})

'Quantum computing refers to a new paradigm in computation that uses the principles of quantum mechanics to perform operations on data. It differs from classical computers, which use bits (0s and 1s) to process information.\n\nIn quantum computing:\n\n1. **Bits are replaced with qubits**: Qubits can exist as both 0 and 1 at the same time, thanks to a phenomenon called superposition. This means that a single qubit can represent multiple possibilities simultaneously.\n2. **Quantum gates and entanglement**: Quantum operations (gates) and quantum states (entanglements) are used to manipulate and combine qubits.\n\nThe potential benefits of quantum computing include:\n\n1. **Exponential scaling**: Quantum computers can process vast amounts of data exponentially faster than classical computers for certain problems.\n2. **New approaches to optimization**: Quantum algorithms can tackle complex optimization tasks more efficiently than traditional methods.\n3. **Simulation capabilities**: Quantu