### Creating a basic retriever chain 

In [12]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain.prompts.chat import ChatPromptTemplate
from langchain.chat_models import ChatOllama
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [3]:
# Read the source PDF into LangChain documents.
loader = PyPDFLoader(file_path="Files/attention.pdf")
documents = loader.load()

In [4]:
# Break the loaded documents into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
final_documents = text_splitter.split_documents(documents=documents)

In [5]:
# Embedding model served through Ollama; used below to build the vector store.
embedding = OllamaEmbeddings(model="nomic-embed-text")

In [8]:
# Build a Chroma vector store from the chunks using the embedding model above.
vectorstore = Chroma.from_documents(
    embedding=embedding,
    documents=final_documents,
)

In [19]:
# User-message template. It exposes two variables:
#   {context} - the retrieved text the answer must be grounded in
#   {input}   - the user's question
template = (
    "Answer the question based only on the following context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {input}\n"
)

# Two-message chat prompt: a fixed system message plus the templated user message.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an helpful assistant"),
    ("user", template),
])

In [20]:
# Chat model served through Ollama that produces the final answer.
llm = ChatOllama(model="llama3.2:3b")

# Expose the vector store through the retriever interface so it can be used in a chain.
retriever = vectorstore.as_retriever()

### Method 1 Of Creating Chain

In [21]:
# Combine the prompt and the LLM into a chain that answers from supplied documents.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Put the retriever in front of the document chain, so a single call
# both fetches the documents and answers from them.
retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [22]:
# The question goes in under the "input" key, matching the {input} variable of the prompt.
retriever_chain.invoke(
    {"input": "what is self attention mechanism"}
)

{'input': 'what is self attention mechanism',
 'context': [Document(metadata={'page': 1, 'source': 'Files/attention.pdf'}, page_content='in the distance between positions, linearly for ConvS2S and logarithmically for ByteNet. This makes\nit more difficult to learn dependencies between distant positions [ 12]. In the Transformer this is\nreduced to a constant number of operations, albeit at the cost of reduced effective resolution due\nto averaging attention-weighted positions, an effect we counteract with Multi-Head Attention as\ndescribed in section 3.2.\nSelf-attention, sometimes called intra-attention is an attention mechanism relating different positions\nof a single sequence in order to compute a representation of the sequence. Self-attention has been\nused successfully in a variety of tasks including reading comprehension, abstractive summarization,\ntextual entailment and learning task-independent sentence representations [4, 27, 28, 22].\nEnd-to-end memory networks are based on

### Method 2 Of Creating Chain

In [13]:
def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# LCEL pipeline: the raw question string is fanned out to the retriever (whose
# results are flattened to plain text) and passed through unchanged, then fed
# to the prompt, the LLM, and finally parsed to a plain string.
# The mapping keys must match the prompt's variables, {context} and {input};
# the previous 'question' key left {input} unfilled, so the chain failed on a
# fresh run.
chain = (
    {'context': retriever | format_docs, 'input': RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [14]:
# This chain takes the bare question string; the pipeline builds the prompt inputs itself.
chain.invoke(
    "what is self attention"
)

'According to the provided context, Self-Attention (also referred to as intra-attention) is an attention mechanism that relates different positions of a single sequence in order to compute a representation of the sequence. It has been used successfully in various tasks such as reading comprehension, abstractive summarization, textual entailment, and learning task-independent sentence representations.'