In [None]:
!pip install langchain-community==0.2.4 langchain==0.2.3 faiss-cpu==1.8.0 unstructured==0.14.5 unstructured[pdf]==0.14.5 transformers==4.41.2 sentence-transformers==3.0.1

**Importing the Dependencies**

In [2]:
import os

from langchain_community.llms import Ollama
from langchain.document_loaders import UnstructuredFileLoader
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA

In [3]:
# Build the LLM handle through the Ollama wrapper, selecting the
# "llama3:instruct" model with temperature set to 0.
llm = Ollama(model="llama3:instruct", temperature=0)

In [None]:
# Parse the source PDF into LangChain documents.
# The path is relative, so the file must sit in the working directory.
pdf_path = "NIPS-2017-attention-is-all-you-need-Paper.pdf"
loader = UnstructuredFileLoader(pdf_path)
documents = loader.load()

In [None]:
# Configure the chunker: split on newline characters, targeting chunks of
# about 1000 characters with a 200-character overlap between neighbours.
# The separator must be the newline escape "\n"; the previous value "/n" was a
# literal slash followed by "n", which practically never occurs in the text,
# so the document was never actually split into chunks.
text_splitter = CharacterTextSplitter(separator="\n",
                                      chunk_size=1000,
                                      chunk_overlap=200)

In [None]:
# Apply the configured splitter to the loaded documents to get the chunks
# that will be embedded and indexed.
text_chunks = text_splitter.split_documents(documents=documents)

In [None]:
# Load the sentence-embedding model used to vectorise the chunks.
# The model is named explicitly instead of relying on the library default, so
# the vectors stay comparable if that default changes in a later release.
# NOTE(review): this is believed to be the default of the pinned
# langchain-community version - confirm before relying on identical vectors.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

In [None]:
# Embed every chunk and build the FAISS vector index that backs retrieval.
knowledge_base = FAISS.from_documents(
    documents=text_chunks,
    embedding=embeddings,
)

In [None]:
# Wire the LLM and the FAISS-backed retriever into a retrieval QA chain.
retriever = knowledge_base.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

In [None]:
# Ask a high-level question about the paper and print the chain's answer.
question = "What is this document about?"
query_payload = {"query": question}
response = qa_chain.invoke(query_payload)
print(response["result"])

This document appears to be a research paper or article about the Transformer, a sequence transduction model based entirely on attention, which outperforms previous architectures in translation tasks. The authors discuss their approach, its advantages, and future directions for applying attention-based models to other tasks.


In [None]:
# Ask about the model architecture described in the paper and print the answer.
question = "What is the architecture discussed in the model?"
query_payload = {"query": question}
response = qa_chain.invoke(query_payload)
print(response["result"])

The architecture discussed in the model is the Transformer, which is a sequence transduction model based entirely on attention, replacing recurrent layers commonly used in encoder-decoder architectures with multi-headed self-attention.
