In [1]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_ollama import OllamaLLM
from langchain.chains import RetrievalQA
from utils import clean
from dotenv import load_dotenv

In [2]:
# Load variables from a .env file into the process environment.
# Kept as a bare last expression so the cell still displays the call's result.
# NOTE(review): no visible cell reads an environment variable directly — confirm
# this is still required (e.g. for tokens picked up implicitly by a library).
load_dotenv()

True

In [3]:
# Folder that holds the source PDFs, relative to the notebook's working directory.
# Stored under a descriptive name so the builtin `dir()` is not shadowed for the
# rest of the kernel session.
DOCUMENTS_DIR = "../../documents"

# os.listdir returns entries in arbitrary order and also lists non-PDF entries
# (hidden files, sub-folders), which the PDF loader in the next cell cannot parse.
# Keep only *.pdf files and sort them so every run sees the same document order.
documents_path = sorted(
    f"{DOCUMENTS_DIR}/{entry}"
    for entry in os.listdir(DOCUMENTS_DIR)
    if entry.lower().endswith(".pdf")
)

# Fail here with a clear message instead of later, deep inside the indexing step.
if not documents_path:
    raise FileNotFoundError(f"No PDF files found in {DOCUMENTS_DIR!r}")

In [4]:
# Load each file with PyPDFLoader, pass the text of every returned document
# through the project helper `clean`, and gather everything in one flat list.
documents = []
for pdf_path in documents_path:
    pages = PyPDFLoader(pdf_path).load()

    # Rewrite the text in place; the document objects themselves are kept.
    for page in pages:
        page.page_content = clean(page.page_content)

    documents += pages

In [10]:
# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 1200
CHUNK_OVERLAP = 120

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(documents)

In [11]:
# Sentence-embedding model used to vectorise the chunks.
# The chunks are not embedded here: the model object is handed to the vector
# store constructor in a later cell.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [12]:
# Build the FAISS vector store from the chunks, using `embedding_model`
# to turn each chunk into a vector.
vectorstore = FAISS.from_documents(
    embedding=embedding_model,
    documents=chunks,
)

In [13]:
# Assemble the question-answering chain: the FAISS-backed retriever supplies
# context, the Ollama model writes the answer, and the chain also returns the
# source documents it used.
llm = OllamaLLM(model="llama3.2:1b")
retriever = vectorstore.as_retriever()

rag_pipeline = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

In [14]:
# Ask the RAG chain one question and print the generated answer.
query = "how use nural network in nlp"
response = rag_pipeline.invoke(query)

print("Answer:", response["result"], sep="\n")

Answer:
I don't know. The text doesn't specifically mention neural networks as a tool for NLP (Natural Language Processing), but rather introduces traditional methods such as hand-built features, word2vec embeddings, and feedforward networks for sentiment analysis.


In [19]:
# Ask the RAG chain one question and print the generated answer.
query = "who author this book"
response = rag_pipeline.invoke(query)

print("Answer:", response["result"], sep="\n")

Answer:
I don't know.


In [20]:
# Ask the RAG chain one question and print the generated answer.
query = "this book explain the lstm and rnn"
response = rag_pipeline.invoke(query)

print("Answer:", response["result"], sep="\n")

Answer:
Based on the provided context, I don't know how much information about the LSTM network (long short-term memory) is mentioned in the passage. The passage primarily discusses RNN architectures and their applications in natural language processing. If you're looking for information specifically about LSTMs, it seems they are mentioned as a common extension to traditional RNNs, but not discussed in detail within this snippet.


In [21]:
# Ask the RAG chain one question and print the generated answer.
query = "who best naive bays or transformer"
response = rag_pipeline.invoke(query)

print("Answer:", response["result"], sep="\n")

Answer:
I don't know. The text doesn't mention which of the two classifiers (Naive Bayes Classifiers and Transformer) is more "best" in this context.


In [22]:
# Ask the RAG chain one question and print the generated answer.
query = "In this book what the chapter number for  vector semantic"
response = rag_pipeline.invoke(query)

print("Answer:", response["result"], sep="\n")

Answer:
I don't know, as I'm not sure which specific question you're referring to. The text provided discusses vectors and their application in information retrieval and vector semantics, but it does not specify a particular question that requires an answer. If you could provide more context or clarify which chapter or section you are asking about, I would be happy to try and assist further.


In [24]:
# Ask the RAG chain one question and print the generated answer.
query = "what the transformer use case"
response = rag_pipeline.invoke(query)

print("Answer:", response["result"], sep="\n")

Answer:
I don't know. The text doesn't provide enough information to determine how the transformer uses key, query, and value in its architecture. It only mentions that it came from memory networks, which is a mechanism for adding an external read-write memory to networks, but it does not explain how these terms are used specifically within the transformer's architecture.


In [25]:
# Ask the RAG chain one question and print the generated answer.
query = "what the naive bays use case"
response = rag_pipeline.invoke(query)

print("Answer:", response["result"], sep="\n")

Answer:
I don't know, as this information is not provided in the context.
