In [None]:
 # Import necessary modules
import fitz  # PyMuPDF for fast PDF parsing
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_community.document_compressors.flashrank_rerank import FlashrankRerank
from langchain_community.llms import Ollama
from langchain.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [None]:

# Set up embedding model: "nomic-embed-text" accessed through LangChain's OllamaEmbeddings wrapper.
# The same object is later handed to FAISS for building and loading the index.
# NOTE(review): presumably requires a running Ollama server with this model pulled — confirm.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Load and process PDF using PyMuPDF
# pdf_path is relative, so it is resolved against the notebook's working directory.
pdf_path = "meditations.pdf"  # Ensure this file exists

def load_pdf(fpath, max_pages=10):
    """Extract plain text from the leading pages of a PDF.

    Args:
        fpath: Path of the PDF file; passed straight to ``fitz.open``.
        max_pages: Maximum number of leading pages to read. ``None`` reads
            every page of the document.

    Returns:
        A single string holding the text of each non-blank page, with pages
        separated by an empty line. Pages whose text is empty or
        whitespace-only are skipped. Returns an empty string when no page
        yields text.
    """
    # The context manager closes the document even if extraction raises;
    # previously the file handle was left open.
    with fitz.open(fpath) as doc:
        page_count = len(doc) if max_pages is None else min(max_pages, len(doc))
        # Extract each page exactly once (it used to be parsed twice per page:
        # once for the blank check and once more for the join).
        page_texts = [doc[i].get_text("text") for i in range(page_count)]
    return "\n\n".join(text for text in page_texts if text.strip())


# Pull the raw text out of the PDF
raw_text = load_pdf(pdf_path)

# Break the text into overlapping chunks with RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
chunks = text_splitter.split_text(raw_text)

# Wrap every chunk in a LangChain Document so it can be indexed
documents = list(map(lambda piece: Document(page_content=piece), chunks))

  embeddings = OllamaEmbeddings(model="nomic-embed-text")


In [None]:
# Display the list of text chunks produced by text_splitter.split_text
chunks

["Marcus Aurelius' Meditations - tr. Casaubon v. 8.16, uploaded to www.philaletheians.co.uk, 14 July 2013 \nPage 1 of 128 \nThe meditations of \nMarcus Aurelius Antoninus \nOriginally translated by Meric Casaubon \n \nAbout this edition \nMarcus Aurelius Antoninus Augustus was Emperor of Rome from 161 to his death, \nthe last of the “Five Good Emperors.” He was nephew, son-in-law, and adoptive son \nof Antonius Pius. Marcus Aurelius was one of the most important Stoic philosophers,",
 'of Antonius Pius. Marcus Aurelius was one of the most important Stoic philosophers, \ncited by H.P. Blavatsky amongst famous classic sages and writers such as Plato, Eu-\nripides, Socrates, Aristophanes, Pindar, Plutarch, Isocrates, Diodorus, Cicero, and \nEpictetus.\n1 \nThis edition was originally translated out of the Greek by Meric Casaubon in 1634 as \n“The Golden Book of Marcus Aurelius,” with an Introduction by W.H.D. Rouse. It was',
 '“The Golden Book of Marcus Aurelius,” with an Introduction by 

In [None]:
# Number of chunks produced by the splitter (one Document is created per chunk)
len(chunks)

92

In [None]:
# Save or Load FAISS
faiss_path = "faiss_index"

# SECURITY NOTE: allow_dangerous_deserialization=True permits load_local to
# unpickle data stored alongside the index. Only load index directories that
# this notebook created itself; never point faiss_path at untrusted files.
try:
    vectorstore = FAISS.load_local(faiss_path, embeddings, allow_dangerous_deserialization=True)
except Exception as load_error:
    # Loading fails when no index has been saved yet, but it can also fail for
    # other reasons (e.g. unreadable files). Report the reason instead of
    # hiding it, then fall back to rebuilding the index from `documents`.
    print(f"Could not load FAISS index from {faiss_path!r}: {load_error!r}")
    print("Creating new FAISS index...")
    vectorstore = FAISS.from_documents(documents, embeddings)
    vectorstore.save_local(faiss_path)
    print("FAISS index created and saved.")
else:
    # Only reached when load_local succeeded.
    print("Loaded existing FAISS index.")


In [None]:
# Reload the index persisted under "faiss_index" straight from disk
# (no fallback here: this raises if the index has not been saved yet).
faiss_path = "faiss_index"

# NOTE: allow_dangerous_deserialization=True opts in to loading pickled data;
# only use it on index files created by this notebook.
vectorstore = FAISS.load_local(
    faiss_path,
    embeddings,
    allow_dangerous_deserialization=True,
)
print("Loaded existing FAISS index.")

Loaded existing FAISS index.


In [None]:

# Retrieval stack: similarity search over the FAISS store (k=4 candidates),
# wrapped by a Flashrank reranker acting as the document compressor.
retriever_vectordb = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)
compressor = FlashrankRerank()
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever_vectordb,
    base_compressor=compressor,
)


In [None]:

# Load LLM: the "gemma:2b" model via LangChain's Ollama wrapper; it is the generation step of rag_chain.
# NOTE(review): presumably needs a local Ollama server with gemma:2b pulled — confirm.
llm = Ollama(model="gemma:2b")

In [None]:
# Prompt: one instruction line followed by the question and retrieved-context slots.
# The pieces below concatenate to the exact same template text as a
# triple-quoted block (leading and trailing newline included).
prompt_template = (
    "\n"
    "Use the given context to answer the user's question. If the context is missing, try to answer based on general knowledge.\n"
    "Question: {question}\n"
    "Context: {context}\n"
)
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["question", "context"],
)


In [None]:
# Format retrieved docs
def format_docs(docs):
    """Merge retrieved documents into one context string.

    Prints a progress message, then returns the ``page_content`` of every
    item in ``docs`` joined by a blank line (empty string for no documents).
    """
    print('Fetching relevant document chunks...')
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)


In [None]:
# RAG Pipeline
# The leading dict maps the single input (the question string) to the two
# prompt variables:
#   "context"  - the input goes through compression_retriever, and the
#                returned documents are joined into one string by format_docs
#   "question" - RunnablePassthrough() forwards the input unchanged
# The filled prompt is then sent to the LLM and StrOutputParser() yields the
# answer as a plain string.
rag_chain = (
    {"context": compression_retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Display the composed chain to inspect its stages and their settings
rag_chain

{
  context: ContextualCompressionRetriever(base_compressor=FlashrankRerank(client=<flashrank.Ranker.Ranker object at 0x0000027A399D9180>, top_n=3, score_threshold=0.0, model='ms-marco-MultiBERT-L-12', prefix_metadata=''), base_retriever=VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000027A39AA1990>, search_kwargs={'k': 4}))
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="\nUse the given context to answer the user's question. If the context is missing, try to answer based on general knowledge.\nQuestion: {question}\nContext: {context}\n")
| Ollama(model='gemma:2b')
| StrOutputParser()

In [None]:
# Ask a sample question
# rag_chain takes the bare question string as input and returns the answer text.
question = "What is meditation?"
answer = rag_chain.invoke(question)
print(answer)

Fetching relevant document chunks...
Sure, here's a definition of meditation from the context:

Meditation is a practice of focusing the mind on a single thought or object to help reduce stress, improve focus, and achieve a state of inner peace.


In [None]:
# Ask a sample question
# Same question as the previous cell, run a second time; the recorded answer
# is worded differently from the first run.
# NOTE(review): presumably due to non-deterministic LLM sampling — confirm.
question = "What is meditation?"
answer = rag_chain.invoke(question)
print(answer)

Fetching relevant document chunks...
Sure, here's the answer to the question:

Meditation is a practice that involves focusing on one's thoughts and feelings to bring about a state of inner peace and clarity. It is a way of self-reflection and helps individuals to gain a deeper understanding of themselves and their surroundings.


In [None]:
# Factual lookup with an explicit format constraint ("month only").
# The recorded answer returns a full date, i.e. the constraint was not followed.
question = "When was AURELIUS ANTONINUS born?(give me month only)"
answer = rag_chain.invoke(question)
print(answer)

Fetching relevant document chunks...
According to the context, Marcus Aurelius Antoninus was born on April 26, A.D. 121.


In [None]:
# Question about Marcus Aurelius's philosophical training; the same question
# is also the first entry of test_cases in the evaluation section.
question = "What philosophy was Marcus Aurelius trained in?"
answer = rag_chain.invoke(question)
print(answer)

Fetching relevant document chunks...
The context says that Marcus Aurelius studied under the cynic Crates. So, his philosophy was Stoicism.


In [None]:
# Open-ended question about Marcus Aurelius's character.
question = "How did Marcus Aurelius show personal courage?"
answer = rag_chain.invoke(question)
print(answer)

Fetching relevant document chunks...
The context indicates that Marcus Aurelius showed personal courage by bending to obscurity despite his ideal of quiet happiness in home life.


In [None]:
# Vague question: "this section" is not defined anywhere in the input, so the
# retriever has only the question text itself to match against the chunks.
question = "What is the main idea of this section?"
answer = rag_chain.invoke(question)
print(answer)

Fetching relevant document chunks...
Sure, here is the main idea of the section:

The context discusses the different views on morality and the ideal way to live according to the Stoic philosophy. It highlights the importance of distinguishing between things that are in our power and those that are not, as well as the unity of the universe and man's duty to live in accord with nature.


# Model Evaluation Using cosine_similarity

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Initialize model for similarity checking
# This sentence-transformers model is used only to score answers in the
# evaluation loop; it is separate from the OllamaEmbeddings object that backs
# the FAISS index.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')


  from .autonotebook import tqdm as notebook_tqdm
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


In [None]:
# Define test cases
# Maps each evaluation question to a short reference answer. Both strings are
# used verbatim: the question is sent to rag_chain and the reference answer is
# embedded for the similarity score, so stray characters skew the evaluation.
# Fixed here: the "TThe" typo in the Pax Romana answer and a trailing tab in
# the Plato question.
# NOTE(review): several questions (Pax Romana, rationalism vs. empiricism,
# fall of Rome) look like general-knowledge questions that the indexed PDF may
# not cover — confirm this is intended.
test_cases = {
    "What philosophy was Marcus Aurelius trained in?": "Stoicism",
    "Who was Marcus Aurelius?": "Roman Emperor and Stoic philosopher",
    "What book did Marcus Aurelius write?": "Meditations",
    "What is the main idea of Stoicism?": "Stoicism teaches self-control, virtue, and accepting fate with reason",
    "What is the significance of the Pax Romana?": "The Pax Romana was a 200-year period of peace and stability in the Roman Empire",
    "Who was Socrates, and why is he important?": "Socrates was a Greek philosopher who developed the Socratic method and influenced Western thought",
    "What is the difference between rationalism and empiricism?": "Rationalism believes knowledge comes from reason, while empiricism argues it comes from experience",
    "What was the cause of the fall of the Roman Empire?": "The fall of Rome was caused by economic decline, military weakness, political corruption, and invasions",
    "How did Aristotle influence Western philosophy?": "Aristotle's work in logic, ethics, and science shaped philosophy, influencing medieval and modern thought",
    "What is Plato's theory of forms?": "Plato's theory of forms states that non-material abstract forms (ideals) represent the highest reality",
}


In [None]:
# Run every test question through the RAG chain and score the answer against
# its reference by cosine similarity of sentence embeddings.
for question, expected_answer in test_cases.items():
    model_answer = rag_chain.invoke(question)

    # Each encode call returns a one-row matrix for the single input text
    expected_embedding = embedding_model.encode([expected_answer])
    model_embedding = embedding_model.encode([model_answer])

    # The similarity matrix is 1x1; pick out its only entry
    similarity_score = cosine_similarity(expected_embedding, model_embedding)[0, 0]

    print(
        f"Question: {question}",
        f"Expected Answer: {expected_answer}",
        f"Model Answer: {model_answer}",
        f"Similarity Score: {similarity_score:.4f}",
        "-" * 50,
        sep="\n",
    )

Fetching relevant document chunks...


Batches: 100%|██████████| 1/1 [00:00<00:00, 80.35it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 46.71it/s]


Question: What philosophy was Marcus Aurelius trained in?
Expected Answer: Stoicism
Model Answer: The context says that Marcus Aurelius studied under the cynic Crates, but he did not neglect other philosophical systems. He also studied under other Stoic philosophers.
Similarity Score: 0.4615
--------------------------------------------------
Fetching relevant document chunks...


Batches: 100%|██████████| 1/1 [00:00<00:00, 86.51it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 45.36it/s]


Question: Who was Marcus Aurelius?
Expected Answer: Roman Emperor and Stoic philosopher
Model Answer: According to the context, Marcus Aurelius was a one of the most important Stoic philosophers. He was cited by many famous classic writers, including Plato, Eu-ripides, Socrates, Aristophanes, Pindar, Plutarch, Isocrates, Diodorus, Cicero, and Epictetus.
Similarity Score: 0.6869
--------------------------------------------------
Fetching relevant document chunks...


Batches: 100%|██████████| 1/1 [00:00<00:00, 79.88it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 42.45it/s]


Question: What book did Marcus Aurelius write?
Expected Answer: Meditations
Model Answer: The context says that Marcus Aurelius wrote the book “The Golden Book of Marcus Aurelius,” with an Introduction by W.H.D. Rouse.
Similarity Score: 0.1104
--------------------------------------------------
Fetching relevant document chunks...


Batches: 100%|██████████| 1/1 [00:00<00:00, 66.32it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 33.55it/s]


Question: What is the main idea of Stoicism?
Expected Answer: Stoicism teaches self-control, virtue, and accepting fate with reason
Model Answer: Sure. Here's the main idea of Stoicism:

The main idea of Stoicism is that virtue alone enables a person to live in accord with nature. The Stoics believed that virtue should be defined as virtue in the soul, not in the external world. They also believed that the test of truth is the pursuit of virtue, and that they should strive to live in accordance with what is natural.
Similarity Score: 0.7218
--------------------------------------------------
Fetching relevant document chunks...


Batches: 100%|██████████| 1/1 [00:00<00:00, 64.28it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 42.53it/s]


Question: What is the significance of the Pax Romana?
Expected Answer: TThe Pax Romana was a 200-year period of peace and stability in the Roman Empire
Model Answer: The context does not specify the significance of the Pax Romana, so I cannot answer this question from the provided context.
Similarity Score: 0.7159
--------------------------------------------------
Fetching relevant document chunks...


Batches: 100%|██████████| 1/1 [00:00<00:00, 36.24it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 19.40it/s]


Question: Who was Socrates, and why is he important?
Expected Answer: Socrates was a Greek philosopher who developed the Socratic method and influenced Western thought
Model Answer: According to the context, Socrates was one of the most important philosophers. He was cited by many other famous philosophers, including Plato, Aristotle, and Epictetus.
Similarity Score: 0.6243
--------------------------------------------------
Fetching relevant document chunks...


Batches: 100%|██████████| 1/1 [00:00<00:00, 44.28it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 19.23it/s]


Question: What is the difference between rationalism and empiricism?
Expected Answer: Rationalism believes knowledge comes from reason, while empiricism argues it comes from experience
Model Answer: Sure, here's the answer to the question:

Rationalism and empiricism are two different philosophical schools of thought that deal with the nature of truth and reason.

**Rationalism** is a school of thought that emphasizes the role of reason and logic in acquiring knowledge. Rationalists believe that reason is the only reliable way to determine truth, and that all knowledge can be reduced to logical propositions.

**Empiricism** is a school of thought that emphasizes the role of observation and experience in acquiring knowledge. Empiricists believe that all knowledge can be acquired through sense experience, and that reason is not necessary for acquiring knowledge.

The context does not provide any information about the difference between rationalism and empiricism, so I cannot answer this 

Batches: 100%|██████████| 1/1 [00:00<00:00, 39.03it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  8.95it/s]


Question: What was the cause of the fall of the Roman Empire?
Expected Answer: The fall of Rome was caused by economic decline, military weakness, political corruption, and invasions
Model Answer: The passage does not specify the cause of the fall of the Roman Empire, so I cannot answer this question from the provided context.
Similarity Score: 0.7451
--------------------------------------------------
Fetching relevant document chunks...


Batches: 100%|██████████| 1/1 [00:00<00:00, 37.39it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 14.65it/s]


Question: How did Aristotle influence Western philosophy?
Expected Answer: Aristotle's work in logic, ethics, and science shaped philosophy, influencing medieval and modern thought
Model Answer: According to the context, Aristotle's philosophy was not an eager intellectual inquiry, but rather more what we should call religious feeling. His philosophy softened and transformed by passing through a nature reverent and tolerant, gentle and free from guile.
Similarity Score: 0.6033
--------------------------------------------------
Fetching relevant document chunks...


Batches: 100%|██████████| 1/1 [00:00<00:00, 87.39it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 38.40it/s]

Question: What is Plato's theory of forms?	
Expected Answer: Plato's theory of forms states that non-material abstract forms (ideals) represent the highest reality
Model Answer: According to the context, Plato's theory of forms was that the universe is God of many forms. He believed that the universe was governed by a spiritual force that acted through different forms, such as fire, aether, spirit, soul, reason, and the ruling principle.
Similarity Score: 0.6083
--------------------------------------------------



