In [None]:

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate

from langchain.chains import RetrievalQA

In [9]:
import pandas as pd

# Read the CSV file into a DataFrame
data_file = "hadith_data.csv"
df = pd.read_csv(data_file)

# Collect the "Hadith Text" column as a plain Python list,
# keeping only the rows where that column is not null
hadith_texts = df.loc[df["Hadith Text"].notna(), "Hadith Text"].tolist()


In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=1000 and chunk_overlap=200
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Split every Hadith text and flatten the per-text chunk lists into a single
# list, preserving the order of the texts and of the chunks within each text
final_documents = [
    piece
    for hadith in hadith_texts
    for piece in text_splitter.split_text(hadith)
]

# Preview: show the first 5 chunks
print(final_documents[:5])


['Allah\'s Messenger (ﷺ) said, "While I was at Mecca the roof of my house was opened and Gabriel descended, \nopened my chest, and washed it with Zamzam water. Then he brought a golden tray full of wisdom \nand faith and having poured its contents into my chest, he closed it. Then he took my hand and \nascended with me to the nearest heaven, when I reached the nearest heaven, Gabriel said to the \ngatekeeper of the heaven, \'Open (the gate).\' The gatekeeper asked, \'Who is it?\' Gabriel answered: \n\'Gabriel.\' He asked, \'Is there anyone with you?\' Gabriel replied, \'Yes, Muhammad I is with me.\' He \nasked, \'Has he been called?\' Gabriel said, \'Yes.\' So the gate was opened and we went over the nearest \nheaven and there we saw a man sitting with some people on his right and some on his left. When he \nlooked towards his right, he laughed and when he looked toward his left he wept. Then he said, \n\'Welcome! O pious Prophet and pious son.\' I asked Gabriel, \'Who is he?\' He repl

In [11]:
## Embedding using Hugging Face
# Build the BGE embedding wrapper: the model is loaded on the CPU and the
# encoder is asked to normalize the embeddings it produces.
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",  # alternative: sentence-transformers/all-MiniLM-l6-v2
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)

# Two short sample passages to embed
texts = [
    "While I was at Mecca the roof of my house was opened...",
    "Gabriel said to the gatekeeper of the heaven, 'Open (the gate).'",
]

# Embed the sample passages (one vector per input text)
embeddings = huggingface_embeddings.embed_documents(texts)

# Show the embedding vector of the first sample passage
print(embeddings[0])



[0.019624490290880203, 0.04553646966814995, 0.03260500729084015, -0.0031482118647545576, 0.022959046065807343, -0.006676042918115854, 0.08351331204175949, 0.001752863870933652, 0.04383356496691704, -0.0391056090593338, 0.0016460524639114738, 0.019809318706393242, 0.010862298309803009, 0.003174691693857312, 0.06661178171634674, 0.011614515446126461, -0.057227615267038345, 0.03592041879892349, -0.09562207013368607, 0.04215120151638985, -0.09020876884460449, -0.046471692621707916, -0.008220091462135315, -0.056422024965286255, -0.02743370085954666, 0.10401047766208649, 0.015995267778635025, 0.030536523088812828, -0.013632283546030521, -0.06252141296863556, -0.0202234648168087, -0.0506729818880558, 0.03919683024287224, -0.01572345569729805, 0.03516285493969917, 0.05221071094274521, -0.053536150604486465, -0.03585733473300934, 0.03800337761640549, 0.01365664042532444, 0.08970610797405243, 0.03337949514389038, 0.004542062059044838, -0.0356660820543766, 0.006378337275236845, -0.032881565392017