# Data source: LK Hadith Corpus (search Google for "LK Hadith Corpus Github")

In [1]:
!git clone https://github.com/ShathaTm/LK-Hadith-Corpus.git

fatal: destination path 'LK-Hadith-Corpus' already exists and is not an empty directory.


In [2]:
import pandas as pd
import glob

In [3]:
# Column labels assigned positionally to each corpus CSV when it is read
# with pd.read_csv(..., names=colnames).
colnames = [
    # chapter / section location
    'Chapter_Number', 'Chapter_English', 'Chapter_Arabic',
    'Section_Number', 'Section_English', 'Section_Arabic',
    'Hadith_number',
    # English text
    'English_Hadith', 'English_Isnad', 'English_Matn',
    # Arabic text
    'Arabic_Hadith', 'Arabic_Isnad', 'Arabic_Matn', 'Arabic_Comment',
    # grading
    'English_Grade', 'Arabic_Grade',
]


In [6]:
# Collect every CSV file below the cloned corpus directory, at any depth,
# in sorted order so the row order of later tables is stable between runs.
path = 'LK-Hadith-Corpus'
csv_pattern = path + '//**//*.csv'
files = glob.glob(csv_pattern, recursive=True)
files.sort()

print(len(files))

335


In [7]:
import re

def clean_text(text):
  """Normalise a piece of text for embedding.

  Lower-cases the text, deletes every character that is not an ASCII letter,
  digit or whitespace, collapses runs of whitespace into a single space and
  trims the ends.

  Args:
    text: The text to clean. None and float NaN (how pandas represents an
      empty cell) are treated as empty text; any other non-string value is
      converted with str() first.

  Returns:
    The cleaned string ('' for missing input).
  """
  if not isinstance(text, str):
    # `text != text` is only true for NaN.
    if text is None or (isinstance(text, float) and text != text):
      return ''
    text = str(text)
  text = text.lower()
  text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # removes punctuation
  text = re.sub(r'\s+', ' ', text)            # collapses extra whitespace
  # Collapsing leaves a single space at either end when the input started or
  # ended with whitespace/punctuation; trim it.
  return text.strip()

In [9]:
# Gather the English columns of every corpus file as plain row lists, adding a
# `Clean_Hadith` column produced by clean_text.
all_hadith = []
for file in files:
  # skiprows=1 discards the first line of each file (presumably its own header
  # row - TODO confirm) because the labels come from `colnames` instead.
  df = pd.read_csv(file, names=colnames, skiprows=1)
  df = (
    df
    # A missing English text would otherwise become the literal string 'nan'
    # through astype(str) and end up embedded and searchable.
    .dropna(subset=['English_Hadith'])
    .assign(Clean_Hadith=lambda d: d['English_Hadith'].astype(str).apply(clean_text))
  )
  all_hadith.extend(df[['Chapter_Number', 'Chapter_English', 'Section_Number',
       'Section_English', 'Hadith_number', 'English_Hadith', 'Clean_Hadith', 'English_Grade']].values.tolist())


In [10]:
# Turn the row lists gathered in `all_hadith` into one labelled table.
hadith_columns = [
    'Chapter_Number', 'Chapter_English',
    'Section_Number', 'Section_English',
    'Hadith_number', 'English_Hadith', 'Clean_Hadith', 'English_Grade',
]
hadith_df = pd.DataFrame(all_hadith, columns=hadith_columns)

In [11]:
# Write the combined table to disk; index=False leaves out the row-index column.
cleaned_csv_file = 'cleaned_hadith_data.csv'
hadith_df.to_csv(cleaned_csv_file, index=False)

# Hugging Face - Sentence Transformer

In [11]:
!pip install sentence-transformers



In [11]:
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Load the pretrained sentence-embedding model used for both the corpus and the queries.
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)

In [14]:
# Embed the cleaned text of every row of hadith_df.
clean_texts = hadith_df['Clean_Hadith'].values
embeddings = model.encode(clean_texts)

In [15]:
import numpy as np
# Make sure the embeddings are held as a (fresh) NumPy array before saving/indexing.
embeddings = np.array(embeddings, copy=True)

In [16]:
# Store the embedding matrix on disk so it can be reloaded without re-encoding.
embeddings_file = 'hadith_embeddings.npy'
np.save(embeddings_file, embeddings)


In [4]:
# Reload the embedding matrix that was written by np.save earlier in this notebook.
embeddings_file = 'hadith_embeddings.npy'
embeddings = np.load(embeddings_file)

# FAISS

In [18]:
# !pip install faiss-gpu
!pip install faiss-cpu



In [5]:
import faiss

# Create an empty flat index whose vector width equals the second axis of `embeddings`.
embedding_shape = embeddings.shape
dimensions = embedding_shape[1]
faiss_index = faiss.IndexFlatL2(dimensions)  # L2 Distance (Euclidean Distance)

In [6]:
# Empty the index first so that re-running this cell does not append the same
# vectors a second time (which would return every hadith twice in searches).
faiss_index.reset()
faiss_index.add(embeddings)

In [7]:
# Persist the populated index to disk.
index_file = 'faiss_index.faiss'
faiss.write_index(faiss_index, index_file)

In [None]:


def get_similar_hadith(query, model, faiss_index, count=5):
    """Return up to `count` hadith whose embeddings are nearest to `query`.

    Rows are looked up in the notebook-level `hadith_df`, so index position i
    in `faiss_index` must correspond to row i of that table.

    Args:
        query: Text of the question; encoded with `model.encode([query])`.
        model: Object exposing `encode(list_of_str)`.
        faiss_index: Object exposing `search(vectors, k)` that returns a
            `(distances, indices)` pair with one row per query vector.
        count: Maximum number of results to return.

    Returns:
        A list of dicts in the order returned by the index, each with the keys
        "rank" (1-based), "distance" (float), "hadith" (the `English_Hadith`
        text) and "reference" (the `Reference` column when `hadith_df` has
        one, otherwise "N/A"). The list is shorter than `count` when the index
        yields fewer usable neighbours, and empty when `count <= 0`.
    """
    if count <= 0:
        return []

    # NOTE(review): the index in this notebook is built from the `Clean_Hadith`
    # column, while the query is encoded exactly as typed - confirm whether the
    # query should go through the same cleaning step.
    query_embedding = model.encode([query])
    distance, indices = faiss_index.search(query_embedding, count)

    has_reference = 'Reference' in hadith_df.columns
    row_total = len(hadith_df)

    results = []
    for row_distance, row_index in zip(distance[0], indices[0]):
        row_index = int(row_index)
        # FAISS fills missing neighbours with index -1; `.iloc[-1]` would
        # silently return the last row of the table instead of "no result".
        if row_index < 0 or row_index >= row_total:
            continue
        results.append({
            "rank": len(results) + 1,
            "distance": float(row_distance),
            "hadith": hadith_df['English_Hadith'].iloc[row_index],
            "reference": hadith_df['Reference'].iloc[row_index] if has_reference else "N/A"
        })
    return results

def chatbot():
    """Run an interactive console loop that prints the closest hadith per question.

    Questions are read with input() until the user types 'exit', 'quit' or
    'bye' (case-insensitive, surrounding whitespace ignored) or the input
    stream ends / is interrupted. Each question is passed to
    get_similar_hadith together with the notebook-level `model` and
    `faiss_index`, and the returned entries are printed. Nothing is returned.
    """
    print("Welcome to the Hadith Search Chatbot!")
    print("Ask me anything to find relevant hadith. Type 'exit' to quit.")

    while True:
        try:
            user_input = input("\nYour question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel: leave the loop cleanly
            # instead of surfacing a traceback.
            print("\nGoodbye! May Allah bless you.")
            break

        if user_input.lower() in ('exit', 'quit', 'bye'):
            print("Goodbye! May Allah bless you.")
            break

        if not user_input:
            print("Please enter a valid question.")
            continue

        try:
            results = get_similar_hadith(user_input, model, faiss_index)

            if not results:
                print("\nNo matching hadith were found.")
                continue

            print("\nHere are the most relevant hadith:")
            for result in results:
                # The value is an L2 distance, so a smaller number means a
                # closer match - it is not a similarity score.
                print(f"\nHadith #{result['rank']} (Distance: {result['distance']:.4f}, lower is closer)")
                print(f"Reference: {result['reference']}")
                print(result['hadith'])
                print("-" * 50)

        except Exception as e:
            # Keep the session alive on a failed lookup, but report the cause.
            print(f"An error occurred: {e}. Please try again.")



# Start the interactive session; this call blocks on input() until the user
# types 'exit', 'quit' or 'bye'.
chatbot()

Welcome to the Hadith Search Chatbot!
Ask me anything to find relevant hadith. Type 'exit' to quit.

Here are the most relevant hadith:

Hadith #1 (Similarity: 0.6906)
Reference: N/A
It was narrated from Abu Hurairah that the Messenger of Allah (ﷺ) said: "Praying in congregation is twenty-five portions better than one of you praying alone."
--------------------------------------------------

Hadith #2 (Similarity: 0.6928)
Reference: N/A
Narrated `Abdullah bin `Umar: Allah's Messenger (ﷺ) said, "The prayer in congregation is twenty seven times superior to the prayer offeredby person alone."
--------------------------------------------------

Hadith #3 (Similarity: 0.6941)
Reference: N/A
Abu Huraira reported Allah's Messenger (ﷺ) as saying: Prayer said in a congregation is equivalent to twenty-five (prayers) as compared with the prayer said by a single person.
--------------------------------------------------

Hadith #4 (Similarity: 0.7201)
Reference: N/A
It was narrated from Abu Hurair