In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

# Collections to scrape, as (collection slug, last page number) pairs.
# The scraping loop requests https://sunnah.com/{slug}/{n} for every n from 1
# up to and including the second element.
# NOTE(review): the loop names the second element `hadith_count`; confirm
# against the site's URL scheme whether /{slug}/{n} addresses a single hadith
# or a larger unit (e.g. a whole book) before relying on that meaning.
base_urls = [
    ("bukhari", 97), 
    ("muslim", 56), 
    ("nasai", 51), 
    ("abudawud", 43), 
    ("tirmidhi", 49), 
    ("ibnmajah", 37)
]

# Seconds to wait on each HTTP request before giving up, so a single stalled
# connection cannot hang the whole scrape.
REQUEST_TIMEOUT = 30

# Rows of (collection, hadith_number, narrated_by, hadith_text). Rebuilt from
# scratch on every run so re-executing the cell never duplicates rows.
hadith_texts = []

# A single Session reuses the underlying connection across the many requests
# made to the same host.
with requests.Session() as session:
    # Iterate over each collection
    for collection, hadith_count in base_urls:
        print(f"Scraping Hadith from {collection} collection...")

        # Iterate over each page number of the collection
        for hadith_number in range(1, hadith_count + 1):
            hadith_url = f"https://sunnah.com/{collection}/{hadith_number}"
            # Log before the request so a slow or failing URL is identifiable.
            print("Scraping from:", hadith_url)

            try:
                response = session.get(hadith_url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # A network failure on one page must not discard the rows
                # already collected; report it and move on.
                print(f"Error fetching Hadith from {hadith_url}: {exc}")
                continue

            if response.status_code != 200:
                print(f"Error fetching Hadith from {hadith_url}")
                continue

            soup = BeautifulSoup(response.content, "html.parser")

            # find() returns None when the element is absent; guard against it
            # instead of crashing with AttributeError on .get_text().
            # NOTE(review): find() yields only the first match, so at most one
            # row is stored per page -- confirm that is the intended coverage.
            text_tag = soup.find("div", class_="text_details")
            if text_tag is None:
                print(f"No Hadith text found at {hadith_url}")
                continue
            hadith_text = text_tag.get_text(strip=True)

            # The narrator line is optional: keep the row with an empty
            # narrator rather than dropping the hadith text.
            narrator_tag = soup.find("div", class_="hadith_narrated")
            narrated_by = narrator_tag.get_text(strip=True) if narrator_tag is not None else ""

            hadith_texts.append((collection, hadith_number, narrated_by, hadith_text))

df = pd.DataFrame(hadith_texts, columns=["Collection", "Hadith Number", "Narrated By", "Hadith Text"])


Scraping Hadith from bukhari collection...
Scraping from: https://sunnah.com/bukhari/1
Scraping from: https://sunnah.com/bukhari/2
Scraping from: https://sunnah.com/bukhari/3
Scraping from: https://sunnah.com/bukhari/4
Scraping from: https://sunnah.com/bukhari/5
Scraping from: https://sunnah.com/bukhari/6
Scraping from: https://sunnah.com/bukhari/7
Scraping from: https://sunnah.com/bukhari/8
Scraping from: https://sunnah.com/bukhari/9
Scraping from: https://sunnah.com/bukhari/10
Scraping from: https://sunnah.com/bukhari/11
Scraping from: https://sunnah.com/bukhari/12
Scraping from: https://sunnah.com/bukhari/13
Scraping from: https://sunnah.com/bukhari/14
Scraping from: https://sunnah.com/bukhari/15
Scraping from: https://sunnah.com/bukhari/16
Scraping from: https://sunnah.com/bukhari/17
Scraping from: https://sunnah.com/bukhari/18
Scraping from: https://sunnah.com/bukhari/19
Scraping from: https://sunnah.com/bukhari/20
Scraping from: https://sunnah.com/bukhari/21
Scraping from: https:

Scraping from: https://sunnah.com/nasai/31
Scraping from: https://sunnah.com/nasai/32
Scraping from: https://sunnah.com/nasai/33
Scraping from: https://sunnah.com/nasai/34
Scraping from: https://sunnah.com/nasai/35
Scraping from: https://sunnah.com/nasai/36
Scraping from: https://sunnah.com/nasai/37
Scraping from: https://sunnah.com/nasai/38
Scraping from: https://sunnah.com/nasai/39
Scraping from: https://sunnah.com/nasai/40
Scraping from: https://sunnah.com/nasai/41
Scraping from: https://sunnah.com/nasai/42
Scraping from: https://sunnah.com/nasai/43
Scraping from: https://sunnah.com/nasai/44
Scraping from: https://sunnah.com/nasai/45
Scraping from: https://sunnah.com/nasai/46
Scraping from: https://sunnah.com/nasai/47
Scraping from: https://sunnah.com/nasai/48
Scraping from: https://sunnah.com/nasai/49
Scraping from: https://sunnah.com/nasai/50
Scraping from: https://sunnah.com/nasai/51
Scraping Hadith from abudawud collection...
Scraping from: https://sunnah.com/abudawud/1
Scraping

In [3]:
# Keywords for each of the 5 Pillars. All keywords are lowercase because the
# text is lowercased before matching. Dictionary order is significant: the
# first pillar with a matching keyword wins.
pillar_keywords = {
    "Shahada": ["faith", "belief", "testimony"],
    "Salat": ["prayer", "worship", "salaat"],
    "Zakat": ["charity", "almsgiving", "poor"],
    "Sawm": ["fasting", "ramadan", "abstain"],
    "Hajj": ["pilgrimage", "mecca", "kaaba"]
}


def categorize_hadith(hadith_text):
    """Return the first pillar whose keyword occurs in ``hadith_text``.

    Matching is a case-insensitive substring test, so "prayer" also matches
    "prayers". Pillars are tried in the insertion order of
    ``pillar_keywords``; a text that mentions several pillars is assigned to
    the earliest one.

    Args:
        hadith_text: The hadith text to classify. Non-string values (for
            example ``None`` or a NaN produced by a missing cell) are treated
            as having no text.

    Returns:
        The matching pillar name, or ``"Uncategorized"`` when no keyword is
        found or the input is not a string.
    """
    if not isinstance(hadith_text, str):
        return "Uncategorized"

    # Lowercase once instead of once per keyword.
    lowered_text = hadith_text.lower()
    for pillar, keywords in pillar_keywords.items():
        if any(keyword in lowered_text for keyword in keywords):
            return pillar
    return "Uncategorized"

# Label every row with its pillar by running the categorizer over each
# hadith text, element by element.
df["Pillar"] = df["Hadith Text"].map(categorize_hadith)

# Persist the labelled table. A CSV file is used here for demonstration; any
# database or storage system could take its place.
df.to_csv("hadiths.csv", index=False)

# Simple interactive lookup: repeatedly ask for a pillar name and print the
# hadiths categorized under it, until the user types 'exit'.
while True:
    print("\nSelect a pillar to retrieve Hadiths (type 'exit' to quit):")
    try:
        selected_pillar = input().strip()
    except (EOFError, KeyboardInterrupt):
        # No more input is available (or the user interrupted the prompt):
        # end the session cleanly instead of surfacing a traceback.
        break
    if selected_pillar.lower() == "exit":
        break

    # Compare case-insensitively, consistent with the 'exit' check above, so
    # that "salat" and "Salat" select the same rows.
    matches_pillar = df["Pillar"].str.lower() == selected_pillar.lower()
    filtered_df = df[matches_pillar]
    if not filtered_df.empty:
        print(filtered_df[["Collection", "Hadith Number", "Narrated By", "Hadith Text"]])
    else:
        print("No Hadiths found for the selected pillar.")



Select a pillar to retrieve Hadiths (type 'exit' to quit):
Salat
    Collection  Hadith Number  \
1      bukhari              2   
3      bukhari              4   
4      bukhari              5   
8      bukhari              9   
9      bukhari             10   
11     bukhari             12   
13     bukhari             14   
14     bukhari             15   
15     bukhari             16   
17     bukhari             18   
18     bukhari             19   
21     bukhari             22   
22     bukhari             23   
23     bukhari             24   
29     bukhari             30   
52     bukhari             53   
55     bukhari             56   
66     bukhari             67   
72     bukhari             73   
77     bukhari             78   
88     bukhari             89   
90     bukhari             91   
94     bukhari             95   
100     muslim              4   
101     muslim              5   
102     muslim              6   
103     muslim              7   
104     mu

Zakat
    Collection  Hadith Number  \
33     bukhari             34   
83     bukhari             84   
120     muslim             24   
145     muslim             49   
181      nasai             29   
182      nasai             30   
247   tirmidhi              1   
275   tirmidhi             29   
310   ibnmajah             15   

                                           Narrated By  \
33                               Narrated Abu Huraira:   
83                            Narrated Ka`b bin 'Ujra:   
120  Umar b. Khattab (Allah be pleased with him) re...   
145  Usama b. Zaid reported that Allah's Messenger ...   
181      It was narrated that 'Amr bin Al-Harith said:   
182            It was narrated that Abu Hurairah said:   
247                           Ibn `Umar narrated that:   
275  Abu Hurairah narrated that the Messenger of Al...   
310       It was narrated from 'Umar bin Khattab that:   

                                           Hadith Text  
33   You people say that 