In [1]:
pip install requests beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
from bs4 import BeautifulSoup
import csv  # Add this import to handle CSV writing

In [3]:
# Surah number -> slug used for that surah in the noblequran.com URL.
# The slugs are listed in surah order; enumerate(start=1) assigns the numbers,
# so entry k of the list belongs to surah k (1..114).
surah_names = dict(enumerate([
    # 1-10
    "alfatihah", "al-baqarah", "ale-imran", "an-nisa", "al-maidah",
    "al-anam", "al-araf", "al-anfal", "at-taubah", "yunus",
    # 11-20
    "hud", "yusuf", "ar-rad", "ibrahim", "al-hijr",
    "an-nahl", "al-isra", "al-kahf", "maryam", "ta-ha",
    # 21-30
    "al-anbiya", "al-hajj", "al-muminun", "an-nur", "al-furqan",
    "ash-shuara", "an-naml", "al-qasas", "al-ankabut", "ar-rum",
    # 31-40
    "luqman", "as-sajdah", "al-ahzab", "saba", "al-fatir-or-al-malaikah",
    "ya-sin", "as-saffaat", "saad", "az-zumar", "ghafir-or-al-mumin",
    # 41-50
    "fussilat", "ash-shurah", "az-zukhruf", "ad-dukhan", "al-jathiyah",
    "al-ahqaf", "muhammad", "al-fath", "al-hujuraat", "qaf",
    # 51-60
    "adh-dhariyat", "at-tur", "an-najm", "al-qamar", "ar-rahman",
    "al-waqiah", "al-hadid", "al-mujadilah", "al-hashr", "al-mumtahinah",
    # 61-70
    "as-saff", "al-jumuah", "al-munafiqun", "al-taghabun", "at-talaaq",
    "at-tahrim", "al-mulk", "al-qalam", "al-haqqah", "al-maarij",
    # 71-80
    "nuh", "al-jinn", "al-muzzammil", "al-muddathir", "al-qiyamah",
    "al-insan", "al-mursalaat", "an-naba", "an-naziaat", "abasa",
    # 81-90
    "at-takwir", "al-infitaar", "al-mutaffifin", "al-inshiqaq", "al-burooj",
    "at-tariq", "al-ala", "al-ghaashiyah", "al-fajr", "al-balad",
    # 91-100
    "ash-shams", "al-lail", "ad-duha", "ash-sharh", "at-tin",
    "al-alaq", "al-qadr", "al-baiyyinah", "al-zalzalah", "al-aadiyaat",
    # 101-110
    "al-qariah", "at-takaathur", "al-asr", "al-humazah", "al-feel",
    "al-quraish", "al-maaoon", "al-kauthar", "al-kaafiroon", "an-nasr",
    # 111-114
    "al-masad", "al-ikhlaas-or-at-tauhid", "al-falaq", "an-naas",
], start=1))

# URL template for Noble Quran chapter pages, with a single "{}" placeholder.
# NOTE(review): scrape_surah does not use this template; it builds URLs of the
# form "https://noblequran.com/surah-<slug>/" inline. Confirm whether this
# constant is still needed.
base_url = "https://noblequran.com/surah-{}.html"

# to scrape a specific surah's translation
def scrape_surah(surah_num, timeout=30):
    """Download one surah page from noblequran.com and extract its text.

    Args:
        surah_num: Surah number, used as the key into ``surah_names``.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get``. Without it a stalled connection would block
            the whole scraping run indefinitely.

    Returns:
        A ``(surah_name, translation_texts)`` tuple:

        * ``(None, None)`` if the number is not in ``surah_names``, the
          request raises or returns a non-200 status, or the page has no
          ``h2.elementor-heading-title`` element;
        * ``(surah_name, [])`` if the heading is present but there are no
          ``div.elementor-widget-text-editor`` elements, or none of their
          paragraphs contain text;
        * otherwise the stripped heading text and the list of stripped,
          non-empty ``<p>`` texts in document order.

    Every failure is reported with ``print`` rather than raised, so a caller
    looping over many surahs can continue with the next one.
    """
    # Get the transliterated surah name (URL slug) from the dictionary
    surah_name_in_url = surah_names.get(surah_num)
    if not surah_name_in_url:
        print(f"Surah number {surah_num} not found in the surah names dictionary.")
        return None, None

    # construct the URL for the specific surah
    # special case for Surah 23 and 72 (URLs with numbers in front)
    # NOTE(review): the recorded run reports "Surah name not found for Surah 10";
    # that page may follow yet another URL pattern -- confirm against the site.
    if surah_num in (23, 72):
        url = f"https://noblequran.com/{surah_num}-surah-{surah_name_in_url}/"
    else:
        url = f"https://noblequran.com/surah-{surah_name_in_url}/"

    # Network problems (DNS failure, refused connection, timeout) are handled
    # like any other failed fetch instead of aborting the caller's loop.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve Surah {surah_num}. Error: {exc}")
        return None, None

    # check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve Surah {surah_num}. Status code: {response.status_code}")
        return None, None

    # parse the page content with BS
    soup = BeautifulSoup(response.content, "html.parser")

    # find the surah name (first H2 element with class 'elementor-heading-title')
    surah_name_element = soup.find("h2", class_="elementor-heading-title")
    if surah_name_element is None:
        print(f"Surah name not found for Surah {surah_num}")
        return None, None
    surah_name = surah_name_element.text.strip()

    # find the translation (p elements within div with class 'elementor-widget-text-editor')
    translations = soup.find_all("div", class_="elementor-widget-text-editor")
    if not translations:
        print(f"No translations found for Surah {surah_num}")
        return surah_name, []

    # extract the paragraph texts, dropping paragraphs that are empty after
    # stripping so they do not end up as blank rows downstream
    translation_texts = []
    for translation in translations:
        for p in translation.find_all("p"):
            verse_text = p.text.strip()
            if verse_text:
                translation_texts.append(verse_text)

    # return the surah name and the extracted translations
    return surah_name, translation_texts

# Scrape every surah in surah_names and save the results to a CSV file with
# one row per extracted paragraph: (surah heading, paragraph text).
skipped_surahs = []  # surah numbers for which no name or no text was obtained

with open("quran_translations.csv", "w", newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)

    # write the header
    csvwriter.writerow(["Surah Name", "Verse Translation"])

    # looping through all surahs in the dict (in its insertion order)
    for surah in surah_names:
        surah_name, translations = scrape_surah(surah)

        # scrape_surah signals failure with a None name and/or an empty list
        if not (surah_name and translations):
            print(f"Skipping Surah {surah} due to missing data.")
            skipped_surahs.append(surah)
            continue

        print(f"Scraped {surah_name}")

        # write to the CSV
        csvwriter.writerows([surah_name, verse] for verse in translations)

# Only claim full success when nothing was skipped; otherwise say what is missing.
if skipped_surahs:
    print(
        f"Saved to 'quran_translations.csv'; "
        f"skipped {len(skipped_surahs)} surah(s): {skipped_surahs}"
    )
else:
    print("All surahs scraped and saved to 'quran_translations.csv'.")

Scraped 1. Surah Al-Fatihah
Scraped 2.Surah Al-Baqarah
Scraped 3. Surah Ale-Imran
Scraped 4. Surah An-Nisa'
Scraped 5. Surah Al-Ma'idah
Scraped 6. Surah Al-An'am
Scraped 7. Surah Al-A'raf
Scraped 8. Surah Al-Anfal
Scraped 9. Surah At-Taubah
Surah name not found for Surah 10
Skipping Surah 10 due to missing data.
Scraped 11. Surah Hud
Scraped 12. Surah Yusuf
Scraped 13. Surah Ar-Ra'd
Scraped 14. Surah Ibrahim
Scraped 15. Surah Al-Hijr
Scraped 16. Surah An-Nahl
Scraped 17. Surah Al-Isra'
Scraped 18. Surah Al-Kahf
Scraped 19. Surah Maryam
Scraped 20. Surah Ta-Ha
Scraped 21. Surah Al-Anbiya'
Scraped 22. Surah Al-Hajj
Scraped 23. Surah Al-Mu'minun
Scraped 24. Surah An-Nur
Scraped 25. Surah Al-Furqan
Scraped 26. Surah Ash-Shu'ara'
Scraped 27. Surah An-Naml
Scraped 28. Surah Al-Qasas
Scraped 29. Surah Al-Ankabut
Scraped 30. Surah Ar-Rum
Scraped 31. Surah Luqman
Scraped 32. Surah As-Sajdah
Scraped 33. Surah Al-Ahzab
Scraped 34. Surah Saba'
Scraped 35. Surah Al-Fatir or Al-Mala'ikah
Scraped 36.