In [None]:
import pandas as pd
import json
import requests
import numpy as np

# Seed NumPy's legacy global RNG so that any NumPy-based randomness is repeatable.
SEED = 42
np.random.seed(SEED)

## Al Quran API Editions
Using the Editions API, we extract the following three editions.

### 1. Arabic Quran (Simple)

| Detail | Value |
| :--- | :--- |
| **Identifier** | `quran-simple` |
| **Language** | `ar` (Arabic) |
| **Name (Arabic)** | القرآن الكريم برسم العثماني (unicode) |
| **Format** | `text` |
| **Type** | `quran` |
| **Direction** | `rtl` (Right-to-Left) |

---

### 2. English Translation (Saheeh International)

| Detail | Value |
| :--- | :--- |
| **Identifier** | `en.sahih` |
| **Language** | `en` (English) |
| **Name** | Saheeh International |
| **English Name** | Saheeh International |
| **Format** | `text` |
| **Type** | `translation` |
| **Direction** | `ltr` (Left-to-Right) |

---

### 3. Arabic Quran (Uthmani Script)

| Detail | Value |
| :--- | :--- |
| **Identifier** | `quran-uthmani` |
| **Language** | `ar` (Arabic) |
| **Name (Arabic)** | القرآن الكريم برسم العثماني (uthmani) |
| **English Name** | Uthmani |
| **Format** | `text` |
| **Type** | `quran` |
| **Direction** | `rtl` (Right-to-Left) |



## First, we preprocess the surah metadata for easier reference

In [None]:
# The http://api.alquran.cloud/v1/meta endpoint returns the metadata for the
# surahs; the per-surah records live under data -> surahs -> references.

url = "http://api.alquran.cloud/v1/meta"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail here on an HTTP error instead of later on an unexpected payload shape.
response.raise_for_status()
data = response.json()
surahs_data = data.get("data", {}).get('surahs', {})
# Default to a list: `references` is indexed below and later fed to pd.DataFrame.
references = surahs_data.get("references", [])
if not references:
    raise ValueError(f"No surah references found in the response from {url}")
print(references[0])

{'number': 1, 'name': 'سُورَةُ ٱلْفَاتِحَةِ', 'englishName': 'Al-Faatiha', 'englishNameTranslation': 'The Opening', 'numberOfAyahs': 7, 'revelationType': 'Meccan'}


In [None]:
# One row per surah, built from the list of reference records.
df = pd.DataFrame(data=references)
df.head(n=5)

Unnamed: 0,number,name,englishName,englishNameTranslation,numberOfAyahs,revelationType
0,1,سُورَةُ ٱلْفَاتِحَةِ,Al-Faatiha,The Opening,7,Meccan
1,2,سُورَةُ البَقَرَةِ,Al-Baqara,The Cow,286,Medinan
2,3,سُورَةُ آلِ عِمۡرَانَ,Aal-i-Imraan,The Family of Imraan,200,Medinan
3,4,سُورَةُ النِّسَاءِ,An-Nisaa,The Women,176,Medinan
4,5,سُورَةُ المَائـِدَةِ,Al-Maaida,The Table,120,Medinan


In [None]:
# Map the API's camelCase field names onto the snake_case column names used
# in the exported CSV.
new_column_names = dict(
    name="name_arabic",
    englishName="name_simple",
    englishNameTranslation="name_english",
    numberOfAyahs="verses_count",
    revelationType="revelation_place",
)

# rename() returns a new frame, so `df` keeps the original API column names.
df_renamed = df.rename(new_column_names, axis="columns")
# utf-8-sig writes a BOM at the start of the file (presumably so spreadsheet
# tools detect the Arabic text as UTF-8 -- confirm with the CSV consumers).
df_renamed.to_csv("surahs.csv", encoding="utf-8-sig", index=False)

print("DataFrame columns renamed and saved to surahs.csv")

DataFrame columns renamed and saved to surahs.csv


## Next, we preprocess the verses from the different editions

In [None]:
# Fetch the whole Quran in the en.sahih edition:
# http://api.alquran.cloud/v1/quran/en.sahih

url = "http://api.alquran.cloud/v1/quran/en.sahih"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)
# Fail here on an HTTP error instead of later on an unexpected payload shape.
response.raise_for_status()
data = response.json()
# List of surah records, each carrying its verses under "ayahs".
data_eng = data.get("data", {}).get("surahs", [])


In [None]:
# Fetch the whole Quran in the quran-simple edition:
# http://api.alquran.cloud/v1/quran/quran-simple

url = "http://api.alquran.cloud/v1/quran/quran-simple"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)
# Fail here on an HTTP error instead of later on an unexpected payload shape.
response.raise_for_status()
data = response.json()
# List of surah records, each carrying its verses under "ayahs".
data_simple = data.get("data", {}).get("surahs", [])

In [None]:
# Fetch the whole Quran in the quran-uthmani edition:
# http://api.alquran.cloud/v1/quran/quran-uthmani

url = "http://api.alquran.cloud/v1/quran/quran-uthmani"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)
# Fail here on an HTTP error instead of later on an unexpected payload shape.
response.raise_for_status()
data = response.json()
# List of surah records, each carrying its verses under "ayahs".
data_uthmani = data.get("data", {}).get("surahs", [])

In [None]:
# API field name -> export column name. Keys that a frame does not contain
# are simply ignored by rename().
new_column_names = dict(
    name="name_arabic",
    englishName="name_simple",
    englishNameTranslation="name_english",
    numberOfAyahs="verses_count",
    revelationType="revelation_place",
)

# One row per surah for each edition; the verses stay nested in "ayahs".
df_english = pd.DataFrame(data_eng).rename(columns=new_column_names)
df_simple = pd.DataFrame(data_simple).rename(columns=new_column_names)
df_uthmani = pd.DataFrame(data_uthmani).rename(columns=new_column_names)

In [None]:
# Preview the Uthmani edition: one row per surah, with the verses nested in
# "ayahs" as a list of dicts. Note that each ayah's "number" keeps counting
# across surahs (surah 2 starts at 8).
df_uthmani.head(n=5)

Unnamed: 0,number,name_arabic,name_simple,name_english,revelation_place,ayahs
0,1,سُورَةُ ٱلْفَاتِحَةِ,Al-Faatiha,The Opening,Meccan,"[{'number': 1, 'text': '﻿بِسْمِ ٱللَّهِ ٱلرَّح..."
1,2,سُورَةُ البَقَرَةِ,Al-Baqara,The Cow,Medinan,"[{'number': 8, 'text': 'بِسْمِ ٱللَّهِ ٱلرَّحْ..."
2,3,سُورَةُ آلِ عِمۡرَانَ,Aal-i-Imraan,The Family of Imraan,Medinan,"[{'number': 294, 'text': 'بِسْمِ ٱللَّهِ ٱلرَّ..."
3,4,سُورَةُ النِّسَاءِ,An-Nisaa,The Women,Medinan,"[{'number': 494, 'text': 'بِسْمِ ٱللَّهِ ٱلرَّ..."
4,5,سُورَةُ المَائـِدَةِ,Al-Maaida,The Table,Medinan,"[{'number': 670, 'text': 'بِسْمِ ٱللَّهِ ٱلرَّ..."


In [None]:
# Preview the simple-script edition: one row per surah, verses nested in "ayahs".
df_simple.head(n=5)

Unnamed: 0,number,name_arabic,name_simple,name_english,revelation_place,ayahs
0,1,سُورَةُ ٱلْفَاتِحَةِ,Al-Faatiha,The Opening,Meccan,"[{'number': 1, 'text': '﻿بِسْمِ اللَّهِ الرَّح..."
1,2,سُورَةُ البَقَرَةِ,Al-Baqara,The Cow,Medinan,"[{'number': 8, 'text': 'بِسْمِ اللَّهِ الرَّحْ..."
2,3,سُورَةُ آلِ عِمۡرَانَ,Aal-i-Imraan,The Family of Imraan,Medinan,"[{'number': 294, 'text': 'بِسْمِ اللَّهِ الرَّ..."
3,4,سُورَةُ النِّسَاءِ,An-Nisaa,The Women,Medinan,"[{'number': 494, 'text': 'بِسْمِ اللَّهِ الرَّ..."
4,5,سُورَةُ المَائـِدَةِ,Al-Maaida,The Table,Medinan,"[{'number': 670, 'text': 'بِسْمِ اللَّهِ الرَّ..."


In [None]:
def expand(df: pd.DataFrame, edition_name: str) -> pd.DataFrame:
    """Flatten a per-surah frame into one row per ayah.

    Parameters
    ----------
    df : pd.DataFrame
        One row per surah, with a ``number`` column (the surah number) and an
        ``ayahs`` column holding a list of ayah dicts.
    edition_name : str
        Suffix of the text column, e.g. ``"english"`` -> ``text_english``.

    Returns
    -------
    pd.DataFrame
        Columns ``surah_number``, ``verse_number``, ``text_<edition_name>``,
        ``juz_number`` and ``ruku_number`` (in that order), one row per ayah.
        The columns are present even when ``df`` has no rows.
    """
    text_column = f"text_{edition_name}"
    rows = []
    for _, row in df.iterrows():
        surah = row["number"]

        for ayah in row["ayahs"]:
            # "number" is the running ayah count across the whole Quran
            # (surah 2 starts at 8), so it cannot serve as the verse number
            # within a surah. Prefer the per-surah index and only fall back
            # to "number" for records that lack it.
            if "numberInSurah" in ayah:
                verse = ayah["numberInSurah"]
            else:
                verse = ayah["number"]

            rows.append({
                "surah_number": surah,
                "verse_number": verse,
                text_column: ayah["text"],
                "juz_number": ayah.get("juz"),
                "ruku_number": ayah.get("ruku"),
            })

    # Pin the columns so an empty input still yields a frame that can be merged.
    return pd.DataFrame(
        rows,
        columns=["surah_number", "verse_number", text_column, "juz_number", "ruku_number"],
    )

In [None]:
# Flatten each edition to one row per verse.
df_e = expand(df_english, "english")
df_s = expand(df_simple, "simple")
df_u = expand(df_uthmani, "uthmani")

merge_keys = ["surah_number", "verse_number"]
# Every expanded frame carries juz/ruku. Merging them as-is makes pandas add
# _x/_y suffixes, and the unsuffixed columns survive only by accident of merge
# order. Keep a single copy (from the Uthmani frame) and drop the others.
position_columns = ["juz_number", "ruku_number"]

# validate="one_to_one" raises if a (surah, verse) key repeats in either frame,
# which would otherwise silently multiply rows.
ayahs_df = (
    df_e.drop(columns=position_columns)
    .merge(
        df_s.drop(columns=position_columns),
        on=merge_keys,
        how="outer",
        validate="one_to_one",
    )
    .merge(df_u, on=merge_keys, how="outer", validate="one_to_one")
)

# Human-readable key of the form "<surah_number>:<verse_number>".
ayahs_df["verse_key"] = (
    ayahs_df["surah_number"].astype(str)
    + ":"
    + ayahs_df["verse_number"].astype(str)
)

In [None]:
# Preview the merged frame: one row per verse with the text of every edition.
ayahs_df.head(n=5)

Unnamed: 0,surah_number,verse_number,text_english,juz_number_x,ruku_number_x,text_simple,juz_number_y,ruku_number_y,text_uthmani,juz_number,ruku_number,verse_key
0,1,1,"In the name of Allah, the Entirely Merciful, t...",1,1,﻿بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ,1,1,﻿بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,1,1,1:1
1,1,2,"[All] praise is [due] to Allah, Lord of the wo...",1,1,الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ,1,1,ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ,1,1,1:2
2,1,3,"The Entirely Merciful, the Especially Merciful,",1,1,الرَّحْمَٰنِ الرَّحِيمِ,1,1,ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,1,1,1:3
3,1,4,Sovereign of the Day of Recompense.,1,1,مَالِكِ يَوْمِ الدِّينِ,1,1,مَٰلِكِ يَوْمِ ٱلدِّينِ,1,1,1:4
4,1,5,It is You we worship and You we ask for help.,1,1,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ,1,1,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ,1,1,1:5


In [None]:
# Columns kept for the export, listed in the order they are selected.
filtered_columns = [
    "surah_number",
    "verse_number",
    "juz_number",
    "ruku_number",
    "text_simple",
    "text_uthmani",
    "text_english",
    "verse_key",
]
# .copy() gives an independent frame, so later edits do not touch ayahs_df.
ayahs_filtered = ayahs_df.loc[:, filtered_columns].copy()
ayahs_filtered.head(n=5)

Unnamed: 0,surah_number,verse_number,juz_number,ruku_number,text_simple,text_uthmani,text_english,verse_key
0,1,1,1,1,﻿بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ,﻿بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,"In the name of Allah, the Entirely Merciful, t...",1:1
1,1,2,1,1,الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ,ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ,"[All] praise is [due] to Allah, Lord of the wo...",1:2
2,1,3,1,1,الرَّحْمَٰنِ الرَّحِيمِ,ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,"The Entirely Merciful, the Especially Merciful,",1:3
3,1,4,1,1,مَالِكِ يَوْمِ الدِّينِ,مَٰلِكِ يَوْمِ ٱلدِّينِ,Sovereign of the Day of Recompense.,1:4
4,1,5,1,1,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ,It is You we worship and You we ask for help.,1:5


In [None]:
# Rename the surah column in place, then write the per-verse table to disk.
ayahs_filtered.rename({"surah_number": "surah_id"}, axis="columns", inplace=True)
# utf-8-sig writes a BOM at the start of the file.
ayahs_filtered.to_csv("ayahs.csv", encoding="utf-8-sig", index=False)
print("CSV Created Successfully.")

CSV Created Successfully.
