In [2]:
import pandas as pd
import xml.etree.ElementTree as ET
from pathlib import Path

## Parse the Arabic Text

In [5]:
xml_quran_fp = Path('../data/quran-uthmani.xml')

In [11]:
xml_quran_root = ET.parse(xml_quran_fp).getroot()

In [111]:
columns = ['Surah Name', 'Surah Number', 'Verse Number', 'Verse Text']

df = pd.DataFrame(columns=columns)

In [112]:
for sura_element in xml_quran_root:
    sura_num = int(sura_element.attrib.get('index'))
    sura_name = sura_element.attrib.get('name')
    for verse_element in sura_element:
        verse_num = int(verse_element.get('index'))
        verse_text = verse_element.get('text')
        df = df.append(pd.Series([sura_name, sura_num, verse_num, verse_text],
                                 index=columns), ignore_index=True)



In [113]:
df.head()

Unnamed: 0,Surah Name,Surah Number,Verse Number,Verse Text
0,الفاتحة,1,1,بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ
1,الفاتحة,1,2,ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ
2,الفاتحة,1,3,ٱلرَّحْمَٰنِ ٱلرَّحِيمِ
3,الفاتحة,1,4,مَٰلِكِ يَوْمِ ٱلدِّينِ
4,الفاتحة,1,5,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ


In [114]:
df = df.set_index(['Surah Number', 'Verse Number'])

In [115]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Surah Name,Verse Text
Surah Number,Verse Number,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,الفاتحة,بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ
1,2,الفاتحة,ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ
1,3,الفاتحة,ٱلرَّحْمَٰنِ ٱلرَّحِيمِ
1,4,الفاتحة,مَٰلِكِ يَوْمِ ٱلدِّينِ
1,5,الفاتحة,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ


In [116]:
arabic_df = df.copy()

In [117]:
arabic_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Surah Name,Verse Text
Surah Number,Verse Number,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,الفاتحة,بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ
1,2,الفاتحة,ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ
1,3,الفاتحة,ٱلرَّحْمَٰنِ ٱلرَّحِيمِ
1,4,الفاتحة,مَٰلِكِ يَوْمِ ٱلدِّينِ
1,5,الفاتحة,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ


## Parse English translation

In [118]:
xml_trans_files = list(Path('../data/en_trans_xml_files').iterdir())

In [119]:
import numpy as np

In [125]:
for xml_en_file in xml_trans_files:
    trans_code = xml_en_file.name.strip('.xml').replace('.','_')
    print(trans_code)
    df[trans_code] = np.nan
    root = ET.parse(xml_en_file).getroot()
    for sura_element in root:
        sura_num = int(sura_element.attrib.get('index'))
        sura_name = sura_element.attrib.get('name')
        for verse_element in sura_element:
            verse_num = int(verse_element.get('index'))
            verse_text = verse_element.get('text')
            df[trans_code].loc[sura_num, verse_num] = verse_text

        

en_sahih
en_sarwar
en_shakir
en_daryabadi
en_ahmedraza
en_ahmedali
en_qaribullah
en_yusufali
en_wahiduddin
en_maududi
en_mubarakpuri
en_qarai
en_picktha
en_itani
en_arberry
en_hilali
en_transliteration


In [126]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Surah Name,Verse Text,en_sahih,en_sarwar,en_shakir,en_daryabadi,en_ahmedraza,en_ahmedali,en_qaribullah,en_yusufali,en_wahiduddin,en_maududi,en_mubarakpuri,en_qarai,en_picktha,en_itani,en_arberry,en_hilali,en_transliteration
Surah Number,Verse Number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,1,الفاتحة,بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,"In the name of Allah, the Entirely Merciful, t...","In the Name of Allah, the Beneficent, the Merc...","In the name of Allah, the Beneficent, the Merc...","In the name of Allah, the Compassionate, the M...",Allah - beginning with the name of - the Most ...,"In the name of Allah, most benevolent, ever-me...","In the Name of Allah, the Merciful, the Most M...","In the name of Allah, Most Gracious, Most Merc...","﻿In the name of God, the Most Gracious, the Mo...","In the name of Allah, the Merciful, the Compas...","In the Name of Allah, the Most Gracious, the M...","In the Name of Allah, the All-beneficent, the ...","In the name of Allah, the Beneficent, the Merc...","In the name of God, the Gracious, the Merciful.","In the Name of God, the Merciful, the Compassi...","In the Name of Allah, the Most Beneficent, the...",Bismi All<u>a</U>hi a<b>l</B>rra<u>h</U>m<u>a<...
1,2,الفاتحة,ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ,"[All] praise is [due] to Allah, Lord of the wo...","All praise belongs to God, Lord of the Universe,","All praise is due to Allah, the Lord of the Wo...","All praise unto Allah, the Lord of all the wor...","All praise is to Allah, the Lord Of The Creation.","ALL PRAISE BE to Allah, Lord of all the worlds,","Praise be to Allah, Lord of the Worlds,","Praise be to Allah, the Cherisher and Sustaine...","All praise is due to God, the Lord of the Univ...","Praise be to Allah, the Lord of the entire uni...","Al-Hamd be to Allah, the Lord of all that exists.","All praise belongs to Allah, Lord of all the w...","Praise be to Allah, Lord of the Worlds,","Praise be to God, Lord of the Worlds.","Praise belongs to God, the Lord of all Being,","All the praises and thanks be to Allah, the Lo...",Al<u>h</U>amdu lill<u>a</U>hi rabbi alAA<u>a</...
1,3,الفاتحة,ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,"The Entirely Merciful, the Especially Merciful,","the Beneficent, the Merciful","The Beneficent, the Merciful.","The Compassionate, the Merciful.","The Most Gracious, the Most Merciful","Most beneficent, ever-merciful,","the Merciful, the Most Merciful,","Most Gracious, Most Merciful;","the Beneficent, the Merciful;","The Merciful, the Compassionate","Ar-Rahman (the Most Gracious), Ar-Rahim (the M...","the All-beneficent, the All-merciful,","The Beneficent, the Merciful.","The Most Gracious, the Most Merciful.","the All-merciful, the All-compassionate,","The Most Beneficent, the Most Merciful.",A<b>l</B>rra<u>h</U>m<u>a</U>ni a<b>l</B>rra<u...
1,4,الفاتحة,مَٰلِكِ يَوْمِ ٱلدِّينِ,Sovereign of the Day of Recompense.,and Master of the Day of Judgment,Master of the Day of Judgment.,Sovereign of the Day of Requital.,Owner of the Day of Recompense,King of the Day of Judgement.,Owner of the Day of Recompense.,Master of the Day of Judgment.,Lord of the Day of Judgement.,The Master of the Day of Recompense.,The Owner of the Day of Recompense.,Master of the Day of Retribution.,"Master of the Day of Judgment,",Master of the Day of Judgment.,the Master of the Day of Doom.,The Only Owner (and the Only Ruling Judge) of ...,M<u>a</U>liki yawmi a<b>l</B>ddeen<b>i</b>
1,5,الفاتحة,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ,It is You we worship and You we ask for help.,"(Lord), You alone We do worship and from You a...",Thee do we serve and Thee do we beseech for help.,Thee alone do we worship and of Thee alone do ...,You alone we worship and from You alone we see...,"You alone we worship, and to You alone turn fo...",You (alone) we worship; and You (alone) we rel...,"Thee do we worship, and Thine aid we seek.","You alone we worship, and to You alone we turn...","You alone do we worship, and You alone do we t...","You we worship, and You we ask for help.","You [alone] do we worship, and to You [alone] ...",Thee (alone) we worship; Thee (alone) we ask f...,"It is You we worship, and upon You we call for...",Thee only we serve; to Thee alone we pray for ...,"You (Alone) we worship, and you (Alone) we ask...",Iyy<u>a</U>ka naAAbudu waiyy<u>a</U>ka nastaAA...


In [127]:
df.to_json('../data/translation.json')

In [128]:
df.to_pickle('../data/df_translation.pckl')