In [None]:
# remove csv files from tafsir folder
import os
from pathlib import Path

# Delete every *.csv file found directly inside each entry of the `tafsir`
# folder (non-recursive: only one level below `tafsir/` is inspected).
base_folder = Path('tafsir')
csv_paths = (
    csv_path
    for author_folder in base_folder.iterdir()
    for csv_path in author_folder.glob("*.csv")
)
for csv_path in csv_paths:
    os.remove(csv_path)

In [None]:
# Extract each ayah's tafsir text from the per-surah JSON files into its own text file, and remove that text from the JSON
import os
import json

base_dir = "tafsir"


def extract_tafsir_texts(base_dir):
    """Move each ayah's ``tafsir_text`` out of the per-surah JSON files into text files.

    Every ``<base_dir>/<author>/<surah>.json`` file is loaded as a list of
    dicts, each carrying an ``ayah_number`` key. For every item with a
    non-empty ``tafsir_text`` the text is written to
    ``<base_dir>/<author>/<surah>_<ayah_number>.txt``. The ``tafsir_text`` key
    is removed from every item, and the JSON file is rewritten once (after all
    of its items were processed) if at least one text file was produced.

    Args:
        base_dir: Folder containing one sub-folder per author.

    Returns:
        The number of text files written.

    Raises:
        FileNotFoundError: If ``base_dir`` does not exist.
        KeyError: If an item has no ``ayah_number``.
    """
    written = 0
    for author in os.listdir(base_dir):
        author_dir = os.path.join(base_dir, author)
        if not os.path.isdir(author_dir):
            continue  # stray files next to the author folders are ignored
        for filename in os.listdir(author_dir):
            if not filename.endswith(".json"):
                continue
            surah_number = os.path.splitext(filename)[0]
            json_path = os.path.join(author_dir, filename)
            with open(json_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            extracted_any = False
            for item in data:
                ayah_number = item["ayah_number"]
                tafsir_text = item.pop("tafsir_text", "")
                if not tafsir_text:
                    continue  # nothing to extract, so no empty .txt file is created
                txt_path = os.path.join(author_dir, f"{surah_number}_{ayah_number}.txt")
                with open(txt_path, "w", encoding="utf-8") as txt_file:
                    txt_file.write(tafsir_text)
                extracted_any = True
                written += 1

            # Rewrite the JSON a single time per file instead of once per ayah:
            # the dump always serialises the whole list, so doing it inside the
            # loop only repeated the same work and could leave trailing
            # empty-text items out of sync with the in-memory data.
            if extracted_any:
                with open(json_path, "w", encoding="utf-8") as f:
                    json.dump(data, f, ensure_ascii=False, indent=2)
    return written


# True inside a notebook kernel, so the cell still performs the extraction when
# run; the guard only keeps the helper importable without a `tafsir` folder.
if __name__ == "__main__":
    extract_tafsir_texts(base_dir)

In [None]:
# Split a long text into words, then greedily pack the words into chunks of at most 3000 characters (the acceptable size per API request)
def split_into_chunks(words, max_chars=3000):
    """Greedily pack ``words`` into space-joined chunks of at most ``max_chars`` characters.

    Words are never split. A single word that is itself longer than
    ``max_chars`` is emitted as its own (oversized) chunk, so the function
    always makes progress instead of looping forever on such a word.

    Args:
        words: Sequence of whitespace-free strings, e.g. ``text.split()``.
        max_chars: Maximum length of a chunk, counting the joining spaces.

    Returns:
        List of chunk strings; ``" ".join(chunks).split()`` gives back ``words``.
    """
    chunks = []
    current_words = []
    current_len = 0
    for word in words:
        # Length the chunk would have with `word` appended (+1 for the space).
        candidate_len = current_len + 1 + len(word) if current_words else len(word)
        if current_words and candidate_len > max_chars:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_len = len(word)
        else:
            current_words.append(word)
            current_len = candidate_len
    if current_words:
        chunks.append(" ".join(current_words))
    return chunks


# True inside a notebook kernel, so the cell still runs the demo below; the
# guard only keeps the helper importable without the input file.
if __name__ == "__main__":
    with open("000 - INTRODUCTION TO AL-QURAN-01_chunk_01.txt", "r", encoding="utf-8") as f:
        text = f.read()

    words = text.split()
    print(f"Total characters: {len(text)}")
    print(f"Total words: {len(words)}")

    sentences = split_into_chunks(words, 3000)

    print(f"Total sentences: {len(sentences)}")

    for i, sentence in enumerate(sentences):
        print(f"Sentence {i+1}: {sentence[:50]}... ({len(sentence)} characters)")


In [None]:
# Translate Urdu Tafsir to English (single file)
from translate import TafsirTranslator

# Single-file run: the third argument is the source-language code ('ur' = Urdu).
# The second path is presumably where the translation is written — confirm
# against TafsirTranslator.translate_from_file.
urdu_source_path = "001 - SURAH AL-FATIHA_01.txt"
urdu_output_path = "001 - SURAH AL-FATIHA_01_en.txt"

translator = TafsirTranslator()
result = translator.translate_from_file(urdu_source_path, urdu_output_path, 'ur')

In [None]:
# Translate Urdu lectures to English, French, German, Spanish and Arabic (batch processing)
from translate import TafsirTranslator

translator = TafsirTranslator()

# One batch run per target language, all reading "lectures/" and writing to
# "lectures_translated/". `None` means "call without a target-language
# argument", which the original cell labelled as the English run.
for target_lang in (None, 'fr', 'de', 'es', 'ar'):
    target_args = () if target_lang is None else (target_lang,)
    # `result` is overwritten on every pass, so it ends up holding the value
    # returned by the last ('ar') run.
    result = translator.batch_translate_files("lectures/", "lectures_translated/", 'ur', *target_args)

In [2]:
# Translate Arabic Tafsir to English (single file)
from translate import TafsirTranslator

# Single-file run: the third argument is the source-language code ('ar' = Arabic).
# The second path is presumably where the translation is written — confirm
# against TafsirTranslator.translate_from_file.
arabic_source_path = "surah1-ayat1.txt"
arabic_output_path = "surah1-ayat1_en.txt"

translator = TafsirTranslator()
result = translator.translate_from_file(arabic_source_path, arabic_output_path, 'ar')

2025-07-05 13:13:25,443 - INFO - Multi-language Tafsir Translator initialized
2025-07-05 13:13:25,449 - INFO - Read 63834 characters from surah1-ayat1.txt
2025-07-05 13:13:25,451 - INFO - Starting tafsir translation with automatic language detection...
2025-07-05 13:13:25,452 - INFO - Source language: Arabic (ar)
2025-07-05 13:13:25,787 - INFO - Text split into 22 chunks
2025-07-05 13:13:25,789 - INFO - Translating chunk 1/22...
2025-07-05 13:13:28,191 - INFO - Translating chunk 2/22...
2025-07-05 13:13:31,705 - INFO - Translating chunk 3/22...
2025-07-05 13:13:34,666 - INFO - Translating chunk 4/22...
2025-07-05 13:13:37,540 - INFO - Translating chunk 5/22...
2025-07-05 13:13:39,939 - INFO - Translating chunk 6/22...
2025-07-05 13:13:43,268 - INFO - Translating chunk 7/22...
2025-07-05 13:13:45,728 - INFO - Translating chunk 8/22...
2025-07-05 13:13:48,497 - INFO - Translating chunk 9/22...
2025-07-05 13:13:51,303 - INFO - Translating chunk 10/22...
2025-07-05 13:13:54,645 - INFO - Tr

In [None]:
# Translate Arabic Tafsir to English (batch processing)
from translate import TafsirTranslator

# Batch run over the "tafsir/" folder with Arabic ('ar') as the source language.
# No target-language argument is passed; the original cell labelled this call
# as the English run.
arabic_input_dir = "tafsir/"
arabic_output_dir = "tafsir_translated/"

translator = TafsirTranslator()
result = translator.batch_translate_files(arabic_input_dir, arabic_output_dir, 'ar')