# In [10]:
import os
import re

import arabic_reshaper
import cv2
import numpy as np
import PIL.Image, PIL.ImageFont, PIL.ImageDraw
import pyquran as q
import requests
from bidi.algorithm import get_display

from config import arabic_reshaper_config
def remove_Quranic_stops(text: str) -> str:
    """Return ``text`` with every code point in U+06D6..U+06EF deleted.

    The whole contiguous range is removed, not only pause marks.
    NOTE(review): the range also appears to cover U+06EE/U+06EF, which
    look like letters rather than annotation marks -- confirm intended.

    Args:
        text: Input string; may be empty.

    Returns:
        A new string without any character from the range above.
    """
    stops_pattern = "[\u06d6-\u06ef]"
    stops_re = re.compile(stops_pattern, flags=re.UNICODE | re.X)
    return stops_re.sub("", text)

# Compiled once at import time instead of on every call.
# The last range is the union of the three overlapping ranges that were
# previously listed (U+08D4-08E1, U+08D4-08ED, U+08E3-08FF); the set of
# matched code points is unchanged.
# NOTE(review): that union includes U+08E2; the overlapping originals suggest
# it may have been meant to be skipped -- confirm before narrowing the range.
_HARAKAT_RE = re.compile(
    '['
    '\u0610-\u061a'
    '\u064b-\u065f'
    '\u0670'
    '\u06d6-\u06dc'
    '\u06df-\u06e8'
    '\u06ea-\u06ed'
    '\u08d4-\u08ff'
    ']'
)


def remove_tashkeel(text: str) -> str:
    """Return ``text`` with Arabic diacritic / combining-mark code points removed.

    Removed code points: U+0610-061A, U+064B-065F, U+0670, U+06D6-06DC,
    U+06DF-06E8, U+06EA-06ED and U+08D4-08FF. Everything else, including
    base letters, is left untouched.

    Args:
        text: Input string; may be empty.

    Returns:
        A new string without any of the code points listed above.
    """
    return _HARAKAT_RE.sub("", text)

def binarize(img: np.ndarray) -> np.ndarray:
    """Binarize ``img`` with Otsu's threshold, inverting bright-background results.

    The image is thresholded to the values 0 and 255. If the mean of the
    outermost rows and columns is above mid-gray *and* at least half of all
    pixels are non-zero, the result is inverted -- presumably so that the
    content, not the background, ends up non-zero (TODO confirm).

    Args:
        img: 2-D image array; assumed to be single-channel 8-bit, as the
            Otsu flag is used -- TODO confirm against callers.

    Returns:
        The binarized (possibly inverted) image.
    """
    # The computed Otsu threshold value itself is not needed.
    _, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # All pixels on the four edges of the image (corners are counted twice).
    border_pixels = np.concatenate(
        (binary[0, :], binary[-1, :], binary[:, 0], binary[:, -1])
    )
    bright_border = border_pixels.mean() > (255 / 2)

    if bright_border and cv2.countNonZero(binary) / img.size >= 0.5:
        return 255 - binary
    return binary


def auto_crop(binary_image: np.ndarray, border: int = 50) -> np.ndarray:
    """Crop a 2-D image to the bounding box of its non-zero pixels plus a margin.

    The bounding box is expanded by ``border`` pixels on every side and then
    clamped to the image bounds.

    Args:
        binary_image: 2-D array; any non-zero pixel counts as content.
        border: Margin, in pixels, kept around the content on each side.

    Returns:
        The cropped region as a slice of ``binary_image``. If the image has
        no non-zero pixel there is nothing to crop around, so the input is
        returned unchanged instead of failing.
    """
    height, width = binary_image.shape

    # Indices of the rows / columns that contain at least one non-zero pixel.
    content_rows = np.flatnonzero(binary_image.any(axis=1))
    if content_rows.size == 0:
        return binary_image
    content_cols = np.flatnonzero(binary_image.any(axis=0))

    # "+ 1" turns the last occupied index into an exclusive slice end.
    top = max(0, int(content_rows[0]) - border)
    bottom = min(height, int(content_rows[-1]) + 1 + border)
    left = max(0, int(content_cols[0]) - border)
    right = min(width, int(content_cols[-1]) + 1 + border)
    return binary_image[top:bottom, left:right]


def preprocess(img: np.ndarray) -> np.ndarray:
    """Run the image through ``binarize`` and then ``auto_crop``.

    Args:
        img: Image array to process.

    Returns:
        The binarized image, cropped by ``auto_crop``.
    """
    return auto_crop(binarize(img))

    
# Dataset generation: download surahs 50-114, render every short verse with
# each configured font, preprocess the rendering and save it as a JPEG.

FONT_NAMES = ["rekaa"]               # each needs "<name>.ttf"; output goes to "<name>/"
FONTS_WITHOUT_TASHKEEL = ["kufi"]    # fonts rendered with diacritics stripped
MAX_AYA_LENGTH = 50                  # verses with this many characters or more are skipped
REQUEST_TIMEOUT_SECONDS = 30

i = 0  # running image counter, shared by all fonts
reshaper = arabic_reshaper.ArabicReshaper(configuration=arabic_reshaper_config)

# Load each font once (not once per verse) and make sure its output directory
# exists, since cv2.imwrite does not create directories.
fonts = {}
for font_name in FONT_NAMES:
    fonts[font_name] = PIL.ImageFont.truetype(f"{font_name}.ttf", 100)
    os.makedirs(font_name, exist_ok=True)

for surah_number in range(50, 115):
    surah = str(surah_number).zfill(3)
    response = requests.get(
        f"https://www.quranful.com/text/ar-allah/{surah}.txt",
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail loudly rather than rendering the lines of an HTTP error page.
    response.raise_for_status()
    text = response.text

    for aya in text.split('\n'):
        if len(aya) >= MAX_AYA_LENGTH or aya.strip() == '':
            continue

        for font_name, font in fonts.items():
            # Start from the raw verse for every font so that one font's
            # cleaning never leaks into the next.
            text_to_be_reshaped = aya
            if font_name in FONTS_WITHOUT_TASHKEEL:
                text_to_be_reshaped = remove_tashkeel(text_to_be_reshaped)
            # Chain on the already-cleaned text; passing ``aya`` here would
            # discard the tashkeel removal above.
            text_to_be_reshaped = remove_Quranic_stops(text_to_be_reshaped)
            reshaped_text = reshaper.reshape(text_to_be_reshaped)

            # NOTE(review): ``get_display`` is imported at the top of the file
            # but unused; right-to-left ordering is done by plain reversal.
            # Confirm this is intended.
            rev_text = reshaped_text[::-1]

            # White text on a black, single-channel canvas.
            img = PIL.Image.new("L", (2500, 400), color=0)
            draw = PIL.ImageDraw.Draw(img)
            draw.text((50, 50), rev_text, fill=255, font=font)

            img = preprocess(np.array(img))
            output_path = f"{font_name}/image{i}.jpg"
            # cv2.imwrite reports failure via its return value, not an exception.
            if not cv2.imwrite(output_path, img):
                raise OSError(f"could not write image to {output_path}")
            i += 1




