In [8]:
import json
import re

from io import BytesIO

import requests
from pyquery import PyQuery as pq
from tqdm import tqdm
from PIL import Image, ImageDraw

def decrypt_page(sim, data):
    """Rebuild a page image by copying square tiles out of ``sim``.

    A blank RGB image of ``data["PageWidth"]`` x ``data["PageHeight"]`` is
    created, and for every entry of ``data["Sub"]`` one 50x50 tile is cropped
    from ``sim`` and pasted into the new image.

    Args:
        sim: Source image; must provide ``crop(box)`` (a PIL image in this file).
        data: Mapping with ``"PageWidth"``, ``"PageHeight"`` and ``"Sub"``.
            Each ``"Sub"`` entry supplies tile coordinates ``X1``/``Y1``
            (position in the source) and ``X2``/``Y2`` (position in the output).

    Returns:
        The newly assembled image.
    """
    canvas = Image.new("RGB", (data["PageWidth"], data["PageHeight"]))

    tile = 50  # Edge length of one square tile, in pixels
    # Source tiles sit on a wider grid than destination tiles: each step in the
    # source is the tile size plus 16 pixels (presumably padding between the
    # scrambled tiles -- confirm against the image format).
    src_pitch = tile + 16

    for entry in data["Sub"]:
        src_left = entry['X1'] * src_pitch
        src_top = entry['Y1'] * src_pitch

        dst_left = entry['X2'] * tile
        dst_top = entry['Y2'] * tile

        piece = sim.crop((src_left, src_top, src_left + tile, src_top + tile))
        canvas.paste(
            im=piece,
            box=(dst_left, dst_top, dst_left + tile, dst_top + tile),
        )

    return canvas

def _js_string_array(html, var_name):
    """Return the quoted items of the JavaScript array assigned to ``var_name``.

    Searches ``html`` for ``var <var_name> = [ ... ];`` and returns, in order,
    the contents of every single- or double-quoted string between the brackets.

    Raises:
        ValueError: If no such array declaration exists in ``html``.
    """
    declaration = re.search(
        r"var %s = \[(.*?)\];" % re.escape(var_name), html, re.DOTALL
    )
    if declaration is None:
        raise ValueError(f"'var {var_name} = [...]' not found in page source")
    # Match on the quotes themselves instead of splitting on whitespace, so the
    # result does not depend on how commas and line breaks are laid out.
    return [
        item.group(2)
        for item in re.finditer(r"""(["'])(.*?)\1""", declaration.group(1))
    ]


def download_ebook(url):
    """Download a Rekhta ebook and save it as ``<book name>.pdf``.

    The ebook page at ``url`` is fetched and the book name (text of the
    ``bdi`` element), the ``bookId`` variable and the ``pages`` / ``pageIds``
    arrays are extracted from it.  Every page is then fetched through
    ``download_page`` and all pages are written to a single PDF in the current
    working directory.

    Args:
        url: Address of the ebook page.

    Raises:
        ValueError: If the book id or page lists cannot be found in the page
            source, or if the book has no pages.
        requests.HTTPError: If the ebook page request returns an error status.
    """
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    html = response.content.decode("ascii", "ignore")
    bookname = pq(html)("bdi").text()

    # Non-greedy so the match stops at the first closing quote on the line.
    bookid_match = re.search(r"var bookId = \"(.*?)\";", html)
    if bookid_match is None:
        raise ValueError("'var bookId' not found in page source")
    bookid = bookid_match.group(1)

    pagenames = _js_string_array(html, "pages")
    pageids = _js_string_array(html, "pageIds")

    print(f"Downloading {bookname}")

    # Download Images
    # zip() stops at the shorter list; give tqdm that length so the progress
    # bar can show a percentage and an ETA.
    total = min(len(pagenames), len(pageids))
    pages = [
        download_page(bookid, pagename, pageid)
        for pagename, pageid in tqdm(zip(pagenames, pageids), total=total)
    ]
    if not pages:
        raise ValueError(f"no pages found for {url}")

    # Save as PDF
    pages[0].save(bookname + ".pdf", save_all=True, append_images=pages[1:])

def download_page(bookid, pagename, pageid):
    """Fetch one ebook page and return it as a reassembled image.

    Two requests are made: one for the page image and one for the page's JSON
    description.  Both results are handed to ``decrypt_page``.

    Args:
        bookid: Book identifier used in the image URL.
        pagename: Image file name used in the image URL.
        pageid: Page identifier passed as the ``pgi`` query parameter.

    Returns:
        The image returned by ``decrypt_page``.

    Raises:
        requests.HTTPError: If either request returns an error status.
    """
    timeout = 30  # seconds; requests would otherwise wait indefinitely

    page_img_url = f"https://ebooksapi.rekhta.org/images/{bookid}/{pagename}"
    img_response = requests.get(page_img_url, timeout=timeout)
    # Fail here with the HTTP status rather than later when the error body
    # cannot be decoded as an image.
    img_response.raise_for_status()
    page_img = Image.open(BytesIO(img_response.content))

    page_data_url = f"https://ebooksapi.rekhta.org/api_getebookpagebyid/?atky=pns&pgi={pageid}"
    data_response = requests.get(page_data_url, timeout=timeout)
    data_response.raise_for_status()
    page_data = data_response.json()

    return decrypt_page(page_img, page_data)


# Script entry: download the ebook at this URL and write it out as a PDF.
# NOTE: this runs as soon as the cell/module is executed and performs
# network requests.
URL = "https://www.rekhta.org/ebooks/armughan-e-hijaz-allama-iqbal-ebooks-9"
download_ebook(URL)
