In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium.common.exceptions import NoSuchElementException
import requests
import os
from PIL import Image
import re
import shutil
import ctypes
from tenacity import retry, stop_after_delay, wait_fixed, retry_if_exception_type, RetryError

In [None]:
# TODO: possible optimizations
# - multithreading?
# - async functions to fetch the images of each issue?

In [None]:
# Paste the URL of a comic series from https://readcomiconline.li/
# between the quotes below. Any trailing "/" is stripped so that the
# last path segment of the URL can be used as the series name.
url = "".rstrip("/")
parsed_url = url.split("/")
# Everything after the last "/" (the whole string if there is none).
comic_series = url.rpartition("/")[2]

In [None]:
@retry(
    retry=retry_if_exception_type(NoSuchElementException),
    wait=wait_fixed(20),
    stop=stop_after_delay(180),
)
def get_anchor_elements(driver):
    """Return every ``<a>`` element inside the element with class ``listing``.

    While the lookup raises ``NoSuchElementException`` the call is retried
    with a fixed 20 second wait, giving up after 180 seconds (tenacity then
    raises ``RetryError``).

    Args:
        driver: Selenium WebDriver already pointed at the series page.

    Returns:
        List of anchor WebElements found inside the listing element.
    """
    listing = driver.find_element(By.CLASS_NAME, "listing")
    return listing.find_elements(By.TAG_NAME, "a")

In [None]:
# Open the series page in a regular (visible) Firefox window so that a captcha
# can be solved by hand. NOTE: `add_argument()` returns None, so its result must
# never be assigned back to `options`; doing so silently discards the options.
options = webdriver.FirefoxOptions()
driver = webdriver.Firefox(options=options)

# Start from an empty list so later cells never hit a NameError (or silently
# reuse a list from a previous run) when the captcha is not solved in time.
issues = []
try:
    driver.get(url)
    # Get all issues URLs in the series
    anchor_elements = get_anchor_elements(driver)
except RetryError:
    print("Captcha time limit exceeded")
else:
    # Once the captcha is solved, continue with the rest of the process.
    # Text and href must be read while the browser session is still alive.
    issues = [{elem.text: elem.get_attribute("href")} for elem in anchor_elements]
    # Presumably the page lists the newest issue first, so reversing processes
    # the issues oldest-first -- TODO confirm against the site.
    issues.reverse()
finally:
    # Always close the browser, including on unexpected errors.
    driver.quit()

In [None]:
# Output folder for the whole series, named after the last URL segment.
comic_series_folder = f"{comic_series}"
# Later cells build issue paths from this; it is the same relative path.
comic_series_path = comic_series_folder

folder_already_existed = os.path.isdir(comic_series_folder)
try:
    # exist_ok=True makes re-running the notebook a no-op instead of an error.
    os.makedirs(comic_series_folder, exist_ok=True)
except OSError as e:
    print(f"Error creating directory '{comic_series_folder}': {e}")
else:
    if folder_already_existed:
        print(f"Directory '{comic_series_folder}' already exists.")
    else:
        print(f"Directory '{comic_series_folder}' created successfully.")

In [None]:
@retry(
    # Pause briefly between attempts instead of hammering the server.
    wait=wait_fixed(1),
    stop=stop_after_delay(10),
)
def get_image(img_url, timeout=10):
    """Download ``img_url`` and return the ``requests.Response``.

    Any exception (connection error, timeout, HTTP error status) triggers a
    retry; once 10 seconds have elapsed since the first attempt tenacity
    gives up and raises ``RetryError``.

    Args:
        img_url: URL of the image to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            it a stalled connection would block forever, because the retry
            stop condition is only evaluated between attempts.

    Returns:
        The successful ``requests.Response``; the image bytes are in
        ``response.content``.

    Raises:
        tenacity.RetryError: If no attempt succeeded within the time budget.
    """
    response = requests.get(img_url, timeout=timeout)
    # Treat 4xx/5xx as failures so an error page is never saved as an image.
    response.raise_for_status()
    return response

In [None]:
@retry(
    retry=retry_if_exception_type(NoSuchElementException),
    wait=wait_fixed(20),
    stop=stop_after_delay(180),
)
def get_image_elements(driver):
    """Return the ``<img>`` elements inside the ``divImage`` container.

    First picks the option with value ``"1"`` in the ``selectReadType``
    dropdown (presumably the "all pages on one page" reading mode -- confirm
    against the site), then collects the images.

    While any lookup raises ``NoSuchElementException`` the whole function is
    retried with a fixed 20 second wait, giving up after 180 seconds
    (tenacity then raises ``RetryError``).

    Args:
        driver: Selenium WebDriver already pointed at an issue page.

    Returns:
        List of image WebElements found inside the container.
    """
    read_type_dropdown = Select(driver.find_element(By.ID, "selectReadType"))
    read_type_dropdown.select_by_value("1")
    image_container = driver.find_element(By.ID, "divImage")
    return image_container.find_elements(By.TAG_NAME, "img")


In [None]:
# Image shown in place of a page that could not be downloaded (looked up
# relative to the current working directory).
PLACEHOLDER_IMAGE = "no-image-placeholder.jpg"


def _safe_filename(title):
    """Remove characters that are not allowed in Windows file names."""
    return re.sub(r'[<>:"/\\|?*]', '', title)


def _load_page(path):
    """Load the image at ``path`` fully into memory and close the file.

    Holding no open file handle lets the temporary folder be deleted
    afterwards. Modes other than RGB / L are converted to RGB because some
    Pillow versions cannot write them (e.g. RGBA) to PDF.
    """
    with Image.open(path) as img:
        if img.mode in ("RGB", "L"):
            return img.copy()
        return img.convert("RGB")


def _collect_image_urls(issue_url):
    """Open ``issue_url`` in Firefox and return the ``src`` of each page image.

    Returns ``None`` if the page images never became available (the retry
    budget of ``get_image_elements`` ran out, e.g. captcha not solved).
    """
    # Visible window on purpose: a captcha may have to be solved by hand.
    # (add_argument() returns None, so never assign its result to `options`.)
    options = webdriver.FirefoxOptions()
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(issue_url)
        img_elements = get_image_elements(driver)
        # Read the attributes while the browser session is still alive.
        return [elem.get_attribute("src") for elem in img_elements]
    except RetryError:
        return None
    finally:
        driver.quit()


def _download_pages(img_srcs, images_folder):
    """Download every URL into ``images_folder`` and load it as a PDF page.

    A page that cannot be downloaded or decoded is replaced by the
    placeholder image; if the placeholder is unavailable too, the page is
    skipped. Returns ``(pages, successful_download_count)``.
    """
    pages = []
    successful_img_downloads = 0
    for i, img_url in enumerate(img_srcs, start=1):
        image_path = os.path.join(images_folder, f"image_{i}.jpg")
        try:
            response = get_image(img_url)
            with open(image_path, "wb") as image:
                image.write(response.content)
            page = _load_page(image_path)
        except Exception as e:
            print("Error: Image", i, "of", len(img_srcs))
            print(img_url)
            print("Reason:", e)
            try:
                page = _load_page(PLACEHOLDER_IMAGE)
            except OSError:
                print(f"Placeholder '{PLACEHOLDER_IMAGE}' unavailable; page {i} skipped.")
                continue
        else:
            successful_img_downloads += 1
        pages.append(page)
    return pages, successful_img_downloads


for issue in issues:

    # `issue_url` rather than `url`, so the notebook-level `url` is not clobbered.
    for title, issue_url in issue.items():

        title = _safe_filename(title)
        if not title.strip(" ."):
            # An empty (or "." / "..") title would make issue_folder resolve
            # to the series folder or its parent, and the cleanup below
            # would then delete every PDF already produced.
            print("Skipping issue with unusable title:", repr(title))
            continue

        pdf_path = os.path.join(comic_series_folder, f"{title}.pdf")
        if os.path.exists(pdf_path):
            # Already converted in an earlier run.
            continue

        issue_folder = os.path.join(comic_series_path, title)
        images_folder = os.path.join(issue_folder, "images")
        os.makedirs(images_folder, exist_ok=True)

        print("Trying", title)
        try:
            img_srcs = _collect_image_urls(issue_url)
            if img_srcs is None:
                print("Captcha time limit exceeded")
                continue
            print("Images found:", len(img_srcs))

            pages, successful_img_downloads = _download_pages(img_srcs, images_folder)
            print("Total successful image downloads", successful_img_downloads, "out of", len(img_srcs))

            if not pages:
                print("No pages available for", title, "- PDF not written.")
                continue

            pages[0].save(pdf_path, save_all=True, append_images=pages[1:])
            print("Successful", title)
        finally:
            # The downloaded images are only needed to build the PDF; remove
            # them whether or not this issue succeeded.
            try:
                shutil.rmtree(issue_folder)
                print(f"Folder '{issue_folder}' successfully deleted.")
            except OSError as e:
                print(f"Error: {issue_folder} : {e.strerror}")

# The completion popup uses the Win32 API; ctypes.windll only exists on Windows.
if hasattr(ctypes, "windll"):
    ctypes.windll.user32.MessageBoxW(None, f"Completed {comic_series}", "Success!", 0)
else:
    print(f"Completed {comic_series}")