In [2]:
import re

def get_episode_number(file_name: str) -> "int | None":
	"""Extract an episode number from a video file name.

	Strategies, tried in order (the first one that matches wins):

	1. A season/episode tag such as ``S01E07`` or ``s1e7`` -- the digits after
	   the ``E``. Any number of season digits is accepted.
	2. The first run of digits delimited by word boundaries, e.g. ``Show - 07.mkv``.
	3. The first run of digits anywhere in the name, e.g. ``ep07.mkv``.

	Args:
		file_name: Name of the episode file (with or without extension).

	Returns:
		The episode number as an ``int``, or ``None`` when the name contains no digits.
	"""
	patterns = (
		# A capture group is used instead of a look-behind because look-behinds must be
		# fixed-width, which restricted the season to exactly two digits.
		r'\b[Ss]\d+[Ee](\d+)',
		r'\b(\d+)\b',
		r'(\d+)',
	)
	for pattern in patterns:
		match = re.search(pattern, file_name)
		if match:
			return int(match.group(1))
	return None

In [3]:
import os

data_dir = 'data'
anime = 'fma'
total_chapters = 108

# Build the path from data_dir instead of repeating the literal 'data', so that
# changing data_dir moves every derived path consistently.
ep_path = f"{data_dir}/{anime}/episodes/"
ep_names = os.listdir(ep_path)

episodes = []
ready = []
# One "epN" folder name per episode file. Files whose name holds no digits
# (get_episode_number returns None) are skipped instead of yielding "epNone".
frames = [f"ep{number}" for number in map(get_episode_number, ep_names) if number is not None]
manga = [f"ch{i}" for i in range(1, total_chapters + 1)]
# panels intentionally shares the same list object as manga (one folder per chapter).
panels = manga

In [51]:
# Make sure the top-level data folder exists before anything is created inside it.
if not os.path.exists(data_dir):
	os.mkdir(data_dir)
root_dir = f"{data_dir}/{anime}"

# Parallel lists: main_dir_names[i] is the sub-directory whose child folders are listed in main_dir[i].
# Full layout (currently disabled, only the manga folders are built):
# main_dir_names = ['episodes', 'frames', 'manga', 'panels', 'ready'] # Name of the sub-directories
# main_dir = [episodes, frames, manga, panels, ready]			# Loading the list of sub-directories
main_dir_names = ['manga']
main_dir = [manga]
def create_dir_struct():
	"""Create the on-disk folder layout for the current anime.

	For every entry of the module-level ``main_dir_names`` the folder
	``{root_dir}/{name}`` is created, plus one child folder per item of the
	matching ``main_dir`` list. Folders that already exist are left untouched,
	so the function can be re-run safely.
	"""
	for name, children in zip(main_dir_names, main_dir):
		parent = f"{root_dir}/{name}"
		# exist_ok replaces the old check-then-create logic, which tested the bare
		# sub-directory name instead of the full path and therefore raised
		# FileExistsError on a second run when the child list was empty.
		os.makedirs(parent, exist_ok=True)
		for child in children:
			os.makedirs(f"{parent}/{child}", exist_ok=True)
		 
# Build the folder structure only when this code is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == '__main__':
	create_dir_struct()

In [None]:
# NOTE(review): in the visible notebook, chapter_images is only assigned inside the
# download loops of later cells, so this cell depends on one of them having run first.
chapter_images[0]

In [None]:
import mangadex

# Search MangaDex by title; the result of the last expression is shown as the cell output.
api = mangadex.Api()
api.get_manga_list(title = "Fullmetal Alchemist")

In [None]:
import concurrent.futures
import urllib.request
import mangadex
import time

# Dataset layout: manga pages are stored under <data_dir>/<anime>/manga.
data_dir = 'data'
anime = 'fma'
total_chapters = 108
manga_folder = f"{data_dir}/{anime}/manga"

# Throttling: `rate_limit` requests per `time_period` seconds,
# i.e. one pause of `delay` seconds (600 / 300 = 2.0) between chapter requests.
rate_limit = 300
time_period = 10 * 60 # 10 minutes in seconds
delay = time_period / rate_limit

# Resolve the series id on MangaDex.
# NOTE(review): this takes the SECOND search hit ([1]) -- presumably the first hit is a
# different entry; confirm, since it depends on the search ordering.
api = mangadex.Api()
manga_id = api.get_manga_list(title="Fullmetal Alchemist", limit=2)[1].manga_id

# Function to download an image given its URL
def download_image(url):
	"""Fetch ``url`` and store it inside the module-level ``save_location`` folder.

	The file is named after the text before the first '-' in the last URL path
	segment, and uses whatever follows the last '.' of the URL as its extension.
	"""
	last_segment = url.rsplit('/', 1)[-1]
	stem = last_segment.partition('-')[0]
	extension = url.rpartition('.')[2]
	target = f"{save_location}/{stem}.{extension}"
	urllib.request.urlretrieve(url, target)

# Loop through the chapters
for chapter in range(1, total_chapters + 1):
	# Get the list of image URLs for the current chapter
	chapter_images = api.chapter_list(manga=manga_id, chapter=str(chapter), translatedLanguage=['en'], limit=2)[0].fetch_chapter_images()

	# download_image reads this module-level name, so it must be set before the
	# workers start; the `with` block below finishes before the next chapter rebinds it.
	save_location = f"{manga_folder}/ch{chapter}"

	# Download the pages of this chapter concurrently on 32 worker threads
	with concurrent.futures.ThreadPoolExecutor(max_workers=32) as executor:
		# submit() + as_completed() lets us inspect each outcome. The previous
		# executor.map() call never consumed its results, so every exception raised
		# inside download_image was silently discarded.
		pending = {executor.submit(download_image, url): url for url in chapter_images}
		for finished in concurrent.futures.as_completed(pending):
			error = finished.exception()
			if error is not None:
				print(f"Failed to download {pending[finished]} (chapter {chapter}): {error!r}")

	# Pause between chapters to stay under the API rate limit
	time.sleep(delay)

In [None]:
# Most successful way to download the manga
import asyncio
import aiohttp
import mangadex
import os
from PIL import Image
from datetime import timedelta

def is_image_valid(image_path):
	"""Return True when the file at ``image_path`` opens and passes ``verify()``, else False."""
	try:
		with Image.open(image_path) as candidate:
			candidate.verify()
		return True
	except Exception:
		# Any failure while opening or verifying counts as "invalid".
		return False

# Dataset layout: manga pages are stored under <data_dir>/<anime>/manga.
data_dir = 'data'
anime = 'fma'
total_chapters = 108
manga_folder = f"{data_dir}/{anime}/manga"

# Throttling: `rate_limit` requests per `time_period` seconds,
# i.e. one pause of `delay` seconds (600 / 300 = 2.0) between chapter requests.
rate_limit = 300
time_period = 10 * 60 # 10 minutes in seconds
delay = time_period / rate_limit

# Resolve the series id on MangaDex.
# NOTE(review): this takes the SECOND search hit ([1]) -- presumably the first hit is a
# different entry; confirm, since it depends on the search ordering.
api = mangadex.Api()
manga_id = api.get_manga_list(title="Fullmetal Alchemist", limit=2)[1].manga_id


# Function to download an image given its URL
async def download_image(url, save_location, semaphore, max_retries=5):
	"""Download one page image, save it and check that it is a readable image.

	Args:
		url: Image URL. The saved file is named after the text before the first '-'
			of the last path segment, with the text after the last '.' as extension.
		save_location: Directory the image is written into.
		semaphore: ``asyncio.Semaphore`` bounding how many downloads run at once.
		max_retries: Maximum number of attempts when the request times out.

	Returns:
		``None`` on success, otherwise ``(url, chapter)`` so the caller can record the
		failure (non-200 response, invalid image, or every attempt timed out).
		``chapter`` is the module-level loop variable of the calling cell.
	"""
	image_name = url.split('/')[-1].split('-')[0]
	file_extension = url.split('.')[-1]
	image_path = f"{save_location}/{image_name}.{file_extension}"
	# Timeouts in seconds
	timeout = aiohttp.ClientTimeout(sock_connect=10, sock_read=15, total=20)

	for _ in range(max_retries):
		# The semaphore is taken per attempt and released before retrying. The old
		# recursive retry re-acquired it while still holding a slot, which can deadlock
		# once every slot is held by a task that timed out.
		async with semaphore:
			try:
				async with aiohttp.ClientSession(timeout=timeout, connector=aiohttp.TCPConnector(limit=10)) as session:
					async with session.get(url) as resp:
						if resp.status != 200:
							# Previously a non-200 answer was silently treated as success.
							return url, chapter
						image_data = await resp.read()
			except asyncio.TimeoutError:
				print("TimeoutError occured")
				continue
		# Save the image to a file
		with open(image_path, 'wb') as f:
			f.write(image_data)
		# Remove files that are not valid images and report them to the caller
		if not is_image_valid(image_path):
			os.remove(image_path)
			return url, chapter
		return None
	# Every attempt timed out
	return url, chapter

# Create a semaphore with a limit of 10 concurrent tasks
semaphore = asyncio.Semaphore(10)

# List to store the URLs and chapter numbers of invalid images
invalid_images = []

# Loop through the chapters
for chapter in range(1, total_chapters + 1):
	# Get the list of image URLs for the current chapter
	chapter_images = api.chapter_list(manga=manga_id, chapter=str(chapter), translatedLanguage=['en'], limit=2)[0].fetch_chapter_images()
	# Set the save location for the current chapter
	save_location = f"{manga_folder}/ch{chapter}"

	# Use asyncio.gather to download the images concurrently
	results = await asyncio.gather(*[download_image(url, save_location, semaphore) for url in chapter_images], return_exceptions=True)

	# Add the invalid images to the list
	invalid_images.extend([result for result in results if result is not None])

	await asyncio.sleep(delay)

# 9.34m, 6 timeouts

In [54]:
# Show the failures collected by the async download loop.
invalid_images

[]

In [None]:
import cv2

# Load the image exactly as stored on disk. With the default flag (IMREAD_COLOR)
# cv2.imread always returns a 3-channel array, so a 2-D shape test can never succeed.
image_path = 'data/fma/manga/ch49/x6.png'
image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
if image is None:
    # cv2.imread signals a missing or unreadable file by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Greyscale means either no colour channels at all, or colour channels that are
# identical everywhere (an alpha channel, if present, is ignored).
if image.ndim == 2 or image.shape[2] < 3:
    is_greyscale = True
else:
    blue, green, red = image[..., 0], image[..., 1], image[..., 2]
    is_greyscale = bool((blue == green).all() and (green == red).all())

# Check if the image is greyscale
if is_greyscale:
    print("The image is greyscale.")
else:
    print("The image is not greyscale.")