# Data Generator

## (0) Library Import

In [7]:
# Image Captcha
from __future__ import annotations
import os
import typing as t
import random
from PIL.Image import new as createImage, Image, QUAD, BILINEAR
from PIL.ImageDraw import Draw, ImageDraw
from PIL.ImageFilter import SMOOTH
from PIL.ImageFont import FreeTypeFont, truetype
from io import BytesIO

# Image Captcha Testing
import string
import matplotlib.pyplot as plt

# Image Captcha EEclass
import requests
from PIL import Image
from io import BytesIO
import numpy as np
import cv2

# Yolo Bounding Creator
import pandas as pd
import tqdm
import torch
import sys
current_dir = os.getcwd() # Get the current directory
sys.path.append(current_dir) # Add the directory where the module is located

# from pathlib import Path
from yolov9_2.utils.general import non_max_suppression, scale_boxes
from yolov9_2.models.common import DetectMultiBackend
# from yolov9_2.utils.dataloaders import LoadImages
from yolov9_2.utils.general import check_img_size
from yolov9_2.utils.torch_utils import select_device
from yolov9_2.utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste, letterbox, mixup, random_perspective)
from yolov9_2.utils.plots import Annotator, colors

## (1) Self-made ImageCaptcha

### (1.1) ImageCaptcha Generator

In [8]:
# Names exported by a star-import of this module.
__all__ = ['ImageCaptcha']
# A colour is either a 3-int tuple or a 4-int tuple (see random_color, which
# returns red, green, blue and optionally an opacity component).
ColorTuple = t.Union[t.Tuple[int, int, int], t.Tuple[int, int, int, int]]

# The default font is looked up in the current working directory, i.e. the
# directory the process was started from, not the directory of this file.
DATA_DIR = os.getcwd()
DEFAULT_FONTS = [os.path.join(DATA_DIR, 'DroidSansMono.ttf')] # Default font


class ImageCaptcha:
    """Create an image CAPTCHA.

    Much of the code is borrowed from wheezy.captcha, modified to be lighter
    on memory and friendlier to developers.

    ImageCaptcha has one built-in font, DroidSansMono, which is licensed under
    Apache License 2. You should always use your own fonts::

        captcha = ImageCaptcha(fonts=['/path/to/A.ttf', '/path/to/B.ttf'])

    Every font is loaded once per requested size and kept in memory, so the
    memory footprint grows with ``len(fonts) * len(font_sizes)``.

    :param width: The width of the CAPTCHA image.
    :param height: The height of the CAPTCHA image.
    :param fonts: Fonts to be used to generate CAPTCHA images.
    :param font_sizes: A font size is chosen at random from these values.
    :param space_random: When true, 0 to 2 blank glyphs (chosen at random)
        are inserted before every character and ``space`` is ignored.
    :param space: Only used when ``space_random`` is false. A positive value
        inserts exactly that many blank glyphs before every character, ``0``
        inserts none, and a negative value (the default) inserts a single
        blank glyph with probability ``1 - word_space_probability``.
    :param dot_width: Line width of each noise dot.
    :param dot_number: Number of noise dots.
    :param curve_number: Number of noise arcs.
    :param curve_width: Line width of each noise arc.
    :param curve_length: Stored on the instance; not read by any method of
        this class.
    """
    lookup_table: list[int] = [int(i * 1.97) for i in range(256)]
    character_offset_dx: tuple[int, int] = (0, 4)
    character_offset_dy: tuple[int, int] = (0, 6)
    character_rotate: tuple[int, int] = (-30, 30)
    character_warp_dx: tuple[float, float] = (0.1, 0.3)
    character_warp_dy: tuple[float, float] = (0.2, 0.3)
    word_space_probability: float = 0.5
    word_offset_dx: float = 0.25

    def __init__(
            self,
            width: int = 160,
            height: int = 60,
            fonts: list[str] | None = None,
            font_sizes: tuple[int, ...] | None = None,
            space_random: bool = False,
            space: int = -1,
            dot_width: int = 3,
            dot_number: int = 30,
            curve_number: int = 4,
            curve_width: int = 2,
            curve_length: int = 10):
        self._width = width
        self._height = height
        self._fonts = fonts or DEFAULT_FONTS
        self._font_sizes = font_sizes or (42, 50, 56)
        # Filled lazily by the ``truefonts`` property.
        self._truefonts: list[FreeTypeFont] = []
        self._space: int = space
        self._dot_width = dot_width
        self._dot_number = dot_number
        self._curve_number = curve_number
        self._curve_width = curve_width
        self._curve_length = curve_length
        self._space_random = space_random

    @property
    def truefonts(self) -> list[FreeTypeFont]:
        """Return every font/size combination, loading them on first access."""
        if not self._truefonts:
            self._truefonts = [
                truetype(name, size)
                for name in self._fonts
                for size in self._font_sizes
            ]
        return self._truefonts

    @staticmethod
    def create_noise_curve(image: Image, color: ColorTuple, width: int, number: int) -> Image:
        """Draw ``number`` random arcs of the given colour and line width.

        The image is modified in place and also returned.
        """
        w, h = image.size
        draw = Draw(image)  # one drawing context is enough for all arcs
        for _ in range(number):
            # Random bounding box whose second corner is never left of /
            # above the first one.
            x1 = random.randint(0, w)
            y1 = random.randint(0, h)
            x2 = random.randint(x1, w)
            y2 = random.randint(y1, h)
            # NOTE(review): start (160..200) is larger than end (0..20), so
            # the arc presumably wraps through 360 degrees - confirm against
            # the Pillow ``ImageDraw.arc`` documentation.
            start = random.randint(160, 200)
            end = random.randint(0, 20)
            draw.arc((x1, y1, x2, y2), start, end, fill=color, width=width)
        return image

    @staticmethod
    def create_noise_dots(
            image: Image,
            color: ColorTuple,
            width: int = 3,
            number: int = 30) -> Image:
        """Draw ``number`` short strokes ("dots") at random positions.

        The image is modified in place and also returned. A non-positive
        ``number`` draws nothing.
        """
        draw = Draw(image)
        w, h = image.size
        # ``range`` (instead of counting down to zero) also terminates when
        # a negative count is passed.
        for _ in range(number):
            x1 = random.randint(0, w)
            y1 = random.randint(0, h)
            draw.line(((x1, y1), (x1 - 1, y1 - 1)), fill=color, width=width)
        return image

    def _draw_character(
            self,
            c: str,
            draw: ImageDraw,
            color: ColorTuple) -> Image:
        """Render one character as a randomly offset, rotated and warped image.

        :param c: the character to render.
        :param draw: drawing context used only to measure the text.
        :param color: fill colour of the glyph.
        """
        font = random.choice(self.truefonts)
        _, _, w, h = draw.multiline_textbbox((1, 1), c, font=font)

        dx1 = random.randint(*self.character_offset_dx) # Random offset dx to the left
        dy1 = random.randint(*self.character_offset_dy) # Random offset dy to the top
        im = createImage('RGBA', (w + dx1, h + dy1))
        Draw(im).text((dx1, dy1), c, font=font, fill=color)

        # rotate
        im = im.crop(im.getbbox())
        im = im.rotate(
            random.uniform(*self.character_rotate),
            BILINEAR,
            expand=True,
        )

        # warp: stretch the glyph, then map a randomly skewed quadrilateral
        # back onto the original (w, h) box.
        dx2 = w * random.uniform(*self.character_warp_dx)
        dy2 = h * random.uniform(*self.character_warp_dy)
        x1 = int(random.uniform(-dx2, dx2))
        y1 = int(random.uniform(-dy2, dy2))
        x2 = int(random.uniform(-dx2, dx2))
        y2 = int(random.uniform(-dy2, dy2))
        w2 = w + abs(x1) + abs(x2)
        h2 = h + abs(y1) + abs(y2)
        data = (
            x1, y1,
            -x1, h2 - y2,
            w2 + x2, h2 + y2,
            w2 - x2, -y1,
        )
        im = im.resize((w2, h2))
        im = im.transform((w, h), QUAD, data)
        return im

    def create_empty_image(self, width: int, height: int, background: ColorTuple) -> Image:
        """Return a new RGB image of the given size filled with ``background``."""
        image = createImage('RGB', (width, height), background)
        return image

    def _spaces_before_character(self) -> int:
        """Return how many blank glyphs to insert before the next character."""
        if self._space_random:
            return random.randint(0, 2)  # random space length between 0 and 2
        if self._space > 0:
            return self._space           # fixed number of blanks
        if self._space == 0:
            return 0                     # no space
        # Negative ``space``: one blank with probability
        # ``1 - word_space_probability``.
        return 1 if random.random() > self.word_space_probability else 0

    def create_captcha_image(
            self,
            chars: str,
            color: ColorTuple,
            image: Image) -> Image:
        """Create the CAPTCHA image itself.

        :param chars: text to be generated.
        :param color: color of the text.
        :param image: background image the characters are pasted onto. It is
            resized to at least ``(width, height)``; the returned image is the
            resized copy.

        The color should be a tuple of 3 numbers, such as (0, 255, 255).
        An empty ``chars`` returns the (resized) background unchanged.
        """
        draw = Draw(image)

        images: list[Image] = []
        for c in chars:
            for _ in range(self._spaces_before_character()):
                images.append(self._draw_character(" ", draw, color))
            images.append(self._draw_character(c, draw, color))

        text_width = sum(im.size[0] for im in images)

        # Widen the canvas when the glyphs do not fit; it is shrunk back to
        # the requested width after pasting.
        width = max(text_width, self._width)
        image = image.resize((width, self._height))

        # ``max(..., 1)`` keeps empty text from dividing by zero.
        average = int(text_width / max(len(chars), 1))
        rand = int(self.word_offset_dx * average)
        offset = int(average * 0.1)

        for im in images:
            w, h = im.size
            mask = im.convert('L').point(self.lookup_table)
            image.paste(im, (offset, int((self._height - h) / 2)), mask)
            # A random non-positive shift lets neighbouring glyphs overlap.
            offset = offset + w + random.randint(-rand, 0)

        if width > self._width:
            image = image.resize((self._width, self._height))

        return image

    def generate_image(self, chars: str) -> Image:
        """Generate the image of the given characters.

        A random light background is created, noise dots and arcs are drawn
        on it, the characters are pasted on top and the result is smoothed.

        :param chars: text to be generated.
        """
        background = random_color(238, 255)
        color = random_color(20, 230, random.randint(220, 255))
        im = self.create_empty_image(self._width, self._height, background)
        im = self.create_noise_dots(im, color, self._dot_width, self._dot_number)
        im = self.create_noise_curve(im, color, self._curve_width, self._curve_number)
        im = self.create_captcha_image(chars, color, im)
        im = im.filter(SMOOTH) # Smooth the images
        return im

    def generate(self, chars: str, format: str = 'png') -> BytesIO:
        """Generate an Image Captcha of the given characters.

        :param chars: text to be generated.
        :param format: image file format
        :return: an in-memory buffer, rewound to the start, holding the
            encoded image.
        """
        im = self.generate_image(chars)
        out = BytesIO()
        im.save(out, format=format)
        out.seek(0)
        return out

    def write(self, chars: str, output: str, format: str = 'png') -> None:
        """Generate and write an image CAPTCHA data to the output.

        :param chars: text to be generated.
        :param output: output destination.
        :param format: image file format
        """
        im = self.generate_image(chars)
        im.save(output, format=format)


def random_color(
        start: int,
        end: int,
        opacity: int | None = None) -> ColorTuple:
    """Return a random colour whose channels lie in ``[start, end]``.

    Red, green and blue are drawn independently (in that order) with
    ``random.randint``. When ``opacity`` is given it is appended unchanged
    as a fourth component; otherwise a 3-tuple is returned.
    """
    channels = tuple(random.randint(start, end) for _ in range(3))
    return channels if opacity is None else channels + (opacity,)


### (1.2) ImageCaptcha Testing

In [9]:

# Generate a random string
def generate_random_string(length=5):
    """Return ``length`` characters drawn at random (with replacement).

    The alphabet is the upper- and lower-case ASCII letters plus the digits.
    """
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

def generate_captcha_image(captcha_text, save_path='captcha_sample.png', width=280, height=90, font_sizes=(60,), space_random=True, space=2 ,dot_width=4, dot_number=30, curve_number=4):
    """Render ``captcha_text`` with ImageCaptcha, save it and return the image.

    All keyword arguments other than ``save_path`` are forwarded unchanged to
    the ImageCaptcha constructor. The rendered image is written to
    ``save_path`` before being returned.
    """
    generator_options = {
        'width': width,
        'height': height,
        'font_sizes': font_sizes,
        'dot_width': dot_width,
        'dot_number': dot_number,
        'curve_number': curve_number,
        'space_random': space_random,
        'space': space,
    }
    rendered = ImageCaptcha(**generator_options).generate_image(captcha_text)
    rendered.save(save_path)  # persist the image
    return rendered

# Option 1: use a random CAPTCHA string
# captcha_text = generate_random_string() 
# Option 2: use a fixed CAPTCHA string
captcha_text = '1234'
# Generate the CAPTCHA image; generate_captcha_image also saves it to its
# default path, 'captcha_sample.png'
captcha_image = generate_captcha_image(captcha_text)

# Display the generated CAPTCHA image
plt.imshow(captcha_image)
plt.axis('off')  # hide the axes
plt.title(f'CAPTCHA: {captcha_text}')
plt.show()

# Save the image (same file generate_captcha_image already wrote by default)
captcha_image.save('captcha_sample.png')

## (2) Image Captcha from EEclass

In [10]:
def getImage(
        captcha_url="https://oauth.ccxp.nthu.edu.tw/v1.1/captchaimg.php?id=800f1c6ad132e45fb2a9ea7ceedf3c37bd028bb3",
        save_path="captcha_image.png",
        timeout=10):
    """Download a CAPTCHA image, save it to disk and load it with OpenCV.

    Args:
        captcha_url: str, URL of the CAPTCHA image.
        save_path: str, file the downloaded bytes are written to.
        timeout: seconds to wait for the server before ``requests`` raises;
            without a timeout the request could block forever.

    Returns:
        The array returned by ``cv2.imread(save_path)`` (``None`` when the
        saved bytes cannot be decoded), or ``None`` when the server does not
        answer with HTTP 200.
    """
    # Fetch the image with requests
    response = requests.get(captcha_url, timeout=timeout)

    # Check whether the request succeeded
    if response.status_code != 200:
        print(f"Failed to retrieve captcha image. Status code: {response.status_code}")
        return None

    # Save the image locally; later cells read this file back from disk
    with open(save_path, "wb") as f:
        f.write(response.content)

    return cv2.imread(save_path)

img = getImage()
# getImage returns None when the download fails; skip plotting in that case
# instead of crashing inside matplotlib.
if img is not None:
    # cv2.imread yields BGR channel order while matplotlib expects RGB, so
    # convert for display only; ``img`` itself stays in BGR.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')  # hide the axes
    plt.title('CAPTCHA EECLASS')
    plt.show()

## (3) Data Processing

### (3.1) Data Preprocess Function

In [11]:
def repair_gray(frame):
    """Binarise a BGR image and clean it with 2x2 erode/dilate passes.

    Steps: grayscale conversion, inverse Otsu threshold, erode x1,
    dilate x2, erode x3, then a bitwise inversion of the result. The
    inverted single-channel mask is written to ``cleaned_mask.jpg`` as a
    side effect and returned converted back to a 3-channel BGR image.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Inverse binarisation with an automatically chosen (Otsu) threshold.
    threshold_mode = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    binary = cv2.threshold(grayscale, 0, 255, threshold_mode)[1]

    # The same 2x2 structuring element is used for every morphological pass.
    square = np.ones((2, 2), np.uint8)
    passes = (
        (cv2.erode, 1),   # remove small noise
        (cv2.dilate, 2),  # grow the remaining strokes back
        (cv2.erode, 3),   # remove lines
    )
    cleaned = binary
    for morph, rounds in passes:
        cleaned = morph(cleaned, square, iterations=rounds)

    # Flip the polarity of the inverse threshold again.
    mask = cv2.bitwise_not(cleaned)

    # Keep a copy of the mask on disk for inspection.
    cv2.imwrite("cleaned_mask.jpg", mask)

    return cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

def to_black_white(image, threshold=210):
    '''
        Convert a BGR image to a two-level (black and white) image

        Args:
            image: cv2 image
            threshold: accepted but not used; the cut-off is chosen
                automatically by Otsu's method

        Returns:
            binary_image: binary black and white image
    '''
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    inverse_otsu = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    inverted = cv2.threshold(grayscale, 0, 255, inverse_otsu)[1]
    # Undo the inversion performed by the threshold mode.
    return cv2.bitwise_not(inverted)

def segment_using_column_scan(image, captcha_text, print_msg=False):
    '''
        Split a binary CAPTCHA image into per-character slices by scanning
        its columns from left to right

        Args:
            image: cv2 image (binary black and white, single channel)
            captcha_text: expected text; only its length is used
            print_msg: when true, print diagnostic messages

        Returns:
            character_images: list of segmented character images, or an
                empty list when the number of segments differs from
                len(captcha_text)
    '''
    expected_count = len(captcha_text)
    if print_msg:
        print(f'CAPTCHA text length: {expected_count}')

    # Work on an inverted copy so that column sums measure the inverted pixel values.
    inverted = cv2.bitwise_not(image.copy())

    height, width = inverted.shape
    if print_msg:
        print(f'height: {height}, width: {width}')

    ink_level = 255 * 1   # a column sum above this opens a run, below it closes one
    piece_width = 50      # runs wider than this are cut into pieces of this width
    min_width = 10        # narrower runs are treated as noise

    segments = []
    run_start = None
    for col in range(width):
        column_sum = np.sum(inverted[:, col])

        if run_start is None:
            # Outside a run: open one on the first sufficiently inked column.
            if column_sum > ink_level:
                run_start = col
            continue

        if column_sum >= ink_level:
            # Still inside the current run.
            continue

        # The run [run_start, col) has just ended.
        run_width = col - run_start
        if run_width <= piece_width:
            if run_width > min_width:
                segments.append(cv2.bitwise_not(inverted[:, run_start:col]))
        else:
            # Probably several connected characters: cut into fixed-width
            # pieces. Columns beyond the last full piece are discarded.
            for k in range(run_width // piece_width):
                left = run_start + k * piece_width
                segments.append(cv2.bitwise_not(inverted[:, left:left + piece_width]))
        run_start = None

    # A run that reaches the right edge is kept as is (no width filter).
    if run_start is not None:
        segments.append(cv2.bitwise_not(inverted[:, run_start:width]))

    if len(segments) != expected_count:
        if print_msg:
            print('Warning: Number of segmented characters is not equal to CAPTCHA text length')
        # Too many or too few segments: report failure with an empty list.
        segments = []

    return segments

def preprocess_captcha_image(image_path, captcha_text, print_img=False):
    '''
        Load a CAPTCHA image and clean it with repair_gray

        Args:
            image_path: str, path to the image file
            captcha_text: str, expected text; currently unused because
                character segmentation is disabled
            print_img: bool, show the original and cleaned images

        Returns:
            (characters, preprocessed_image): characters is always an empty
            list while segmentation is disabled; preprocessed_image is the
            cleaned single-channel image

        Raises:
            FileNotFoundError: if the image cannot be read from image_path
    '''
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an opaque error further down.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    # Display the image
    if print_img:
        # cv2 images are BGR, matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.title('Original CAPTCHA Image')
        plt.show()

    # Repair gray
    preprocessed_image = repair_gray(image)
    if print_img:
        plt.imshow(preprocessed_image, cmap='gray')
        plt.axis('off')
        plt.title('After open CAPTCHA Image')
        plt.show()

    # repair_gray returns a 3-channel image; reduce it to one channel again.
    preprocessed_image = cv2.cvtColor(preprocessed_image, cv2.COLOR_BGR2GRAY)

    # NOTE: column-scan segmentation is disabled. To re-enable it use
    # segment_using_column_scan(preprocessed_image, captcha_text, print_msg=print_img)
    characters = []
    return characters, preprocessed_image

### (3.2) Data Preprocess Testing

In [14]:
# Image Captcha
# Optional: regenerate the sample CAPTCHA before preprocessing it
# captcha_text = generate_random_string(4)
# captcha_text = '1234'
# # Generate and save a CAPTCHA image
# captcha_image = generate_captcha_image(captcha_text, 'captcha_sample.png', font_sizes=(60, 56, 48), space_random=True, dot_width=10, dot_number=50, curve_number=6)
# # Display the CAPTCHA image
# plt.imshow(captcha_image)

# Load the CAPTCHA image written by the earlier testing cell
captcha_image = cv2.imread('captcha_sample.png')
# Convert it to black and white and keep a copy on disk
black = to_black_white(captcha_image)
cv2.imwrite('captcha_black.png', black)

# Display the black and white CAPTCHA image
plt.imshow(black, cmap='gray')
plt.axis('off')
plt.title('CAPTCHA Image')
plt.show()
plt.close()

# Preprocess the CAPTCHA image (captcha_text comes from the earlier testing cell)
characters, preprocessd_img = preprocess_captcha_image('captcha_sample.png', captcha_text, print_img=True)
# Display the segmented characters
# NOTE: preprocess_captcha_image currently returns an empty character list,
# so this loop draws nothing.
plt.figure(figsize=(10, 3))
for i, char in enumerate(characters):
    plt.subplot(1, len(characters), i + 1)
    plt.imshow(char, cmap='gray')
    plt.axis('off')
    plt.title(f'Character {i + 1}')
    # Save the segmented characters
    # cv2.imwrite(f'character_{i + 1}.png', char)
    # print(f'character_{i + 1}.png')
plt.show()
# plt.close()
# Save the preprocessed image
cv2.imwrite('preprocessed_image.png', preprocessd_img)


# Same pipeline for the downloaded CAPTCHA ('captcha_image.png' is written by getImage)
characters, _ = preprocess_captcha_image('captcha_image.png', captcha_text)
# Display the segmented characters
plt.figure(figsize=(10, 3))
for i, char in enumerate(characters):
    plt.subplot(1, len(characters), i + 1)
    plt.imshow(char, cmap='gray')
    plt.axis('off')
    plt.title(f'Character {i + 1}')
plt.show()
# plt.close()



## (4) Dataset Generator

### (4.1) Single Character Dataset Generator

In [20]:
def single_character_dataset(data_size, img_path, csv_path):
    '''
        Generate a dataset of single character images

        Both directories are created when missing; when they already exist
        every file inside them is deleted first.

        Args:
            data_size: int, number of images to generate
            img_path: str, directory that receives the images (0.png, 1.png, ...)
            csv_path: str, directory that receives labels.csv (columns: image,label)
    '''
    # Create the output directories, or clear them when they already exist
    for directory in (img_path, csv_path):
        if not os.path.exists(directory):
            os.makedirs(directory)
        else:
            for file in os.listdir(directory):
                os.remove(os.path.join(directory, file))

    # The label file is opened once and kept open for the whole run instead
    # of being reopened in append mode for every image.
    with open(f'{csv_path}/labels.csv', 'w') as f:
        f.write('image,label\n')

        for i in tqdm.tqdm(range(data_size)):
            # Generate a random one-character CAPTCHA text
            captcha_text = generate_random_string(1)

            # Randomise the rendering attributes for every image
            font_sizes = (random.randint(40, 60), random.randint(40, 60), random.randint(40, 60))
            dot_width = random.randint(3, 10)
            dot_number = random.randint(20, 50)
            curve_number = random.randint(3, 10)
            image_name = f'{i}.png'

            # generate_captcha_image saves the image to the given path.
            generate_captcha_image(
                captcha_text,
                f'{img_path}/{image_name}',
                font_sizes=font_sizes,
                space_random=True,  # always use random spacing
                dot_width=dot_width,
                dot_number=dot_number,
                curve_number=curve_number,
            )

            f.write(f'{image_name},{captcha_text}\n')
            
# Generate a dataset of single character images
# single_character_dataset(100000, 'single_char_images', 'single_char_labels')

### (4.2) Single Character Dataset Preprocessing

In [None]:
def single_character_dataset_preprocess(data_size, img_path, csv_path, preprocessed_path, print_img=False):
    '''
        Preprocess the single character dataset

        Reads <csv_path>/labels.csv, runs preprocess_captcha_image on
        <img_path>/<i>.png for i in range(data_size) and, for every image
        that yields exactly one character, writes the character image to
        <preprocessed_path>/<i>.png and a row to
        <csv_path>/preproccessed_labels.csv.

        Args:
            data_size: int, number of images to preprocess
            img_path: str, path to the image dataset
            csv_path: str, directory holding labels.csv; also receives
                preproccessed_labels.csv
            preprocessed_path: str, directory for the preprocessed images
                (created when missing, emptied when it exists)
            print_img: bool, forwarded to preprocess_captcha_image
    '''
    # Create a directory to save the preprocessed dataset, or clear it
    if not os.path.exists(preprocessed_path):
        os.makedirs(preprocessed_path)
    else:
        for file in os.listdir(preprocessed_path):
            os.remove(os.path.join(preprocessed_path, file))

    # Read labels as plain strings: without this, labels such as "NA" or
    # "null" would be parsed as missing values.
    labels = pd.read_csv(f'{csv_path}/labels.csv', dtype={'label': str}, keep_default_na=False)

    # Count successful and failed segmentations
    successful_segmentations = 0
    failed_segmentations = 0

    # Mode 'w' truncates a file left over from a previous run.
    with open(f'{csv_path}/preproccessed_labels.csv', 'w') as f:
        f.write('image,label\n')

        for i in tqdm.tqdm(range(data_size)):
            label = labels['label'][i]
            # preprocess_captcha_image returns (characters, image); only the
            # character list is needed here.
            characters, _ = preprocess_captcha_image(f'{img_path}/{i}.png', label, print_img=print_img)

            if len(characters) == 1:
                cv2.imwrite(f'{preprocessed_path}/{i}.png', characters[0])
                f.write(f'{i}.png,{label}\n')
                successful_segmentations += 1
            else:
                failed_segmentations += 1

    total = successful_segmentations + failed_segmentations
    # Avoid dividing by zero when no image was processed.
    percentage = (successful_segmentations / total) * 100 if total else 0.0

    print(f'Successful segmentations: {successful_segmentations}')
    print(f'Failed segmentations: {failed_segmentations}')
    print(f'Total images: {total}')
    print(f'Percentage of successful segmentations: {percentage:.2f}%')

# Preprocess the single character dataset
# single_character_dataset_preprocess(100000, 'single_char_images', 'single_char_labels', 'single_char_preprocessed', print_img=False)


### (4.3) Four Character Dataset Generator

In [None]:
def four_character_dataset(data_size, img_path, csv_path):
    '''
        Generate a dataset of four character images

        Both directories are created when missing; when they already exist
        every file inside them is deleted first.

        Args:
            data_size: int, number of images to generate
            img_path: str, directory that receives the images (0.png, 1.png, ...)
            csv_path: str, directory that receives labels.csv (columns: image,label)
    '''
    # Create the output directories, or clear them when they already exist
    for directory in (img_path, csv_path):
        if not os.path.exists(directory):
            os.makedirs(directory)
        else:
            for file in os.listdir(directory):
                os.remove(os.path.join(directory, file))

    # The label file is opened once and kept open for the whole run instead
    # of being reopened in append mode for every image.
    with open(f'{csv_path}/labels.csv', 'w') as f:
        f.write('image,label\n')

        for i in tqdm.tqdm(range(data_size)):
            # Generate a random four-character CAPTCHA text
            captcha_text = generate_random_string(4)

            # Randomise the rendering attributes for every image
            font_sizes = (random.randint(40, 60), random.randint(40, 60), random.randint(40, 60))
            dot_width = random.randint(3, 10)
            dot_number = random.randint(20, 50)
            curve_number = random.randint(3, 5)
            image_name = f'{i}.png'

            # generate_captcha_image saves the image to the given path.
            generate_captcha_image(
                captcha_text,
                f'{img_path}/{image_name}',
                font_sizes=font_sizes,
                space_random=False,  # fixed spacing, see ``space``
                dot_width=dot_width,
                dot_number=dot_number,
                curve_number=curve_number,
                space=2,  # two blank glyphs before every character
            )

            f.write(f'{image_name},{captcha_text}\n')
            
# Generate a dataset of four character images
# four_character_dataset(10, 'four_char_images', 'four_char_labels')

### (4.4) Four Character Dataset Preprocessing

In [None]:
def four_character_dataset_preprocess(data_size, img_path, csv_path, preprocessed_path, pre_path, print_img=False):
    '''
        Preprocess the four character dataset

        Reads <csv_path>/labels.csv and runs preprocess_captcha_image on
        <img_path>/<i>.png for i in range(data_size). The cleaned image is
        always written to <pre_path>/<i>.png; when exactly four characters
        are returned they are written to <preprocessed_path>/<i>_<j>.png.
        <csv_path>/preproccessed_labels.csv is (re)created with only its
        header row; no label rows are written to it.

        Args:
            data_size: int, number of images to preprocess
            img_path: str, path to the image dataset
            csv_path: str, path to the CSV file
            preprocessed_path: str, path to save the preprocessed images
            pre_path: str, path to save the preprocessed images (without segmentation)
            print_img: bool, show intermediate images
    '''
    # Create the output directories, or clear them when they already exist
    for directory in (preprocessed_path, pre_path):
        if not os.path.exists(directory):
            os.makedirs(directory)
        else:
            for file in os.listdir(directory):
                os.remove(os.path.join(directory, file))

    # Mode 'w' truncates a file left over from a previous run.
    with open(f'{csv_path}/preproccessed_labels.csv', 'w') as f:
        f.write('image,label\n')

    # Read labels as plain strings: without this, labels such as "null" or
    # "NULL" would be parsed as missing values.
    labels = pd.read_csv(f'{csv_path}/labels.csv', dtype={'label': str}, keep_default_na=False)

    # Count successful and failed segmentations
    successful_segmentations = 0
    failed_segmentations = 0

    for i in tqdm.tqdm(range(data_size)):
        characters, pr = preprocess_captcha_image(f'{img_path}/{i}.png', labels['label'][i], print_img=print_img)
        print(f'characters: {len(characters)}')

        # Save the preprocessed (unsegmented) image
        cv2.imwrite(f'{pre_path}/{i}.png', pr)

        if len(characters) == 4:
            # Save, and optionally display, every segmented character
            for j, char in enumerate(characters):
                cv2.imwrite(f'{preprocessed_path}/{i}_{j}.png', char)
                if print_img:
                    plt.imshow(char, cmap='gray')
                    plt.axis('off')
                    plt.title(f'Character {j + 1}')
                    plt.show()
            successful_segmentations += 1
        else:
            failed_segmentations += 1

    total = successful_segmentations + failed_segmentations
    # Avoid dividing by zero when no image was processed.
    percentage = (successful_segmentations / total) * 100 if total else 0.0

    print(f'Successful segmentations: {successful_segmentations}')
    print(f'Failed segmentations: {failed_segmentations}')
    print(f'Total images: {total}')
    print(f'Percentage of successful segmentations: {percentage:.2f}%')

# Preprocess the four character dataset
# four_character_dataset_preprocess(10, 'four_char_images', 'four_char_labels', 'four_char_preprocessed', 'four_char_pre_con',print_img=False)


### (4.5) Single Character Dataset Generate + Preprocess

In [None]:
def single_char_combine(data_size, original_img_path, preprocessed_img_path, label_path):
    '''
        Generate a dataset of single character images and preprocess them

        Each output directory is created when missing and emptied otherwise.
        Captcha images are generated until `data_size` of them segment into
        exactly one character. A rejected attempt keeps its index, so its image
        is overwritten by the next attempt, and it never gets a row in
        labels.csv (rows are written only after a successful segmentation).

        Args:
            data_size: int, number of images to combine
            original_img_path: str, path to store the original generated images
            preprocessed_img_path: str, path to store the preprocessed images
            label_path: str, path to store the labels
    '''

    # Create every output directory, or clear it when it already exists
    for directory in (original_img_path, preprocessed_img_path, label_path):
        if not os.path.exists(directory):
            os.makedirs(directory)
        else:
            for file in os.listdir(directory):
                os.remove(os.path.join(directory, file))

    successful_segmentations = 0

    # The CSV and the progress bar are context-managed so both are closed even on errors
    with open(f'{label_path}/labels.csv', 'w') as label_file:
        label_file.write('image,label\n')

        with tqdm.tqdm(total=data_size) as progress_bar:
            while successful_segmentations < data_size:
                # Generate a random one-character CAPTCHA text
                captcha_text = generate_random_string(1)

                # Randomised rendering attributes
                font_sizes = (random.randint(40, 60), random.randint(40, 60), random.randint(40, 60))
                space_random = False  # Random spacing is disabled for single characters
                dot_width = random.randint(3, 10)
                dot_number = random.randint(20, 50)
                curve_number = random.randint(3, 5)

                image_name = f'{successful_segmentations}.png'
                save_image_path = f'{original_img_path}/{image_name}'

                # The generated image is written to save_image_path
                generate_captcha_image(captcha_text, save_image_path, font_sizes=font_sizes, space_random=space_random, dot_width=dot_width, dot_number=dot_number, curve_number=curve_number)

                # Preprocess the image and try to segment it
                characters, _ = preprocess_captcha_image(save_image_path, captcha_text, print_img=False)
                if len(characters) != len(captcha_text):
                    # Segmentation failed: retry with the same index, write no label
                    continue

                cv2.imwrite(f'{preprocessed_img_path}/{image_name}', characters[0])
                label_file.write(f'{image_name},{captcha_text}\n')
                successful_segmentations += 1
                progress_bar.update(1)
        
# Generate a dataset of single character images
# single_char_combine(100000, 'single_char_images', 'single_char_preprocessed', 'single_char_labels')

### (4.6) Four Character Dataset Generate + Preprocess

#### (4.6.1) Yolo Bounding Creator

In [15]:
class YoloBoundingCreator:
    """
    Load a YOLO detection model once and return bounding boxes for images.

    Args:
        weights: str or None, path to the model weights file. Defaults to
            yolov9_2/runs/train/exp/weights/best.pt (relative path).
        data: str or None, path to the dataset yaml. Defaults to data/datasets.yaml.
        device: str, device string handed to select_device (default 'cpu').
        imgsz: tuple, requested inference size, adjusted with check_img_size
            against the model stride (default (640, 640)).
    """

    def __init__(self, weights=None, data=None, device='cpu', imgsz=(640, 640)):
        # Default paths are built with os.path.join so they resolve on any OS;
        # the old literal 'data\datasets.yaml' also contained an invalid escape sequence.
        if weights is None:
            weights = os.path.join('yolov9_2', 'runs', 'train', 'exp', 'weights', 'best.pt')  # model weights file
        if data is None:
            data = os.path.join('data', 'datasets.yaml')

        self._weights = weights
        self._imgsz = imgsz
        self._device = select_device(device=device)
        self._dnn = False
        self._data = data
        self._half = False
        self._model = DetectMultiBackend(self._weights, device=self._device,  dnn=self._dnn, data=self._data, fp16=self._half)
        self._stride, self._names, self._pt = self._model.stride, self._model.names, self._model.pt
        self._imgsz = check_img_size(self._imgsz, s=self._stride)

    def bouding_generator(self, source_image, conf_thres=0.1, iou_thres=0.45):
        """
        Detect objects in an image and return their boxes ordered left to right.

        Args:
            source_image: np.array, H x W x 3 image (as returned by cv2.imread).
            conf_thres: float, confidence threshold passed to non_max_suppression.
            iou_thres: float, IoU threshold passed to non_max_suppression.

        Returns:
            np.array of shape (n, 4) holding x1, y1, x2, y2 in source image
            coordinates, sorted by ascending x1. Empty when nothing is detected.
        """
        im0 = source_image
        im = letterbox(im0, self._imgsz, stride=self._stride, auto=self._pt)[0]  # padded resize
        im = im.transpose((2, 0, 1))[::-1]  # HWC -> CHW, then reverse the channel order
        im = np.ascontiguousarray(im)

        img = torch.from_numpy(im).to(self._device)
        img = img.half() if self._model.fp16 else img.float()
        img /= 255.0
        if len(img.shape) == 3:
            img = img[None]  # add the batch dimension

        # Inference only: no autograd bookkeeping is needed
        with torch.no_grad():
            pred = self._model(img)
        pred = pred[0][1]
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=None, agnostic=False)

        # A single image is fed to the model, so only the first result is relevant
        det = pred[0]
        if len(det):
            # Map the boxes from the letterboxed size back to the source image size
            det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], im0.shape).round()

        bouding_box = det[:, :4].cpu().numpy()

        # sort the bounding box by x1
        bouding_box = bouding_box[bouding_box[:, 0].argsort()]

        return bouding_box

def draw_bounding_box(image, bounding_box):
    """
    Outline every bounding box on the image and return the image.

    Args:
        image: np.array, image handed to cv2.rectangle for each box.
        bounding_box: sequence of (x1, y1, x2, y2) boxes; values are cast to int.

    Returns:
        The same image object that was passed in.
    """
    green = (0, 255, 0)  # outline colour
    thickness = 1  # outline thickness in pixels

    for box in bounding_box:
        left, top, right, bottom = box
        top_left = (int(left), int(top))
        bottom_right = (int(right), int(bottom))
        cv2.rectangle(image, top_left, bottom_right, green, thickness)

    return image

# Module-level detector instance; it is also passed to four_char_combine further down
yolo_bounding_creator = YoloBoundingCreator()

# Testing: run the detector on one image and save the result for visual inspection
source_path = "preprocessed_image.png" # Path of the test image
source_image = cv2.imread(source_path)
bouding_box = yolo_bounding_creator.bouding_generator(source_image)
print(bouding_box)
image_with_bounding_box = draw_bounding_box(source_image, bouding_box)
# Save the image with bounding box
cv2.imwrite("image_with_bounding_box.jpg", image_with_bounding_box)



YOLO  2024-12-1 Python-3.10.15 torch-2.5.1+cpu CPU

Fusing layers... 
yolov9-c summary: 604 layers, 50698278 parameters, 0 gradients, 236.6 GFLOPs


[[         11          18          41          78]
 [         49          16          75          77]
 [        106          15         135          81]
 [        187          17         210          77]]


True

In [16]:
def get_char_region(image, bounding_box):
    """
    Crop the full-height vertical strip of the image covered by a bounding box.

    Args:
        image: np.array, input image.
        bounding_box: list, bounding box coordinates (x1, y1, x2, y2).

    Returns:
        np.array, every row of the image restricted to columns x1 (inclusive)
        through x2 (exclusive), after casting the coordinates to int.
    """
    left, _top, right, _bottom = (int(coord) for coord in bounding_box)
    # The vertical coordinates are deliberately ignored so the crop keeps
    # the original image height
    return image[:, left:right]

In [17]:
def four_char_combine(data_size, original_img_path, preprocessed_img_path, segment_img_path, label_path, yolo_bounder):
    '''
        Generate a dataset of four character images and preprocess them

        Each output directory is created when missing and emptied otherwise.
        Captcha images are generated until `data_size` of them yield at least
        four bounding boxes. A rejected attempt keeps its index, so its files
        are overwritten by the next attempt, and it gets no row in either CSV
        (rows are written only after a successful segmentation).

        Args:
            data_size: int, number of images to combine
            original_img_path: str, path to store the original generated images
            preprocessed_img_path: str, path to store the preprocessed images (not segmented)
            segment_img_path: str, path to store the preprocessed images (segmented)
            label_path: str, path to store the labels
            yolo_bounder: YoloBoundingCreator, YOLO bounding box creator
    '''

    # Create every output directory, or clear it when it already exists
    for directory in (original_img_path, preprocessed_img_path, segment_img_path, label_path):
        if not os.path.exists(directory):
            os.makedirs(directory)
        else:
            for file in os.listdir(directory):
                os.remove(os.path.join(directory, file))

    successful_segmentations = 0
    total_image = 0

    # labels.csv holds one row per whole image, segmented_labels.csv one row per character crop.
    # Files and progress bar are context-managed so they are closed even on errors.
    with open(f'{label_path}/labels.csv', 'w') as label_file:
        with open(f'{label_path}/segmented_labels.csv', 'w') as segmented_file:
            label_file.write('image,label\n')
            segmented_file.write('image,label\n')

            with tqdm.tqdm(total=data_size) as progress_bar:
                while successful_segmentations < data_size:
                    total_image += 1

                    # Generate a random CAPTCHA text
                    captcha_text = generate_random_string(4)

                    # Randomised rendering attributes
                    font_sizes = (random.randint(40, 60), random.randint(40, 60), random.randint(40, 60))
                    space_random = True  # Always random space
                    dot_width = random.randint(3, 10)
                    dot_number = random.randint(20, 50)
                    curve_number = random.randint(3, 5)
                    space = 2  # Fixed space

                    image_name = f'{successful_segmentations}.png'
                    save_image_path = f'{original_img_path}/{image_name}'

                    # The generated image is written to save_image_path
                    generate_captcha_image(captcha_text, save_image_path, font_sizes=font_sizes, space_random=space_random, dot_width=dot_width, dot_number=dot_number, curve_number=curve_number, space=space)

                    # Preprocess the image; only the un-segmented result is used here
                    _, preprocessd_img = preprocess_captcha_image(save_image_path, captcha_text, print_img=False)

                    # Save the image without segmentation
                    cv2.imwrite(f'{preprocessed_img_path}/{image_name}', preprocessd_img)

                    # Yolo bounding box (the detector is given a 3-channel copy)
                    preprocessed_image = cv2.cvtColor(preprocessd_img, cv2.COLOR_GRAY2BGR)
                    bounding_boxes = yolo_bounder.bouding_generator(preprocessed_image)

                    if len(bounding_boxes) < 4:
                        # Not enough characters found: retry with the same index
                        continue

                    # YoloBoundingCreator.bouding_generator returns boxes sorted by x1,
                    # so this keeps the four leftmost boxes (not the most confident ones)
                    bounding_boxes = bounding_boxes[:4]

                    for i, bounding_box in enumerate(bounding_boxes):
                        char_region = get_char_region(preprocessd_img, bounding_box)
                        cv2.imwrite(f'{segment_img_path}/{successful_segmentations}_{i}.png', char_region)
                        segmented_file.write(f'{successful_segmentations}_{i}.png,{captcha_text[i]}\n')

                    label_file.write(f'{image_name},{captcha_text}\n')
                    successful_segmentations += 1
                    progress_bar.update(1)

    # Guard against data_size == 0, where no image is ever generated
    success_rate = (successful_segmentations / total_image) * 100 if total_image else 0.0
    print(f'Total successful segmentations: {successful_segmentations}')
    print(f'Total images: {total_image}')
    print(f'Percentage of successful segmentations: {success_rate:.2f}%')

# Generate a dataset of four character images
# Earlier invocations kept for reference; the first one omits the required yolo_bounder argument:
# four_char_combine(3000, 'yolo/original', 'yolo/preprocess', 'yolo/segment', 'yolo/label')
# four_char_combine(1, 'test_data_generator/original', 'test_data_generator/preprocess', 'test_data_generator/segment', 'test_data_generator/label', yolo_bounding_creator)
# Runs when the cell executes: requests 25000 images under dataset/overlap/train using the module-level detector
four_char_combine(25000, 'dataset/overlap/train/original', 'dataset/overlap/train/preprocess', 'dataset/overlap/train/segment', 'dataset/overlap/train/labels', yolo_bounding_creator)

100%|██████████| 10/10 [00:05<00:00,  1.86it/s]

Total successful segmentations: 10
Total images: 11
Percentage of successful segmentations: 90.91%



