In [8]:
! pip freeze > requirements.txt

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image

In [6]:
def generate_by_merge(img_dir, save_dir, num, size=4, patch_width=224):
    """
    Build one size x size grid CAPTCHA by pasting randomly chosen images together.

    Every cell is filled with a random image taken from a randomly chosen
    sub-folder of ``img_dir``; the sub-folder name is recorded as the answer
    for that cell.  Two files are written:

    * ``<save_dir>/merge/reCAPTCHA_merge_<num>.jpg`` - the CAPTCHA picture
    * ``<save_dir>/merge/ans/reCAPTCHA_merge_<num>.txt`` - one grid row per
      line, each folder name followed by a comma

    img_dir: directory holding the source images, sorted into one sub-folder
        per class.  Entries that are not directories and sub-folders that
        contain nothing are ignored.  Every file inside a class folder is
        assumed to be an image Pillow can open.
    save_dir: directory under which the ``merge`` output folder is created.
    num: number of the CAPTCHA being created; used in the output file names.
    size: number of images per grid row (and per grid column).
    patch_width: width and height, in pixels, of a single grid cell.

    Returns None.  When ``img_dir`` offers no usable sub-folder a message is
    printed and no CAPTCHA is generated.
    """
    margin = 10  # white frame around the whole grid, in pixels
    gap = 4      # white strip between neighbouring cells, in pixels

    # os.path.join copes with a missing or present trailing separator, so no
    # manual "/" handling is needed.
    merge_dir = os.path.join(save_dir, "merge")
    ans_dir = os.path.join(merge_dir, "ans")
    os.makedirs(ans_dir, exist_ok=True)

    # Keep only non-empty directories: a stray file (e.g. .DS_Store) cannot be
    # listed as a folder, and an empty folder has no image to draw from.
    images_by_folder = {}
    for entry in os.listdir(img_dir):
        entry_path = os.path.join(img_dir, entry)
        if os.path.isdir(entry_path):
            files = os.listdir(entry_path)
            if files:
                images_by_folder[entry] = files
    if not images_by_folder:
        print("No sub directory found in given directory: Locate folders with images")
        return

    # One class folder per cell; the same folder may be drawn more than once.
    candidates = np.random.choice(list(images_by_folder), size * size, replace=True)

    # Canvas edge: frame on both sides + gaps between cells + the cells themselves.
    length = 2 * margin + gap * (size - 1) + patch_width * size
    captcha = Image.new('RGB', (length, length), (255, 255, 255))

    step = gap + patch_width  # distance between the origins of adjacent cells
    ans = []
    for row in range(size):
        row_ans = []
        for col in range(size):
            folder_name = str(candidates[row * size + col])
            row_ans.append(folder_name)
            file_name = str(np.random.choice(images_by_folder[folder_name]))
            # The context manager closes the source file even if resizing
            # fails; the resized copy is an independent in-memory image.
            with Image.open(os.path.join(img_dir, folder_name, file_name)) as source:
                patch = source.resize((patch_width, patch_width))
            captcha.paste(patch, (margin + step * col, margin + step * row))
            patch.close()
        ans.append(row_ans)

    # Answer file: every name is followed by a comma, including the last one
    # on each line.
    with open(os.path.join(ans_dir, f"reCAPTCHA_merge_{num}.txt"), "w") as f:
        for row_ans in ans:
            f.write(",".join(row_ans) + ",\n")

    # The canvas was created in RGB mode, so it can be written as JPEG as is.
    captcha.save(os.path.join(merge_dir, f"reCAPTCHA_merge_{num}.jpg"), "JPEG")

In [4]:
def generate_by_divide(img_dir, save_dir, size=4, width=928):
    """
    Turn every picture in ``img_dir`` into a size x size grid CAPTCHA by
    drawing a white grid over it.

    Each picture is converted to RGB, resized to ``width`` x ``width``, framed
    with a 10px white border and split into equally sized cells by 4px white
    lines.  Results are written to
    ``<save_dir>/divide/reCAPTCHA_divide_<k>.jpg`` where ``k`` counts the
    processed files from 1 in sorted file-name order.

    img_dir: directory that directly contains the source image files.
        Sub-directories are skipped.
    save_dir: directory under which the ``divide`` output folder is created.
    size: number of cells per grid row (and per grid column).
    width: edge length, in pixels, of the generated square CAPTCHA.  When the
        space left for cells is not divisible by ``size`` the spare pixels are
        absorbed into the right/bottom border so that all cells stay equal.

    Returns None.  A message is printed and nothing is generated when
    ``size``/``width`` leave no room for at least one pixel per cell, or when
    ``img_dir`` contains no files.
    """
    margin = 10  # white frame around the whole grid, in pixels
    gap = 4      # white strip between neighbouring cells, in pixels
    white = (255, 255, 255)

    # Refuse layouts that cannot hold the frame, the separators and at least
    # one pixel per cell.
    if size < 1:
        print("size must be at least 1")
        return
    min_width = 2 * margin + gap * (size - 1) + size
    if width < min_width:
        print(f"Width must be at least {min_width}px for a {size}x{size} grid")
        return

    out_dir = os.path.join(save_dir, "divide")
    os.makedirs(out_dir, exist_ok=True)

    # Only regular files can be opened as images.
    photos = sorted(
        name for name in os.listdir(img_dir)
        if os.path.isfile(os.path.join(img_dir, name))
    )
    if not photos:
        print("No photoes found")
        return

    # Edge length of one cell; with the defaults this is 224px.
    patch = (width - 2 * margin - gap * (size - 1)) // size

    # (start, stop) pixel ranges painted white along both axes: the leading
    # frame, one separator after each cell but the last, and the trailing
    # frame, which begins where the last cell ends.
    bands = [(0, margin)]
    for i in range(1, size):
        start = margin + gap * (i - 1) + patch * i
        bands.append((start, start + gap))
    bands.append((margin + gap * (size - 1) + patch * size, width))

    for num, name in enumerate(photos, start=1):
        # Convert up front so palette, greyscale or alpha images can take the
        # RGB fill colour and be written as JPEG.
        with Image.open(os.path.join(img_dir, name)) as source:
            captcha = source.convert("RGB").resize((width, width))

        # paste() with a colour and a (left, upper, right, lower) box fills
        # that rectangle.
        for start, stop in bands:
            captcha.paste(white, (start, 0, stop, width))  # vertical band
            captcha.paste(white, (0, start, width, stop))  # horizontal band

        captcha.save(os.path.join(out_dir, f"reCAPTCHA_divide_{num}.jpg"), "JPEG")
        captcha.close()

In [5]:
# Root directory for the generated CAPTCHAs. It is passed as `save_dir` to
# generate_by_merge / generate_by_divide, which write into its `merge` and
# `divide` sub-folders respectively.
OUTPUT_DIR = "../datasets/reCAPTCHA"

In [7]:
# Produce 200 merged 3x3 CAPTCHAs, numbered 1 through 200.
for captcha_no in range(1, 201):
    generate_by_merge(
        img_dir="../datasets/test/",
        save_dir=OUTPUT_DIR,
        num=captcha_no,
        size=3,
    )



In [13]:
# Convert every street photo into a grid CAPTCHA using the function's
# default layout (size=4, width=928).
generate_by_divide(img_dir="../datasets/town street/", save_dir=OUTPUT_DIR)