### Importing Libraries

In [1]:
import glob
import math
import os
import random
import re
import string

import cv2
import numpy as np
import wand
from PIL import ImageFont, ImageDraw, Image
from wand.image import Image as wimg

### Auxiliary Functions

In [2]:
# Creating the base image (numpy array) used as captcha background
def create_blank(width, height, rgb_color = (0, 0, 0)):
    """Build a 3-channel image filled with a horizontal gray gradient.

    Parameters
    ----------
    width : int
        Image width in pixels (number of columns).
    height : int
        Image height in pixels (number of rows).
    rgb_color : tuple, optional
        Kept for backward compatibility only. It has never influenced the
        result: the background is always the gradient described below.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(height, width, 3)`` and dtype ``uint8``. Every
        column holds a single gray level, going linearly from 190 at the
        left edge to 255 at the right edge; the three channels are equal.
    """
    # One gray level per column, 190 (left) -> 255 (right)
    column_levels = np.linspace(190, 255, width).astype(int)

    # Broadcast the per-column level over every row and every channel
    image = np.zeros((height, width, 3), np.uint8)
    image[:] = column_levels[np.newaxis, :, np.newaxis]

    return image

In [3]:
# Captcha text generation
def generate_text():
    """Draw the characters of one captcha.

    Returns
    -------
    list of str
        Five single-character strings, each picked independently and
        uniformly from the digits ``'0'``-``'9'``.
    """
    # Alphabet the captcha is drawn from
    alphabet = '0123456789'
    last_index = len(alphabet) - 1

    picked = []
    for _ in range(5):
        picked.append(alphabet[random.randint(0, last_index)])

    return picked

In [4]:
# Creating the short noise line of the captcha (meant for the first character)
def create_short_line():
    """Generate the vertices of the short random noise polyline.

    The x coordinates are fixed at 8, 12, 18 and 25. The first y coordinate
    is drawn uniformly from 0..40; every following y is the previous one
    plus a random offset whose range depends on the previous y:

    * previous y <= 10      -> offset in [0, 10]   (only downwards/flat)
    * 10 < previous y < 30  -> offset in [-10, 10]
    * previous y >= 30      -> offset in [-10, 0]  (only upwards/flat)

    Returns
    -------
    numpy.ndarray
        Array of shape ``(4, 2)`` and dtype ``int32`` holding ``[x, y]``
        pairs.
    """
    # x axis points of the line vertices
    xs = (8, 12, 18, 25)

    # y axis point where the line starts
    y = random.randint(0, 40)
    ys = [y]

    # Each remaining vertex moves vertically relative to the previous one
    for _ in xs[1:]:
        if y > 10:
            if y < 30:
                offset = random.randint(-10, 10)
            else:
                offset = random.randint(-10, 0)
        else:
            offset = random.randint(0, 10)
        y = y + offset
        ys.append(y)

    # Point matrix for the line creation
    line_pts = np.array(list(zip(xs, ys)), np.int32)

    return line_pts

In [5]:
# Creating the big noise line of the captcha (meant to cross all the characters)
def create_big_line():
    """Generate the vertices of the long random noise polyline.

    The x coordinates are fixed at 8, 15, 25, 50, 75 and 100. The first y
    coordinate is drawn uniformly from 0..40; every following y is the
    previous one plus a random offset whose range depends on the previous y:

    * previous y <= 10      -> offset in [0, 10]   (only downwards/flat)
    * 10 < previous y < 30  -> offset in [-10, 10]
    * previous y >= 30      -> offset in [-10, 0]  (only upwards/flat)

    Returns
    -------
    numpy.ndarray
        Array of shape ``(6, 2)`` and dtype ``int32`` holding ``[x, y]``
        pairs.
    """
    # x axis points of the line vertices
    xs = (8, 15, 25, 50, 75, 100)

    # y axis point where the line starts
    y = random.randint(0, 40)
    ys = [y]

    # Each remaining vertex moves vertically relative to the previous one
    for _ in xs[1:]:
        if y > 10:
            if y < 30:
                offset = random.randint(-10, 10)
            else:
                offset = random.randint(-10, 0)
        else:
            offset = random.randint(0, 10)
        y = y + offset
        ys.append(y)

    # Points matrix for the line creation
    line_pts = np.array(list(zip(xs, ys)), np.int32)

    return line_pts

### Captcha Generation Function

In [6]:
# Generating captcha synthetic image
def captcha_generator(output_dir = 'synthetic_captchas'):
    """Generate one synthetic captcha and save it as ``<output_dir>/<text>.jpeg``.

    Steps: draw a random text on the gradient background, apply a wave
    distortion with Wand, overlay two random black polylines with OpenCV,
    blur, and save. The same file path is written three times (text only,
    distorted, final), so it is also used as the hand-off between
    PIL, Wand and OpenCV.

    Parameters
    ----------
    output_dir : str, optional
        Directory the image is written to. It is created if it does not
        exist. Defaults to ``'synthetic_captchas'``.

    Notes
    -----
    * The file name is the captcha text, so generating the same text twice
      overwrites the earlier image.
    * The font file ``'Rasa-Bold.ttf'`` must be resolvable by PIL.
    """
    # width and height parameters in pixels
    WIDTH, HEIGHT = 110, 38
    # thickness parameter for the noise lines in pixels
    THICKNESS = 2

    # Color codes for the captcha.
    # RED is stored reversed on purpose: the text is drawn by PIL, the array
    # is later converted RGB -> BGR for OpenCV and then handed back to PIL
    # WITHOUT converting back, so the channels are swapped once more on save.
    # The text therefore ends up as (163, 33, 71) in the saved RGB file.
    RED = tuple(reversed((163, 33, 71)))
    BLACK = (0, 0, 0)
    WHITE = (255, 255, 255)

    # Text generation for captcha
    generated_text = ''.join(generate_text())

    # Make sure the destination exists, and build the file path only once
    os.makedirs(output_dir, exist_ok = True)
    image_path = os.path.join(output_dir, '{}.jpeg'.format(generated_text))

    # Captcha background (rgb_color is currently ignored by create_blank)
    image = create_blank(WIDTH, HEIGHT, rgb_color = WHITE)

    # Transforming into PIL Image and drawing the text on it
    img_text = Image.fromarray(image)
    draw = ImageDraw.Draw(img_text)
    font = ImageFont.truetype('Rasa-Bold.ttf', 42)
    draw.text((8, -8), generated_text, fill = RED, font = font)

    # Saving image so that Wand can open it
    img_text.save(image_path)

    # Opening image as Wand to apply distortion and saving again
    # This can modify the original size
    with wimg(filename = image_path) as img:
        img.wave(amplitude = img.height / 32,
                 wave_length = img.width / 4)
        img.save(filename = image_path)

    # Reading the distorted image back as a numpy array for OpenCV; the
    # context manager releases the file before it is overwritten below
    with Image.open(image_path) as img_load:
        image_with_text = cv2.cvtColor(np.array(img_load), cv2.COLOR_RGB2BGR)

    # Create short and big noise lines
    line1_pts = create_short_line()
    line2_pts = create_big_line()

    # Define that the lines are not a closed polygon
    isClosed = False

    # Applying noise lines on image (drawn in place)
    cv2.polylines(image_with_text, [line1_pts], isClosed, BLACK, THICKNESS)
    cv2.polylines(image_with_text, [line2_pts], isClosed, BLACK, THICKNESS)

    # Blurring the image
    blurred_image = cv2.blur(image_with_text, (3, 3))

    # Transforming to PIL again for exporting (see the note on RED above)
    img_pil = Image.fromarray(blurred_image)

    # Saving final image
    img_pil.save(image_path)

In [7]:
# Running the generator 100k times.
# Each file is named after its 5-digit captcha text, so runs that draw an
# already-used text overwrite the earlier file: the number of images left on
# disk can be lower than N_CAPTCHAS.
N_CAPTCHAS = 100000
for _ in range(N_CAPTCHAS):
    captcha_generator()