In [1]:
# -*- coding: utf-8 -*- 

#------------------------------------ Imports ----------------------------------#

# Import python imaging libs
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageFilter

# Import noise creating library
from noisify.recipes import human_error, machine_error

# Import operating system lib and time
import os
import time

# Import random generator
from random import randint

# import csv reader and pandas
import csv
import pandas as pd

In [2]:
#------------------------------------ Cleanup ----------------------------------#	
def Cleanup():
    """Remove stale artefacts before a new generation run.

    Reads the module-level ``font_dir`` and ``out_dir`` settings and

    * deletes every ``.DS_Store`` file found under ``font_dir``.
      GenerateCharacters() walks ``font_dir`` recursively and hands every
      file it finds to the font loader, so these files must not be left in
      any sub-folder;
    * makes sure ``out_dir`` exists, then deletes every regular file directly
      inside it. Sub-directories of ``out_dir`` are left untouched.

    Returns:
        None
    """
    # Delete ds_store files. os.path.join is used so the result does not
    # depend on font_dir ending with a path separator.
    for dirpath, _subdirs, names in os.walk(font_dir):
        if '.DS_Store' in names:
            os.unlink(os.path.join(dirpath, '.DS_Store'))

    # A fresh checkout may not have the output directory yet; create it
    # instead of failing in os.listdir (and later when images are saved).
    os.makedirs(out_dir, exist_ok=True)

    # Delete all files from output directory
    for entry in os.listdir(out_dir):
        entry_path = os.path.join(out_dir, entry)
        if os.path.isfile(entry_path):
            os.unlink(entry_path)

In [3]:
#---------------------------------- Input and Output ---------------------------#

# Directory containing fonts. It is walked recursively by
# GenerateCharacters(), and Cleanup() builds a path from it by plain string
# concatenation, so keep the trailing slash.
font_dir = './fonts-test/'

# Output directory for the generated PNG files. File names are built as
# out_dir + <name>, so the trailing slash is required here as well.
# Cleanup() deletes every regular file directly inside this directory.
out_dir = './chars-test/'

In [4]:
# Load the character table with the csv module so every field stays a plain
# string exactly as it appears in the file.
with open('chars.csv', newline='', encoding='utf-8') as csvfile:
    data = csv.reader(csvfile, delimiter=',', quotechar='"')
    dataset = list(data)

# The first row is skipped (presumably a header line - verify against
# chars.csv); the remaining rows are labelled explicitly and keyed by the
# 'index' column.
chars_df = pd.DataFrame(
    dataset[1:],
    columns=['index', 'char', 'name'],
).set_index('index')

In [31]:
def duration_text(secs):
    """Format a duration given in seconds as a short display string.

    The value is rounded to two decimals first. Depending on its size the
    result has one of three shapes:

    * below one minute: ``"<secs>s"``            (e.g. ``"5.5s"``)
    * below one hour:   ``"M:SS.ss" + "s"``      (e.g. ``"1:05.33s"``)
    * otherwise:        ``"H:MM:SS.ss" + "s"``   (e.g. ``"1:02:05.50s"``)

    Args:
        secs: duration in seconds (int or float).

    Returns:
        str: the formatted duration.
    """
    secs = round(secs, 2)

    # Sub-minute (and negative) values are printed as plain seconds.
    if secs < 60:
        return str(secs) + "s"

    # divmod on a float yields float quotients; convert the minute count to
    # int so the text reads "1:05.33s" rather than "1.0:5.329999999999998s".
    total_mins, remain_secs = divmod(secs, 60)
    hours, remain_mins = divmod(int(total_mins), 60)

    # Seconds are zero-padded and fixed to two decimals, which also hides
    # the floating-point residue left by the modulo operation.
    if hours > 0:
        return "{}:{:02d}:{:05.2f}s".format(hours, remain_mins, remain_secs)
    return "{}:{:05.2f}s".format(remain_mins, remain_secs)

In [32]:
#------------------------------ Generate Characters ----------------------------#
def GenerateCharacters():
    """Render every character of ``chars_df`` with every font under ``font_dir``.

    For each font file one grayscale PNG is written to ``out_dir`` for every
    combination of character, font size, background color, rotation and
    quality (nested in that order). Progress, the time spent on each font and
    an ETA for the remaining fonts of the current directory are printed.

    Reads the module-level settings ``font_dir``, ``out_dir``, ``chars_df``,
    ``font_sizes``, ``background_colors``, ``rotations``, ``qualities``,
    ``image_size`` and ``errors``; they must be defined before this is called.

    Returns:
        list of dict: one record per saved image with the keys ``index``,
        ``char``, ``char_name``, ``font_file``, ``font_size``, ``rotation``,
        ``quality``, ``background_color`` and ``file_name``.
    """
    # Running image counter; it prefixes every file name and is shared
    # across all fonts so names stay unique.
    k = 1

    files_list = []
    durations = []

    # Process the font files
    for dirname, _dirnames, filenames in os.walk(font_dir):
        size = len(filenames)
        item = 1
        # For each font do
        for filename in filenames:
            print(item, "/", size, ":", filename, "... ", end="")
            start_time = time.time()
            # Get font full file path
            font_resource_file = os.path.join(dirname, filename)

            # A loaded font depends only on (file, size), so it is loaded
            # once per size and reused instead of being re-read from disk
            # for every single image.
            fonts_by_size = {}

            # For each character do
            for index, row in chars_df.iterrows():
                character = row['char']

                # For each font size do (non-positive sizes are skipped)
                for font_size in font_sizes:
                    if font_size <= 0:
                        continue

                    font = fonts_by_size.get(font_size)
                    if font is None:
                        font = ImageFont.truetype(font_resource_file, font_size, encoding='unic')
                        fonts_by_size[font_size] = font

                    # The text extent is the same for every background,
                    # rotation and quality, so measure it once here.
                    font_width, font_height = _text_size(font, character)
                    position = ((image_size - font_width)/2,
                                (image_size - font_height)/2)

                    for background_color in background_colors:
                        for rotation in rotations:
                            for quality in qualities:
                                char_image = _render_character(
                                    character, font, position,
                                    background_color, rotation, quality)

                                # Final file name
                                file_name = (out_dir + str(k) + '_' +
                                             filename + '_fs_' +
                                             str(font_size) + '_bc_' +
                                             str(background_color) + '_rot_' +
                                             str(rotation) + '_' +
                                             str(quality) + '.' +
                                             row['name'] + '.png')

                                # Save image
                                char_image.save(file_name)

                                # Key order defines the column order of the
                                # DataFrame built from the returned list.
                                files_list.append({
                                    'index': index,
                                    'char': character,
                                    'char_name': row['name'],
                                    'font_file': filename,
                                    'font_size': font_size,
                                    'rotation': rotation,
                                    'quality': quality,
                                    'background_color': background_color,
                                    'file_name': file_name,
                                })

                                # Increment counter
                                k = k + 1

            end_time = time.time()
            duration = end_time - start_time
            durations.append(duration)
            # ETA = fonts left in this directory * mean time per font so far
            avg_duration = sum(durations) / len(durations)
            eta = (size - item) * avg_duration
            eta_text = duration_text(eta)
            print(round(duration,2), " s. ETA: ", eta_text, ".", sep="")

            item += 1

    return files_list


def _text_size(font, text):
    """Return the ``(width, height)`` used to centre ``text`` drawn with ``font``."""
    # Pillow 10 removed FreeTypeFont.getsize(); use it while it exists so
    # older installations produce exactly the same pixels as before.
    if hasattr(font, 'getsize'):
        return font.getsize(text)
    # NOTE(review): (right - left, bottom) is intended to reproduce the
    # values the legacy getsize() reported - confirm against the installed
    # Pillow version before regenerating a dataset.
    left, _top, right, bottom = font.getbbox(text)
    return (right - left, bottom)


def _render_character(character, font, position, background_color, rotation, quality):
    """Draw one character on a square grayscale canvas and return the image."""
    # Grayscale image of image_size x image_size filled with the background
    char_image = Image.new('L', (image_size, image_size), background_color)
    draw = ImageDraw.Draw(char_image)

    # Fill level is the background mirrored around 245 plus a random jitter
    # of 0..10.
    # NOTE(review): for the mid-gray backgrounds configured in this notebook
    # (135-155) this leaves the glyph within roughly 15-65 gray levels of
    # the background - confirm that low contrast is intended.
    fill_color = (245 - background_color) + randint(0, 10)
    draw.text(position, character, fill_color, font=font)

    # Rotate; corners exposed by the rotation take the background color
    char_image = char_image.rotate(angle=rotation, fillcolor=background_color)

    # If quality is set to noisy, add noise to the image
    if quality == "noisy":
        combined_noise = machine_error(errors[0]) + human_error(errors[1])
        char_image = list(combined_noise(char_image))[0]

    return char_image

In [25]:
#------------------------------------- Colors ----------------------------------#

# Background gray levels, grouped by brightness band
white_colors = (215, 225, 235, 245)
black_colors = (0, 10, 20, 30)
gray_colors = (135, 145, 155)

# Every background that gets rendered: light first, then dark, then mid-gray
background_colors = (*white_colors, *black_colors, *gray_colors)

#-------------------------------------- Sizes ----------------------------------#

# Font sizes to render each character with
font_sizes = (18, 24)

# Edge length of the square output image, in pixels
image_size = 32

#-------------------------------------- Augmentation Params --------------------#

# Blur factor (not referenced by the code visible in this notebook)
blur = 1

# Image variants: "noisy" images get machine + human error noise applied
qualities = ["clean", "noisy"]
# errors[0] is passed to machine_error, errors[1] to human_error
errors = (20, 45)

# Rotation angles applied to every rendered character
rotations = (-45, -5, 0, 5, 45)

In [None]:
#-------------------------------------- Main -----------------------------------#

# Do cleanup: removes the .DS_Store file from font_dir and deletes every
# file in out_dir, so results of a previous run are lost at this point.
# NOTE(review): this cell needs the path, chars_df and augmentation-parameter
# cells to have been executed first; the saved execution counts in this
# notebook are out of order, so verify with Restart Kernel -> Run All.
Cleanup()

# Generate characters: writes one PNG per parameter combination and returns
# one record (dict) per written file.
files_data = GenerateCharacters()

1 / 1 : Bitter-Regular.otf ... 

In [8]:
# One row per generated image; columns follow the key order of the records
# returned by GenerateCharacters().
output_df = pd.DataFrame(files_data)

In [9]:
# Preview of the file mapping.
# NOTE(review): the output saved with this cell has no 'rotation'/'quality'
# columns and shows './chars/' paths, so it appears to come from an earlier
# version of the generator - re-run before relying on it.
output_df

Unnamed: 0,index,char,char_name,font_file,font_size,background_color,file_name
0,0,0,0,AbrilFatface-Regular.otf,18,215,./chars/1_AbrilFatface-Regular.otf_fs_18_bc_21...
1,0,0,0,AbrilFatface-Regular.otf,18,225,./chars/2_AbrilFatface-Regular.otf_fs_18_bc_22...
2,0,0,0,AbrilFatface-Regular.otf,18,235,./chars/3_AbrilFatface-Regular.otf_fs_18_bc_23...
3,0,0,0,AbrilFatface-Regular.otf,18,245,./chars/4_AbrilFatface-Regular.otf_fs_18_bc_24...
4,0,0,0,AbrilFatface-Regular.otf,18,0,./chars/5_AbrilFatface-Regular.otf_fs_18_bc_0....
...,...,...,...,...,...,...,...
183255,97,_,sym_underscore,Znikomit.otf,24,20,./chars/183256_Znikomit.otf_fs_24_bc_20.sym_un...
183256,97,_,sym_underscore,Znikomit.otf,24,30,./chars/183257_Znikomit.otf_fs_24_bc_30.sym_un...
183257,97,_,sym_underscore,Znikomit.otf,24,135,./chars/183258_Znikomit.otf_fs_24_bc_135.sym_u...
183258,97,_,sym_underscore,Znikomit.otf,24,145,./chars/183259_Znikomit.otf_fs_24_bc_145.sym_u...


In [10]:
# Persist the image -> label mapping next to the notebook; the DataFrame's
# positional index is not written.
output_df.to_csv('files_mapping.csv', index=False)

In [12]:
# Number of distinct font files that contributed at least one image
output_df['font_file'].nunique(dropna=False)

85