# 폰트를 활용하여 데이터 생성 코드

## 1) 데이터 생성을 위한 함수

In [1]:
#-*-coding:utf-8-*-

import argparse
import sys
import glob
import numpy as np
import io, os
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import collections

import random
import matplotlib.pyplot as plt
from matplotlib import font_manager
from tqdm import tqdm

In [2]:
# Directory containing the source font file ("source_font.ttf").
SRC_PATH = '../get_data/fonts/source/'
# Directory containing the target font files, named by zero-padded number
# (e.g. "01.ttf").
TRG_PATH = '../get_data/fonts/target/'
# Directory the generated example PNG images are written to.
OUTPUT_PATH = '../get_data/dataset-11172/'

In [3]:
## Image.new(mode, size, color): mode "L" is 8-bit grayscale; color is the initial fill value (255 = white), not a channel count.


def draw_single_char(ch, font, canvas_size):
    """Render ``ch`` centered on a white, square, grayscale canvas.

    Args:
        ch: Text (normally a single character) to draw.
        font: ``ImageFont`` instance used to measure and draw the text.
        canvas_size: Side length of the square canvas, in pixels.

    Returns:
        The rendered mode-``'L'`` image, or ``None`` when every pixel is
        still white after drawing (treated as "the font has no glyph for
        this character").
    """
    image = Image.new('L', (canvas_size, canvas_size), color=255)
    drawing = ImageDraw.Draw(image)

    if hasattr(drawing, 'textsize'):
        w, h = drawing.textsize(ch, font=font)
    else:
        # ``ImageDraw.textsize`` no longer exists in recent Pillow releases.
        # NOTE(review): the right/bottom edges of the origin-anchored bounding
        # box are expected to match what ``textsize`` returned -- confirm
        # against the Pillow deprecation notes.
        _, _, w, h = drawing.textbbox((0, 0), ch, font=font)

    drawing.text(
        ((canvas_size - w) / 2, (canvas_size - h) / 2),
        ch,
        fill=(0),
        font=font
    )

    # A canvas that is still entirely white means nothing was drawn. Compare
    # against the actual canvas area rather than a fixed 128x128 so the check
    # is correct for every ``canvas_size``.
    if np.sum(np.array(image)) == 255 * canvas_size * canvas_size:
        return None

    return image

In [4]:
### 예제 데이터 학습용

def draw_example(ch, src_font, dst_font, canvas_size):
    """Build a side-by-side training image for one character.

    The target-font rendering is pasted on the left half and the source-font
    rendering on the right half of a ``2 * canvas_size`` by ``canvas_size``
    grayscale canvas.

    Args:
        ch: Character to render.
        src_font: ``ImageFont`` for the source (right-hand) rendering.
        dst_font: ``ImageFont`` for the target (left-hand) rendering.
        canvas_size: Side length, in pixels, of each half.

    Returns:
        The combined mode-``'L'`` image, or the integer ``1`` as a "skip this
        character" sentinel when either font renders nothing for ``ch``.
    """
    dst_img = draw_single_char(ch, dst_font, canvas_size)
    if dst_img is None:
        return 1

    # The source font can lack the glyph too; pasting ``None`` would fail, so
    # report it through the same skip sentinel.
    src_img = draw_single_char(ch, src_font, canvas_size)
    if src_img is None:
        return 1

    # White grayscale canvas wide enough for both renderings.
    example_img = Image.new('L', (canvas_size * 2, canvas_size), 255)
    example_img.paste(dst_img, (0, 0))
    example_img.paste(src_img, (canvas_size, 0))
    return example_img


In [5]:
### 손글씨 학습용

def draw_handwriting(ch, src_font, canvas_size, dst_folder, label, count):
    """Build a side-by-side training image from a stored handwriting sample.

    The image read from ``dst_folder + "<label>_<count:04d>.png"`` is pasted
    on the left half and the source-font rendering of ``ch`` on the right
    half of a ``2 * canvas_size`` by ``canvas_size`` grayscale canvas.

    Args:
        ch: Character to render with the source font.
        src_font: ``ImageFont`` for the source (right-hand) rendering.
        canvas_size: Side length, in pixels, of each half.
        dst_folder: Path prefix of the handwriting images; it is concatenated
            directly with the file name, so it must end with a separator.
        label: Integer used as the first part of the file name.
        count: Integer used as the zero-padded second part of the file name.

    Returns:
        The combined mode-``'L'`` image.

    Raises:
        ValueError: If the source font renders nothing for ``ch``.
    """
    dst_path = dst_folder + "%d_%04d" % (label, count) + ".png"

    # Use a context manager so the underlying file handle is released once
    # the pixels have been copied into the combined image.
    with Image.open(dst_path) as dst_img:
        src_img = draw_single_char(ch, src_font, canvas_size)
        if src_img is None:
            # Fail with a clear message rather than an opaque error raised
            # from ``paste`` when handed ``None``.
            raise ValueError("source font has no glyph for %r" % (ch,))

        # White grayscale canvas wide enough for both halves.
        example_img = Image.new('L', (canvas_size * 2, canvas_size), 255)
        example_img.paste(dst_img, (0, 0))
        example_img.paste(src_img, (canvas_size, 0))

    return example_img

## 2) 데이터 생성작업

In [6]:
# Hex code points bounding the precomposed Hangul syllables (U+AC00..U+D7A3).
start, end = "AC00", "D7A3"

# The sixteen uppercase hexadecimal digits, one string per digit.
co = list("0123456789ABCDEF")

# Every four-digit uppercase hex string from "0000" to "FFFF", ascending.
Hangul_Syllables = np.array(
    ["%04X" % code for code in range(len(co) ** 4)]
)

# Positions of the first and last syllable within the full table.
s = np.flatnonzero(Hangul_Syllables == start)[0]
e = np.flatnonzero(Hangul_Syllables == end)[0]

# Keep only the syllable range, both ends included.
Hangul_Syllables = Hangul_Syllables[s:e + 1]

In [7]:
# Generate the dataset: 34 rounds, each with one randomly chosen target font
# and up to 3000 randomly chosen Hangul syllables. Every image is saved as
# "<round>_<iteration:04d>.png" under OUTPUT_PATH.

# Create the output directory up front so saving does not fail after the
# fonts have already been loaded.
os.makedirs(OUTPUT_PATH, exist_ok=True)

# The source font is the same for every image, so load it once.
source_font = ImageFont.truetype(font=SRC_PATH + "source_font.ttf", size=128)

for i in range(1, 35):
    # Target fonts are numbered files "01.ttf" .. "53.ttf"; one is picked
    # per round, so it is loaded here rather than once per character.
    ran_fontnum = random.randrange(1, 54)
    ran_font = "%02d" % ran_fontnum
    trg_font = ImageFont.truetype(font=TRG_PATH + ran_font + ".ttf", size=128)

    for it in range(1, 3001):
        # Sample over the whole table; starting the range at 1 would make
        # the first syllable (index 0) impossible to draw.
        ran_char = random.randrange(len(Hangul_Syllables))
        unicodeChars = chr(int(Hangul_Syllables[ran_char], 16))

        data = draw_example(unicodeChars, source_font, trg_font, 128)

        # draw_example returns 1 when a glyph is missing; skip those, which
        # leaves a gap in the numbering for this round.
        if data == 1:
            continue
        data.save(OUTPUT_PATH + "%d_%04d" % (i, it) + ".png")