In [1]:
import os
import numpy as np
from scipy.ndimage import gaussian_filter
import matplotlib.pyplot as plt
from PIL import Image, ImageFont, ImageDraw

## Generate font data

In [2]:
def draw_text(text, font, render_size, offset):
  """Render `text` on a blank canvas and return the pixels as a numpy array.

  Args:
    text: string to draw.
    font: PIL font object passed to `ImageDraw.text`.
    render_size: canvas size handed to `Image.new` (PIL order: width, height).
    offset: (x, y) position at which the text is anchored.

  Returns:
    Array view of an 8-bit grayscale ("L") image whose background is 255.
  """
  canvas = Image.new("L", render_size, 255)
  # No explicit fill is given, so the text uses ImageDraw's default ink.
  ImageDraw.Draw(canvas).text(offset, text, font=font)
  return np.asarray(canvas)

def gen_text(text_len, rng=None):
  """Generate a pseudo-random, sentence-like string of exactly `text_len` characters.

  Each loop iteration draws `r ~ U[0, 1)` and appends one of:
    * r < 0.2           -> sentence break: "." (or "!" with probability 0.1),
                           a space, and a random upper-case letter;
    * 0.2 <= r < 0.25   -> a comma followed by a space;
    * 0.25 <= r < 0.255 -> a random digit;
    * otherwise         -> a random lower-case letter.

  Args:
    text_len: number of characters in the returned string.
    rng: a `np.random.RandomState` used for all draws. When omitted, a fresh
      generator seeded with 0 is created for this call, so the result does not
      depend on how many times the function was called before (a default
      generator object in the signature would be shared and advanced by every
      call).

  Returns:
    A `str` of length `text_len` drawn from letters, digits and " .,!".
  """
  if rng is None:
    rng = np.random.RandomState(0)

  lower = "abcdefghijklmnopqrstuvwxyz"
  upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  digits = "0123456789"

  text = []
  while len(text) < text_len:
    r = rng.uniform()
    if r < 0.2:
      # Sentence break: terminator, space, then a capital to start the next one.
      text.append("!" if rng.uniform() < 0.1 else ".")
      text.append(" ")
      text.append(upper[rng.randint(0, 26)])
    elif r < 0.25:
      text.append(",")
      text.append(" ")
    elif r < 0.255:
      text.append(digits[rng.randint(0, 10)])
    else:
      text.append(lower[rng.randint(0, 26)])
  # Multi-character appends can overshoot the target length, so trim.
  return "".join(text)[:text_len]
  
# Alphabet of every character the generators can emit; a character's position
# in this string is its integer class label.
txt_map = (
  "abcdefghijklmnopqrstuvwxyz"
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "0123456789"
  " .,!"
)
# Reverse lookup: character -> label index.
r_txt_map = {ch: idx for idx, ch in enumerate(txt_map)}

def gen_sample(text_len, font, render_size, offset, rng):
  """Create one (image, label) training pair of rendered random text.

  Args:
    text_len: number of characters to generate and render.
    font: PIL font used for rendering.
    render_size: canvas size forwarded to `draw_text`.
    offset: (x, y) text position forwarded to `draw_text`.
    rng: `np.random.RandomState` forwarded to `gen_text`.

  Returns:
    (img, label): `img` is the rendered canvas as float32 scaled by 1/255;
    `label` is a uint8 array holding the `txt_map` index of each character.
  """
  text = gen_text(text_len, rng)
  pixels = draw_text(text, font, render_size, offset).astype(np.float32)
  scale = 1/255
  indices = [r_txt_map[ch] for ch in text]
  return scale*pixels, np.array(indices, dtype=np.uint8)

def gen_font_list():
  """Load the two font families used for rendering.

  Returns:
    A nested list indexed as `[family][i][j]`:
      * family 0: four DancingScript weights (i), each at sizes 23-26 (j);
      * family 1: IndieFlower at sizes 21-24 (j); the same list of four sizes
        is repeated for every i so both families share a 4x4 layout.
  """
  dancing_paths = [
    "data/static/DancingScript-Regular.ttf",
    "data/static/DancingScript-Medium.ttf",
    "data/static/DancingScript-SemiBold.ttf",
    "data/static/DancingScript-Bold.ttf"]
  dancing_script = []
  for path in dancing_paths:
    dancing_script.append([ImageFont.truetype(path, size) for size in (23, 24, 25, 26)])

  indie_flower = [ImageFont.truetype("data/IndieFlower-Regular.ttf", size) for size in (21, 22, 23, 24)]
  # Only one weight exists for this family, so the same row is reused four times.
  return [dancing_script, [indie_flower] * 4]

# Load the fonts once when the cell runs; gen_samples and gen_char_samples
# read this module-level table as font_list[k][i][j].
font_list = gen_font_list()
    
def gen_samples(n, k, rng, add_noise=False, text_len = 26, render_size = (256, 32)):
  """Generate `n` rendered text-line images with per-character labels.

  Args:
    n: number of samples.
    k: font family index into the module-level `font_list`.
    rng: `np.random.RandomState` driving offsets, font choice, text and noise.
    add_noise: when True, add Gaussian noise (mean 0, std 0.1) and then force
      roughly 1% of pixels to 0 and roughly 1% to 1.
    text_len: characters per sample.
    render_size: (width, height) of each rendered canvas.

  Returns:
    (imgs, labels): `imgs` has shape (n, height, width, 1) with values clipped
    to [0, 1]; `labels` is a uint8 array of shape (n, text_len).
  """
  # Draw all per-sample offsets (each coordinate in [0, 6)) and font indices up front.
  offsets = rng.randint(0, 6, (n, 2))
  font_picks = rng.randint(0, 4, (n, 2))
  height, width = render_size[1], render_size[0]
  imgs = np.empty((n, height, width), np.float32)
  labels = np.empty((n, text_len), np.uint8)

  for idx in range(n):
    variant, size = font_picks[idx]
    chosen_font = font_list[k][variant][size]
    imgs[idx], labels[idx] = gen_sample(
      text_len, chosen_font, render_size, tuple(offsets[idx]), rng)

  if add_noise:
    # In-place add keeps the float32 buffer.
    imgs += rng.normal(0, 0.1, imgs.shape)
    mask = rng.uniform(0, 1, imgs.shape)
    imgs[mask<0.01] = 0
    imgs[mask>0.99] = 1

  clipped = np.clip(imgs, 0, 1)
  return np.expand_dims(clipped, -1), labels

def gen_char_samples(n, k, rng, add_noise=False, render_size = (32, 32)):
  """Generate `n` single-character images with their class labels.

  Labels are drawn uniformly from all `txt_map` entries except the last four
  (space, '.', ',' and '!').

  Args:
    n: number of samples.
    k: font family index into the module-level `font_list`.
    rng: `np.random.RandomState` driving offsets, font choice, labels and noise.
    add_noise: when True, add Gaussian noise (mean 0, std 0.1) and then force
      roughly 1% of pixels to 0 and roughly 1% to 1.
    render_size: (width, height) of each rendered canvas.

  Returns:
    (imgs, labels): `imgs` has shape (n, height, width, 1) with values clipped
    to [0, 1]; `labels` is a uint8 array of shape (n,).
  """
  offsets = rng.randint(0, 6, (n, 2))
  font_picks = rng.randint(0, 4, (n, 2))
  height, width = render_size[1], render_size[0]
  imgs = np.empty((n, height, width), np.float32)
  labels = rng.randint(0, len(txt_map)-4, n).astype(np.uint8)

  for idx in range(n):
    variant, size = font_picks[idx]
    glyph = draw_text(
      txt_map[labels[idx]], font_list[k][variant][size], render_size, tuple(offsets[idx]))
    imgs[idx] = (1/255)*glyph.astype(np.float32)

  if add_noise:
    # In-place add keeps the float32 buffer.
    imgs += rng.normal(0, 0.1, imgs.shape)
    mask = rng.uniform(0, 1, imgs.shape)
    imgs[mask<0.01] = 0
    imgs[mask>0.99] = 1

  clipped = np.clip(imgs, 0, 1)
  return np.expand_dims(clipped, -1), labels

In [3]:
def save_txt_samples():
  """Write the clean and noisy text-line datasets to disk.

  Generates 10000 samples per font family from a single RandomState(0) stream:
  first the noise-free pair (saved to "txt_data_nn.npz"), then the noisy pair
  (saved to "txt_data.npz").
  """
  n = 10000
  rng = np.random.RandomState(0)

  # Order matters: all four generations consume the same RNG stream.
  for out_path, noisy in (("txt_data_nn.npz", False), ("txt_data.npz", True)):
    imgs_0, labels_0 = gen_samples(n, 0, rng, noisy)
    imgs_1, labels_1 = gen_samples(n, 1, rng, noisy)
    np.savez(out_path, imgs_0=imgs_0, labels_0=labels_0, imgs_1=imgs_1, labels_1=labels_1)
save_txt_samples()

In [4]:
def save_char_samples():
  """Write the clean and noisy single-character datasets to disk.

  Generates 10000 samples per font family from a single RandomState(0) stream:
  first the noise-free pair (saved to "char_data_nn.npz"), then the noisy pair
  (saved to "char_data.npz").
  """
  n = 10000
  rng = np.random.RandomState(0)

  # Order matters: all four generations consume the same RNG stream.
  for out_path, noisy in (("char_data_nn.npz", False), ("char_data.npz", True)):
    imgs_0, labels_0 = gen_char_samples(n, 0, rng, noisy)
    imgs_1, labels_1 = gen_char_samples(n, 1, rng, noisy)
    np.savez(out_path, imgs_0=imgs_0, labels_0=labels_0, imgs_1=imgs_1, labels_1=labels_1)
save_char_samples()

## Import handwriting

In [5]:
def glob_sentences(form_id):
    """List the files stored for one form, in sorted (reproducible) order.

    The directory is `data/<prefix>/<form_id>`, where `<prefix>` is the part of
    `form_id` before the first "-".

    Args:
        form_id: form identifier, e.g. "a01-000u".

    Returns:
        Paths of every entry in the form's directory, sorted by file name.
        `os.listdir` makes no ordering guarantee, and callers depend on the
        order when selecting and stacking images, so it is sorted here.

    Raises:
        FileNotFoundError: if the form directory does not exist.
    """
    dir_path = os.path.join("data", form_id.split("-")[0], form_id)
    return [os.path.join(dir_path, f) for f in sorted(os.listdir(dir_path))]

def import_handwriting():
    """Build "wr_data.npz" from the handwriting forms listed in "data/forms.txt".

    Steps:
      1. Parse forms.txt into {writer_id: [(form_id, count), ...]}.
      2. Rank writers by the sum of their counts and print the top ten ids.
      3. Collect the sentence image paths of the two chosen writers (0 and 552).
      4. Pad every image of writer 552 with white to the largest width/height
         found among them and stack them into `imgs_1`.
      5. Fill `imgs_0` (same shape) with the first images of writer 0 that fit.

    Both arrays are stored as float32 scaled by 1/255.

    Raises:
        KeyError: if writer 0 or 552 is not among the ten top-ranked writers.
    """
    with open("data/forms.txt") as f:
        # The first 16 lines are skipped (presumably the file header -- confirm).
        # Of each remaining line only the first three whitespace-separated
        # fields are used: form id, writer id, and an integer count.
        records = [line.split()[:3] for line in list(f)[16:]]

    writers = {}
    for form_id, writer_id, count in records:
        writers.setdefault(int(writer_id), []).append((form_id, int(count)))

    # Sort ascending by total count so the last ten entries are the largest.
    writer_len = sorted((sum(c for _, c in forms), k) for k, forms in writers.items())

    sel_w = [k for _, k in writer_len[-10:]]
    print(sel_w)
    sel_w = {k: [form_id for form_id, _ in writers[k]] for k in sel_w}

    # chosen writers: 0, 552
    f0 = sel_w[0]
    f1 = sel_w[552]

    sentences_0 = []
    for f in f0:
        sentences_0.extend(glob_sentences(f))
    sentences_1 = []
    for f in f1:
        sentences_1.extend(glob_sentences(f))
    print(f"sentences_0: {len(sentences_0)}")
    print(f"sentences_1: {len(sentences_1)}")

    # PIL reports size as (width, height); reading it does not decode pixels,
    # so close each file explicitly instead of leaving it to the GC.
    sizes = []
    for p in sentences_1:
        with Image.open(p) as img:
            sizes.append(img.size)
    sizes = np.array(sizes)
    print(f"max: {sizes.max(0)}")

    # Derive the canvas from the data rather than hard-coding the printed
    # numbers, so the cell keeps working if the selection or files change.
    max_w, max_h = sizes.max(0)
    imgs_1 = np.full((len(sentences_1), max_h, max_w), 255, np.uint8)
    for i, p in enumerate(sentences_1):
        with Image.open(p) as img:
            s = img.size
            imgs_1[i, :s[1], :s[0]] = np.array(img)
    imgs_1 = (1/255)*imgs_1.astype(np.float32)

    # Writer 0 has more sentences than needed: take, in order, only those that
    # fit inside the canvas until the array is full. If fewer fit than there
    # are slots, the remaining slots stay blank (all white).
    imgs_0 = np.full(imgs_1.shape, 255, np.uint8)
    c = 0
    for p in sentences_0:
        if c == len(imgs_0):
            break
        with Image.open(p) as img:
            s = img.size
            if (s[1] <= imgs_0.shape[1]) and (s[0] <= imgs_0.shape[2]):
                imgs_0[c, :s[1], :s[0]] = np.array(img)
                c += 1
    imgs_0 = (1/255)*imgs_0.astype(np.float32)
    np.savez("wr_data.npz", imgs_0=imgs_0, imgs_1=imgs_1)
import_handwriting()

[584, 635, 548, 671, 567, 551, 634, 588, 552, 0]
sentences_0: 693
sentences_1: 127
max: [1829  116]


## Import signatures

In [6]:
def _load_padded_signatures(paths, height=800, width=1600):
    """Load images into one white-padded float32 batch.

    Each image is placed in the top-left corner of a `height` x `width` canvas
    pre-filled with 255, then the batch is scaled by 1/255.

    Args:
        paths: image file paths, one per batch entry.
        height: canvas height in pixels.
        width: canvas width in pixels.

    Returns:
        float32 array of shape (len(paths), height, width, 1).
    """
    # Fill a float32 buffer directly: np.full without a dtype would allocate
    # int64 (8 bytes per pixel) only to be converted to float32 afterwards.
    batch = np.full((len(paths), height, width), 255, np.float32)
    for i, path in enumerate(paths):
        with Image.open(path) as img:
            a = np.array(img)
        batch[i, :a.shape[0], :a.shape[1]] = a
    return np.expand_dims((1/255)*batch, -1)


def import_signatures():
    """Build "sig_data.npz" from the sample signature folders.

    File names are parsed positionally: characters 4-6 give one writer number
    and, for forged files, characters 9-11 give a second one.

    Saved arrays (each float32, shape (count, 800, 1600, 1), scaled by 1/255):
      * train_x: genuine files whose number is 1;
      * train_y: genuine files whose number is 3;
      * test_x:  forged files whose first number is 3 and second number is 1.
    """
    root = "data/sample_signature/sample_Signature"
    # chosen writers: 1, 3
    f1, f3 = [], []

    # Sort the listings: os.listdir order is arbitrary, and the sample order
    # in the saved arrays should be reproducible.
    for f in sorted(os.listdir(os.path.join(root, "genuine"))):
        i = int(f[4:7])
        p = os.path.join(root, "genuine", f)
        if i == 1:
            f1.append(p)
        elif i == 3:
            f3.append(p)

    f3_1 = []
    for f in sorted(os.listdir(os.path.join(root, "forged"))):
        i = int(f[4:7])
        j = int(f[9:12])
        p = os.path.join(root, "forged", f)
        if i == 3 and j == 1:
            f3_1.append(p)

    train_x = _load_padded_signatures(f1)
    train_y = _load_padded_signatures(f3)
    test_x = _load_padded_signatures(f3_1)

    np.savez("sig_data.npz", train_x=train_x, train_y=train_y, test_x=test_x)
import_signatures()