In [8]:
!pip install -q cairocffi editdistance
!apt install -q libcairo2-dev
!apt install -q graphviz
!pip install -q pydot
!pip install -q matplotlib graphviz pydot

Reading package lists...
Building dependency tree...
Reading state information...
libcairo2-dev is already the newest version (1.15.10-2ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 31 not upgraded.
Reading package lists...
Building dependency tree...
Reading state information...
graphviz is already the newest version (2.40.1-2).
0 upgraded, 0 newly installed, 0 to remove and 31 not upgraded.


In [0]:
import os
import itertools
import codecs
import re
import datetime
import cairocffi as cairo
import editdistance
import numpy as np
import random
import matplotlib.pyplot as plt
from random import randint
from random import seed
from scipy import ndimage
from PIL import Image
from keras.preprocessing import image

In [0]:

# Folder (relative to the working directory) that receives the generated PNGs.
monogram_dir = "drive/My Drive/Colab Notebooks/monogram_files"

directory = monogram_dir

# exist_ok=True makes the cell safe to re-run and removes the window between
# "does it exist?" and "create it" that a separate os.path.exists check has.
os.makedirs(directory, exist_ok=True)

In [0]:
# this creates larger "blotches" of noise which look
# more realistic than just adding gaussian noise
# assumes greyscale with pixels ranging from 0 to 1

def speckle(img):
    """Return a copy of `img` with smooth, blotchy noise added, clamped to [0, 1].

    A noise strength is drawn uniformly from [0, 0.6), white Gaussian noise of
    the same shape as `img` is scaled by it and smoothed with a Gaussian
    filter (sigma 1), and the result is added to the image. The input array
    is not modified.

    Args:
        img: array-like image with a `.shape`; the caller is expected to pass
            greyscale values in the 0..1 range.

    Returns:
        New array of the same shape with every value limited to [0, 1].
    """
    strength = np.random.uniform(0, 0.6)
    white_noise = np.random.randn(*img.shape) * strength
    blotches = ndimage.gaussian_filter(white_noise, 1)
    # Clamp so the noisy image stays a valid 0..1 greyscale picture.
    return np.clip(img + blotches, 0, 1)


# paints the string at a random location inside the bounding box,
# optionally with a random font and a slight random rotation,
# and always with a random amount of speckle noise

def paint_text(text, w, h, rotate=False, ud=False, multi_fonts=False):
    """Render `text` in black on a white w x h canvas and return it as an array.

    Args:
        text: string to draw.
        w: canvas width in pixels.
        h: canvas height in pixels.
        rotate: if True, pass the image through keras' `random_rotation`
            with a range of `3 * (w - top_left_x) / w + 1`.
        ud: if True, the vertical offset is drawn at random; otherwise it is
            fixed at `h // 2`.
        multi_fonts: if True, pick the font family and the bold/normal weight
            at random; otherwise use bold Courier.

    Returns:
        The speckled image as an array built with shape (1, h, w) from the
        first byte of every pixel, scaled to 0..1 (assumes `random_rotation`
        keeps the shape -- TODO confirm).

    Raises:
        IOError: if the text extents exceed the canvas minus a 3 pixel border
            on each side.
    """
    border_w, border_h = 3, 3
    canvas = cairo.ImageSurface(cairo.FORMAT_RGB24, w, h)
    with cairo.Context(canvas) as ctx:
        # white background
        ctx.set_source_rgb(1, 1, 1)
        ctx.paint()

        # font family and weight
        if multi_fonts:
            families = ['Century Schoolbook', 'Courier', 'STIX', 'URW Chancery L', 'FreeMono', 'Arial', 'Times New Roman']
            family = np.random.choice(families)
            weight = np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL])
        else:
            family = 'Courier'
            weight = cairo.FONT_WEIGHT_BOLD
        ctx.select_font_face(family, cairo.FONT_SLANT_NORMAL, weight)

        # random font size
        ctx.set_font_size(random.randint(18, 25))

        extents = ctx.text_extents(text)
        text_w, text_h = extents[2], extents[3]
        too_wide = text_w > (w - 2 * border_w)
        too_tall = text_h > (h - 2 * border_h)
        if too_wide or too_tall:
            raise IOError('Could not fit string into image. Max char count is too large for given image width.')

        # teach the RNN translational invariance by
        # fitting text box randomly on canvas, with some room to rotate
        shift_limit_x = int(w - text_w - border_w)
        shift_limit_y = int(h - text_h - border_h)
        top_left_x = np.random.randint(0, shift_limit_x)
        top_left_y = np.random.randint(0, shift_limit_y) if ud else h // 2

        # extents[0] / extents[1] are subtracted so the offsets above refer
        # to the corner of the text box rather than to the pen origin
        ctx.move_to(top_left_x - int(extents[0]), top_left_y - int(extents[1]))
        ctx.set_source_rgb(0, 0, 0)
        ctx.show_text(text)

    # 4 bytes per pixel in the surface buffer; keep only the first one
    pixels = np.frombuffer(canvas.get_data(), np.uint8).reshape((h, w, 4))
    grey = pixels[:, :, 0].astype(np.float32) / 255
    grey = np.expand_dims(grey, 0)
    if rotate:
        grey = image.random_rotation(grey, 3 * (w - top_left_x) / w + 1)

    return speckle(grey)

In [0]:
monogram_file = "drive/My Drive/Colab Notebooks/wordlist_mono_clean.txt"
h = 64
w = 128

# Read the word list up front so the file is closed before the (slow)
# rendering loop starts.
with codecs.open(monogram_file, mode='r', encoding='utf-8') as f:
  lines = f.readlines()

count = 0    # images written so far
skipped = 0  # labels paint_text could not fit on the canvas
for line in lines:
  word = line.rstrip()

  # Labels whose random number is divisible by 5 or by 8 get that number
  # attached; whenever `count` is a multiple of 10 the word is upper-cased.
  rand = randint(0, 9999)
  if rand % 5 == 0:
    if count % 10 == 0:
      word = "{} {}".format(rand, word.upper())
    else:
      word = "{} {}".format(rand, word)
  elif rand % 8 == 0:
    if count % 10 == 0:
      word = "{} {}".format(word.upper(), rand)
    else:
      # NOTE(review): this repeats the "rand WORD" layout of the first
      # branch; by symmetry "word rand" may have been intended -- confirm.
      word = "{} {}".format(rand, word.upper())

  # The length filter is applied to the final label, i.e. including any
  # number that was attached above.
  if 3 <= len(word) <= 8:
    try:
      a = paint_text(word, h=h, w=w, rotate=True, ud=True, multi_fonts=True)
    except IOError:
      # paint_text raises IOError when the rendered label does not fit on
      # the canvas; skip that label instead of aborting the whole run.
      skipped += 1
      continue
    b = a.reshape((h, w))
    #plt.imshow(b, cmap='Greys_r')
    # Without an explicit cmap imsave applies matplotlib's default colormap
    # and writes a false-colour picture; vmin/vmax pin the 0..1 scale so the
    # image is not re-normalised to its own min/max.
    plt.imsave("{}/{}.png".format(monogram_dir, word), b, cmap='gray', vmin=0, vmax=1)
    count = count + 1
    # NOTE(review): `>` stops after 5001 images; use `>=` if exactly 5000
    # were intended.
    if count > 5000:
      break

if skipped:
  print("Skipped {} labels that did not fit into a {}x{} image.".format(skipped, w, h))