In [4]:
from PIL import Image, ImageDraw, ImageFont
import numpy as np

import glob
import random
import os
import uuid

import torchvision.models as models
import torchvision.transforms as transforms

import torch

# Progress bars for the generation / extraction loops further down.
from tqdm import tqdm

# Use the GPU whenever CUDA reports one as available.
use_cuda = torch.cuda.is_available()

# Only the `.features` sub-module of the pretrained VGG16 is kept; it is the
# network every image is pushed through in make_conv.
extractor = models.vgg16(pretrained=True).features
if use_cuda:
    extractor = extractor.cuda()

# Bounds applied by gen_image when filtering the lines read from words.txt.
MIN_WORD_LEN = 5
MAX_WORD_LEN = 9

In [5]:
def gen_image(text=None):
    """Render a word in a randomly chosen font and return it as a 244x244 image.

    Args:
        text: The string to draw. When ``None``, a random word is taken from
            ``words.txt`` in the current working directory, restricted to the
            words that pass the ``MIN_WORD_LEN`` / ``MAX_WORD_LEN`` filter.

    Returns:
        A ``(canvas, text)`` tuple: the RGB ``PIL.Image`` (black text on a
        white background, resized to 244x244) and the text that was drawn.

    Raises:
        IndexError: If no word passes the length filter or no file matches
            ``fonts/*ttf`` (``random.choice`` on an empty list).
        OSError: If ``words.txt`` or the chosen font file cannot be read.
    """
    if text is None:
        # Read-only mode is sufficient (the file is never written), and the
        # context manager guarantees the handle is closed.
        with open(os.getcwd() + "/words.txt", "r") as f:
            # rstrip("\n") instead of slicing off the last character, so a
            # final line without a trailing newline is not truncated.
            words = [line.rstrip("\n") for line in f]

        # The previous filter measured each line *including* its newline
        # (MAX_WORD_LEN > len(line) > MIN_WORD_LEN). The same effective range,
        # expressed on the stripped word, is kept so the selected vocabulary
        # does not change.
        candidates = [
            word for word in words
            if MIN_WORD_LEN <= len(word) < MAX_WORD_LEN - 1
        ]
        text = random.choice(candidates)

    font_name = random.choice(glob.glob(os.getcwd() + "/fonts/*ttf"))
    font = ImageFont.truetype(font_name, 28, encoding="unic")

    if hasattr(font, "getbbox"):
        # FreeTypeFont.getsize() was removed in Pillow 10. Width = right - left
        # and height = bottom is intended to match what getsize() reported.
        left, _top, right, bottom = font.getbbox(text)
        text_width, text_height = int(right - left), int(bottom)
    else:
        # Older Pillow releases without getbbox().
        text_width, text_height = font.getsize(text)

    # 5 px margin on every side around the rendered text.
    canvas = Image.new('RGB', (text_width + 10, text_height + 10), "white")
    draw = ImageDraw.Draw(canvas)
    draw.text((5, 5), text, 'black', font)

    # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
    # Note the resize ignores the aspect ratio of the rendered text.
    canvas = canvas.resize((244, 244), Image.LANCZOS)

    return canvas, text
    
    
def gen_dataset(size, folder='train'):
    """Generate ``size`` random word images and save them under ``data/<folder>/``.

    Each image comes from ``gen_image()`` and is written as
    ``<word>_<first 8 characters of a uuid1>.png``. ``make_conv`` later
    splits file names on ``"_"`` to get the word back.

    Args:
        size: Number of images to generate.
        folder: Sub-directory of ``data`` to write into (relative to the
            current working directory). Created if it does not exist.
    """
    out_dir = f"data/{folder}/"
    # Without this, img.save() fails on a fresh checkout or a new folder name.
    os.makedirs(out_dir, exist_ok=True)

    for _ in tqdm(range(size)):
        img, text = gen_image()
        uutext = text + "_" + str(uuid.uuid1())[:8] + ".png"
        img.save(out_dir + uutext)


In [6]:
# The string literal below is not executed: it only records the full-size
# generation calls (5000 train / 1000 test images) for reference.
'''
It will take a while..
gen_dataset(5000)
gen_dataset(1000, 'test')
'''

# Small run that writes 10 images into data/train/ (default folder).
gen_dataset(10)

100%|██████████| 10/10 [00:00<00:00, 46.38it/s]


In [17]:
import PIL.Image
from torch.autograd import Variable

# Preprocessing applied to each image by image_loader: only the conversion
# into a torch tensor.
loader = transforms.Compose([transforms.ToTensor()])

# Tensor type that image_loader casts to: CUDA float when a GPU is used,
# CPU float otherwise.
if use_cuda:
    dtype = torch.cuda.FloatTensor
else:
    dtype = torch.FloatTensor

# Length of one flattened feature row stored by make_conv.
input_image_size = 512 * 7 * 7

def image_loader(image_name):
    """Load an image file as a batched tensor of type ``dtype``.

    Args:
        image_name: Path of the image file to open.

    Returns:
        The image passed through ``loader``, cast to ``dtype`` and given a
        leading batch dimension of size 1.
    """
    # The context manager releases the file handle as soon as the pixels are
    # read. convert("RGB") forces three channels so grayscale, palette or
    # RGBA files do not reach the extractor with a different channel count.
    with PIL.Image.open(image_name) as image:
        tensor = loader(image.convert("RGB"))

    image = Variable(tensor).type(dtype)
    # fake batch dimension required to fit network's input dimensions
    return image.unsqueeze(0)

def make_conv(save_format='torch', directory='\\data\\train\\*', file_name='train'):
    """Run every matched image through ``extractor`` and save the flattened features.

    Args:
        save_format: ``'torch'`` writes ``data/<file_name>.pt`` with
            ``torch.save``; any other value writes ``data/<file_name>.npy``
            with ``np.save``.
        directory: Glob pattern appended to the current working directory.
            Backslashes are treated as path separators on every platform, so
            the Windows-style default also works on POSIX.
        file_name: Base name (without extension) of the output file.

    The saved array has one row per matched image, in the order returned by
    ``glob.glob``, and ``input_image_size`` columns.
    """
    # On Windows this replace is a no-op; elsewhere it turns the backslashes
    # of the pattern into real separators so the glob can match.
    pattern = os.getcwd() + directory.replace("\\", os.sep)
    train_samples = glob.glob(pattern)

    # Float, zero-initialised storage. The previous LongTensor buffer was
    # uninitialised and truncated the extractor's float activations to
    # integers when rows were assigned.
    image_tensors = torch.zeros(len(train_samples), input_image_size)

    for i, image_path in enumerate(tqdm(train_samples)):
        # Load the file that was actually matched, so a non-default
        # `directory` is honoured (previously always read from data/train/).
        features = extractor(image_loader(image_path)).data
        # Flatten to a single row and move to the CPU, where the buffer lives.
        image_tensors[i] = features.view(-1).cpu()

    out_dir = os.path.join(os.getcwd(), "data")
    if save_format == 'torch':
        torch.save(image_tensors, os.path.join(out_dir, f"{file_name}.pt"))
    else:
        np.save(os.path.join(out_dir, f"{file_name}.npy"), image_tensors.numpy())

In [19]:
# Extract features for the default pattern (data\train\*) and save them
# with torch.save under the default file name "train".
make_conv(save_format='torch')

100%|██████████| 10/10 [00:00<00:00, 98.64it/s]
