We are interested in generating meaningful interpolations and extrapolations based on the learned letterforms. Therefore, we start by evaluating the model's ability to interpolate between different variations within the same writing system, such as weight (thickness), size, rotation, slant, and style (font family). We then explore the extrapolation capabilities between letterforms of different writing systems.

We compare models with different latent dimensions and other hyperparameters. We also compare the results of models trained on different datasets with different levels of augmentation.

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2


In [2]:
import torch
import os

# Set to False to force CPU execution even when a CUDA device is present.
use_gpu = True

# Use the first CUDA device only when it is both requested and available;
# otherwise fall back to the CPU.
if use_gpu and torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Directory containing the pre-trained model checkpoints.
MODELS_DIR = './results/models/'


# Load Pre-Trained Models

In [3]:
from src.vae import VariationalAutoencoder
from configs.config import vae_config
from configs.config import dotdict

def get_model_name(conf, ds_type, letter):
    """Build the base name of a model checkpoint.

    The result has the form
    ``vae_<ds_type>_<letter>_<latent_dims>_<capacity>_<variational_beta>``.

    Args:
        conf: Object exposing ``latent_dims``, ``capacity`` and
            ``variational_beta`` attributes.
        ds_type: Dataset-type component of the name (e.g. ``"base"``).
        letter: Letter component of the name (e.g. ``"aleph"``).

    Returns:
        The model name as a string, without a file extension.
    """
    dims, cap, kl_weight = (
        conf.latent_dims,
        conf.capacity,
        conf.variational_beta,
    )
    return f"vae_{ds_type}_{letter}_{dims}_{cap}_{kl_weight}"

def load_vae(conf, ds_type, letter, device):
    """Instantiate a VAE and load its pre-trained weights from ``MODELS_DIR``.

    Args:
        conf: Model configuration; passed to ``VariationalAutoencoder`` and
            used (via ``get_model_name``) to derive the checkpoint file name.
        ds_type: Dataset-type component of the checkpoint name.
        letter: Letter component of the checkpoint name.
        device: Passed to ``torch.load`` as ``map_location``, i.e. the device
            onto which the checkpoint tensors are deserialized.

    Returns:
        The ``VariationalAutoencoder`` with the checkpoint's state dict
        loaded. The module itself is not moved to ``device`` here.
    """
    vae = VariationalAutoencoder(conf)
    model_name = get_model_name(conf, ds_type, letter)
    # os.path.join keeps the path valid whether or not MODELS_DIR ends with a
    # separator, and matches how the bulk-loading loop builds its paths.
    checkpoint_path = os.path.join(MODELS_DIR, model_name + ".pt")
    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # saved on a GPU can still be loaded on a CPU-only machine.
    state_dict = torch.load(checkpoint_path, map_location=device)
    vae.load_state_dict(state_dict)
    return vae

# Load a single model as an example: the checkpoint named for dataset type
# 'base', letter 'aleph' and the hyperparameters given here.
example_config = dotdict(dict(latent_dims=2, capacity=64, variational_beta=1.0))

vae = load_vae(example_config, 'base', 'aleph', device)

In [4]:
# Value grids used for model evaluation; every combination of these values
# corresponds to one model name.
ds_types = ["base"]                      # dataset types
letters = ["aleph", "shin", "mem"]       # letters
latent_dims = [2, 3]                     # latent-space sizes
capacity = [64, 128]                     # capacity values
variational_beta = [0.1, 0.5, 1.0, 2.0]  # variational beta values

In [5]:
import itertools 

# Load the pre-trained model for every combination of dataset type, letter and
# hyperparameters, keyed by its model name.
models = {}

for ds_type, letter, latent_dim, cap, beta in itertools.product(
        ds_types, letters, latent_dims, capacity, variational_beta):
    # NOTE(review): this rebinds `vae_config`, shadowing the name imported from
    # configs.config. The name is kept so cells that run afterwards see the
    # same globals as before; consider renaming once those cells are checked.
    vae_config = dotdict({
        'latent_dims': latent_dim,
        'capacity': cap,
        'variational_beta': beta,
    })
    model_name = get_model_name(vae_config, ds_type, letter)
    # Reuse load_vae instead of duplicating its logic: it passes
    # map_location=device to torch.load, so checkpoints saved on a GPU also
    # load on a CPU-only machine (a bare torch.load would fail there).
    model = load_vae(vae_config, ds_type, letter, device)
    models[model_name] = model

In [6]:
models

{'vae_base_aleph_2_64_0.1': VariationalAutoencoder(
   (encoder): Encoder(
     (conv1): Conv2d(1, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
     (conv2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
     (fc_mu): Linear(in_features=6272, out_features=2, bias=True)
     (fc_logvar): Linear(in_features=6272, out_features=2, bias=True)
   )
   (decoder): Decoder(
     (fc): Linear(in_features=2, out_features=6272, bias=True)
     (conv2): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
     (conv1): ConvTranspose2d(64, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
   )
 ),
 'vae_base_aleph_2_64_0.5': VariationalAutoencoder(
   (encoder): Encoder(
     (conv1): Conv2d(1, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
     (conv2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
     (fc_mu): Linear(in_features=6272, out_features=2, bias=True)
     (fc_logvar): Linear(in_features=6272, out_fea

Load the base dataset: a dataset in which all letters are centered, unrotated, and of a fixed size.

In [8]:
from torch.utils.data import DataLoader
from torchvision import transforms
from src.datasets import GraphemesDataset
# The only preprocessing step is converting each image to a tensor.
img_transform = transforms.Compose([transforms.ToTensor()])

# Root folder of the base dataset.
data_dir = "datasets/base_dataset/"

# NOTE(review): `letter` is not assigned in this cell. It is presumably still
# bound from an earlier cell (the model-loading loop leaves it at its last
# value) -- confirm which letter is intended and consider setting it explicitly.
dataset = GraphemesDataset(
    data_dir,
    test_size=0,
    by_letter=letter,
    transform=img_transform,
)

# Batches of 128 samples, reshuffled on every pass over the dataset.
dataloader = DataLoader(dataset, batch_size=128, shuffle=True)
