In [2]:
from data import get_data
import numpy as np

In [3]:
data = np.load('../data/omniglot.npy', allow_pickle=True)

In [13]:
data.shape

(1623, 20, 3)

In [43]:
import os
from PIL import Image

data_dir = '.'

def get_subdirs(a_dir):
    return [os.path.join(a_dir, name) for name in os.listdir(a_dir)
            if os.path.isdir(os.path.join(a_dir, name))]

def load_and_save(save_file, size=None):
    data = []
    languages = get_subdirs(os.path.join('../data/omniglot'))
    for language_num, language in enumerate(languages):
        characters = get_subdirs(language)
        characters.sort()
        for character_num, character in enumerate(characters):
            character_images = []
            instances = os.listdir(character)
            instances.sort()
            print(language_num, len(characters))
            for instance in instances:
                im = Image.open(os.path.join(character, instance))
                if size:
                    im = im.resize((size, size), resample=Image.LANCZOS)
                image = np.array(im.getdata()).astype('float32').reshape(size, size) / 255.
                image = 1.0 - image  # invert the data as Omniglot is black on white

                character_images.append((image, character_num, language_num))
            data.append(character_images)
    return np.array(data)
#     np.save(save_file, np.array(data, dtype=object))

In [106]:
def extract_data(data, augment_data):
    images, char_nums = [], []
    if augment_data:
        for character in data:
            data = augment_character_set(data, character)
    for character_index, character in enumerate(data):
        for m, instance in enumerate(character):
#             print(instance)
            images.append(instance[0])
            char_nums.append(character_index)
#     print(np.array(images).shape)
#     images = np.expand_dims(np.array(images), 4)
    images = np.array(images)
    char_number = np.array(char_nums)
    return images, char_number


def augment_character_set(data, character_set):
    """
    :param data: Dataset the character belongs to.
    :param character_set: np array containing instances of a character.
    :return: Original data with added character sets for all defined permutations of the current character.
    """
    rotation_90, rotation_180, rotation_270 = [], [], []
    for instance in character_set:
        image, char_num, char_language_num = instance
        rotation_90.append((np.rot90(image, k=1), char_num, char_language_num))
        rotation_180.append((np.rot90(image, k=2), char_num, char_language_num))
        rotation_270.append((np.rot90(image, k=3), char_num, char_language_num))
    print(np.array(rotation_90).shape, np.array(rotation_180).shape, np.array(rotation_270).shape)
    augmented_data = np.array([rotation_90, rotation_180, rotation_270])
    return np.vstack((data, augmented_data))

In [None]:
all_data = load_and_save(None, size=28)

In [102]:
all_data.shape

(1623, 20, 3)

In [108]:
extract_data(all_data[:2], True)[1].shape

(20, 3) (20, 3) (20, 3)
(20, 3) (20, 3) (20, 3)


  print(np.array(rotation_90).shape, np.array(rotation_180).shape, np.array(rotation_270).shape)
  augmented_data = np.array([rotation_90, rotation_180, rotation_270])


(160,)

In [82]:
import tensorflow_datasets as tfds
data = tfds.load("omniglot", split="train", batch_size=-1)

In [87]:
import tensorflow as tf

In [88]:
tf.unique(data['alphabet'])

Unique(y=<tf.Tensor: shape=(30,), dtype=int64, numpy=
array([27, 30, 17, 12, 15, 37, 43, 48, 32,  3,  2, 21, 25, 13, 14, 35, 26,
       20,  0, 38,  4, 16, 41, 24, 11, 10, 31,  5, 45, 22])>, idx=<tf.Tensor: shape=(19280,), dtype=int32, numpy=array([ 0,  1,  2, ...,  1, 23, 28], dtype=int32)>)

In [None]:
labels = tf.constant([1,2,3])
features = np.zeros((3,2))
class_mask = tf.equal(tf.argmax(input=labels, axis=1), 1)
class_features = tf.boolean_mask(tensor=features, mask=class_mask)


In [112]:
labels.shape

TensorShape([3, 3])

In [120]:
labels = tf.constant([0,1,2,1])
labels = tf.one_hot(labels, 3, axis=1)
class_mask = tf.equal(tf.argmax(input=labels, axis=1), 1)
class_mask

<tf.Tensor: shape=(4,), dtype=bool, numpy=array([False,  True, False,  True])>

In [123]:
features = np.ones((4,2))
tf.boolean_mask(tensor=features, mask=class_mask)

<tf.Tensor: shape=(2, 2), dtype=float64, numpy=
array([[1., 1.],
       [1., 1.]])>

In [125]:
# tf.boolean_mask?