In [13]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from datasets import Dataset, DatasetDict, load_dataset
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tqdm import tqdm

In [14]:
# Load the WikiArt dataset identified by the repo id "jlbaker361/wikiart".
# The result is indexed by split name further down (ds['train']).
ds = load_dataset("jlbaker361/wikiart")

In [15]:
# Preprocessing constants shared by the cells below.
HEIGHT, WIDTH = 64, 64  # target image size in pixels after resizing
CHANNELS = 3            # colour channels; only enters the shapes when CONVERT_TO_BW is False
BATCH_SIZE = 32         # default batch size of to_tf_dataset
CONVERT_TO_BW = True    # convert every image to single-channel greyscale (PIL mode 'L')


# Image size normalization, augmentation, conversion to B&W


In [16]:
# Training split of the loaded dataset; every later cell works on this split.
train_ds = ds["train"]
# Observation count; not referenced by any of the cells shown in this notebook.
n_obs = 10

In [17]:
# one-hot-encoding
# get unique class names

def get_unique_styles(train_ds: "Dataset"):
    """Return the distinct values found in the 'style' column.

    Args:
        train_ds: A single dataset split exposing ``unique(column)``
            (this notebook passes ``ds['train']``), not the whole
            ``DatasetDict``.

    Returns:
        Whatever ``train_ds.unique('style')`` returns, i.e. the collection
        of distinct style values of that split.
    """
    return train_ds.unique('style')

def style2idx(style, style_list):
    """Return the zero-based position of ``style`` within ``style_list``.

    Args:
        style: The style value to look up.
        style_list: Ordered collection of known styles. Lists, tuples and
            NumPy arrays are all accepted.

    Returns:
        int: Index of the first occurrence of ``style``.

    Raises:
        ValueError: If ``style`` does not occur in ``style_list``.
    """
    # Go through list() so array-likes without an ``index`` method work too;
    # ``unique_styles`` is rebound to a NumPy array later in this notebook.
    return list(style_list).index(style)

# Collect every distinct style present in the dataset.
# This must be computed on the full train split (not a subset), because the
# label encoders fitted further down are fitted on exactly this collection.
unique_styles = get_unique_styles(train_ds)



In [18]:
# # for cnn (needs to be one-hot encoded)
# def encode_labels_cnn(x, unique_labels):
#     x['label'] = np.zeros(len(unique_labels), dtype=np.int32)
#     x['label'][style2idx(x['style'], unique_labels)] = 1
#     return x

# # for other clfs 
# def encode_labels(x, label_encoder: LabelEncoder):
#     x['label'] = label_encoder.transform(x['style'])
#     return x
# label_encoder = LabelEncoder().fit(unique_styles)

# train_ds = train_ds.map(lambda x: encode_labels(x, label_encoder), num_proc=4, batched=True)

In [19]:
# processed_ds = train_ds.remove_columns(['text', 'name', 'gen_style'])
# SAVE_DIR = '../processed_data'
# processed_ds.save_to_disk(SAVE_DIR)


In [65]:
def augment_with_tf(image, shape):
    """Resize, colour-normalise, reshape and rescale a single image.

    Args:
        image: A PIL image (anything exposing ``resize``, ``convert`` and
            ``mode``).
        shape: Shape of the returned array; an int or a tuple of ints whose
            product equals the number of values in the processed image.

    Returns:
        np.ndarray: ``float64`` array of the requested shape with the 8-bit
        pixel values scaled by 1/255.

    Raises:
        ValueError: If the processed image cannot be reshaped to ``shape``.
    """
    # PIL's resize expects (width, height), not (height, width).
    image = image.resize((WIDTH, HEIGHT))
    if CONVERT_TO_BW:
        # PIL gives better grey-scale conversion than tf.image.rgb_to_grayscale.
        image = image.convert('L')
    elif image.mode != 'RGB':
        # Palette / RGBA / CMYK / grey sources would otherwise not have the
        # three channels the colour shapes are built for.
        image = image.convert('RGB')
    # TODO: decide the following after subset, if subset add flipped images into dataset
    #   tf.image.random_flip_left_right, tf.image.rot90 (random k in [0, 4)),
    #   tf.image.random_brightness(max_delta=0.1),
    #   tf.image.random_contrast(lower=0.8, upper=1.2)
    # NumPy reshape accepts both an int and a tuple, so the flattened
    # (non-CNN) and the 2-D/3-D (CNN) layouts go through the same path.
    pixels = np.asarray(image, dtype=np.float64).reshape(shape)
    return pixels * (1. / 255)

# augment over batches
def augment(examples, shape):
    """Add an 'img_pixels' column to a batch of examples.

    Args:
        examples: Batch mapping whose 'image' entry is an iterable of images.
        shape: Target array shape forwarded to ``augment_with_tf``.

    Returns:
        The same ``examples`` object, with 'img_pixels' holding one processed
        array per entry of 'image'.
    """
    pixel_arrays = []
    for picture in examples['image']:
        pixel_arrays.append(augment_with_tf(picture, shape))
    examples['img_pixels'] = pixel_arrays
    return examples


In [53]:
# this should work differently for cnn vs other models
# other models would directly use the encoded labels (i.e "string" -> num)
# cnn needs (0,1,0,0) i.e. one hot encoded output for this
def to_tf_dataset(ds, shape, output_shape, batch_size=BATCH_SIZE, shuffle_buffer=1000):
    """Wrap a processed dataset in a shuffled, batched ``tf.data.Dataset``.

    Args:
        ds: Iterable of rows providing 'img_pixels' and 'label' entries.
        shape: Shape of one image element.
        output_shape: Shape of one label element; (n_classes,) for one-hot
            labels, () for plain integer labels.
        batch_size: Number of elements per batch.
        shuffle_buffer: Size of the shuffle buffer (defaults to the
            previously hard-coded 1000).

    Returns:
        tf.data.Dataset: Yields ``(images, labels)`` batches with dtypes
        ``float64`` and ``int64``.
    """
    def generator():
        for row in ds:
            # Coerce explicitly so each element matches the declared signature
            # dtypes regardless of how the dataset stored it (e.g. one-hot
            # labels that were produced as floats).
            yield (
                np.asarray(row['img_pixels'], dtype=np.float64),
                np.asarray(row['label'], dtype=np.int64),
            )

    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            tf.TensorSpec(shape=shape, dtype=tf.float64),        # image
            tf.TensorSpec(shape=output_shape, dtype=tf.int64),   # label
        ),
    )
    return dataset.shuffle(buffer_size=shuffle_buffer).batch(batch_size)

In [47]:
# to be used in batches
def encode_labels(x, label_encoder: "LabelEncoder | OneHotEncoder", for_CNN):
    """Add an integer 'label' column to a batch, derived from its 'style' column.

    Args:
        x: Batch mapping with a 'style' entry holding one style per example.
        label_encoder: Fitted encoder exposing ``transform``. A one-hot
            encoder when ``for_CNN`` is true, a plain label encoder otherwise.
        for_CNN: If true, produce one-hot rows; otherwise one integer per
            example.

    Returns:
        The same ``x`` object with 'label' set to an ``int64`` array of shape
        (n, n_classes) when ``for_CNN`` is true, or (n,) otherwise.
    """
    styles = np.asarray(x['style'])
    if for_CNN:
        # One-hot encoders expect a 2-D (n_samples, 1) column.
        labels = label_encoder.transform(styles.reshape(-1, 1))
        # Densify only when the encoder actually returned a sparse matrix.
        if hasattr(labels, 'toarray'):
            labels = labels.toarray()
    else:
        # Label encoders expect 1-D input; a column vector only triggers a
        # conversion warning before being flattened again.
        labels = label_encoder.transform(styles)
    # Integer labels keep the column consistent with the int64 label
    # signature declared by to_tf_dataset.
    x['label'] = np.asarray(labels, dtype=np.int64)
    return x


In [23]:
# encode labels first (encoders have different input reqs)


In [77]:
# Prepare the dataset either for a CNN (image-shaped input, one-hot labels)
# or for other classifiers (flattened input, integer labels).
for_CNN = True
unique_styles = np.array(unique_styles)

# Shapes are always tuples (note the trailing commas): a bare "(n)" is just an
# int, which makes `shape` / `output_shape` change type between the branches.
if for_CNN:
    # One-hot encoders are fitted on a 2-D (n_classes, 1) column.
    encoder = OneHotEncoder().fit(unique_styles.reshape(-1, 1))
    shape = (HEIGHT, WIDTH) if CONVERT_TO_BW else (HEIGHT, WIDTH, CHANNELS)
    output_shape = (len(unique_styles),)
else:
    encoder = LabelEncoder().fit(unique_styles)
    shape = (HEIGHT * WIDTH,) if CONVERT_TO_BW else (HEIGHT * WIDTH * CHANNELS,)
    output_shape = ()

# Encode labels first (the two encoders have different input requirements),
# then add the processed pixel arrays.
train_ds_encoded = train_ds.map(lambda x: encode_labels(x, encoder, for_CNN), batched=True, num_proc=4)
train_ds_augmented = train_ds_encoded.map(lambda x: augment(x, shape), batched=True, num_proc=4)

Map (num_proc=4): 100%|██████████| 73304/73304 [01:27<00:00, 835.94 examples/s] 


In [78]:
# Persist the processed dataset; the sub-directory encodes the layout:
#   'cnn'     -> input_shape = (HEIGHT, WIDTH), out_shape = (n_classes)
#   'regular' -> shape = (HEIGHT*WIDTH), out_shape = ()
path = '../processed_data/' + ('cnn' if for_CNN else 'regular')
train_ds_augmented.save_to_disk(path)

Saving the dataset (11/11 shards): 100%|██████████| 73304/73304 [00:06<00:00, 10604.09 examples/s]


In [1]:
# Inspect the first processed example. `train_ds_augmented` only exists after
# the encoding/augmentation cell has been run in the current kernel; running
# this cell in a fresh kernel raises NameError.
train_ds_augmented[0]

NameError: name 'train_ds_augmented' is not defined

In [None]:
# Build the shuffled, batched tf.data pipeline to train models with.
# `shape` and `output_shape` must be the values chosen together with `for_CNN`
# in the encoding/augmentation cell.
train_final = to_tf_dataset(train_ds_augmented, shape, output_shape)