## Tiny ImageNet pipeline
https://tiny-imagenet.herokuapp.com/

In [None]:
import zipfile
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Get data

In [None]:
# Archive location and the glob patterns matching the extracted image files.
path_to_zip_file = './drive/My Drive/inyimgnet.zip'
path_to_train = 'tiny-imagenet-200/train/*/images/*'
path_to_valid = 'tiny-imagenet-200/val/images/*'

# Unpack the whole archive. An empty target path extracts relative to the
# current working directory, which is where the patterns above are resolved.
with zipfile.ZipFile(file=path_to_zip_file, mode='r') as zip_ref:
    zip_ref.extractall(path='')

# One dataset of file-path strings per split (built after extraction so the
# patterns have files to match).
train_list_ds = tf.data.Dataset.list_files(file_pattern=path_to_train)
valid_list_ds = tf.data.Dataset.list_files(file_pattern=path_to_valid)

#for f in train_list_ds.take(3):
#  print(f.numpy())
#for f in valid_list_ds.take(3):
#  print(f.numpy())

In [None]:
# Precalculated per-channel mean and std (currently unused: the values are commented out)
#mean, std = [0.480, 0.448, 0.397], [0.276, 0.269, 0.282]

In [None]:
# Load data
# Train
def preproc_train(file_path):
    """Load one training image and apply random augmentation.

    Args:
        file_path: Scalar string tensor holding the path of an image file.
            The third-from-last '/'-separated component is taken as the
            interim label (the class directory name).

    Returns:
        Tuple ``(img, label)``: ``img`` is a float32 tensor cropped to
        ``[64, 64, 3]`` and scaled by 1/255; ``label`` is the interim string
        label, to be converted to a class index in a later step.
    """
    # Interim label: .../<class>/images/<file> -> <class>
    path_parts = tf.strings.split(file_path, '/')
    label = path_parts[-3]

    # Read and decode the file into a 3-channel image
    raw_bytes = tf.io.read_file(file_path)
    img = tf.image.decode_image(raw_bytes, channels=3)

    # Random horizontal mirror
    img = tf.image.random_flip_left_right(img)

    # Mild random color jitter
    img = tf.image.random_brightness(img, max_delta=0.1)
    img = tf.image.random_contrast(img, lower=0.9, upper=1.1)
    img = tf.image.random_saturation(img, lower=0.9, upper=1.1)
    img = tf.image.random_hue(img, max_delta=0.1)

    # Roughly one sample in five is desaturated, then expanded back to three
    # channels so every sample keeps the same channel count.
    to_gray = tf.random.uniform(()) > 0.8
    if to_gray:
        img = tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(img))

    # Scale pixel values to the 0..1 range
    img = tf.cast(img, tf.float32) / 255.

    # Zero-pad to 72x72 (4 px offset at top/left), then take a random 64x64 crop
    padded = tf.image.pad_to_bounding_box(
        img, offset_height=4, offset_width=4, target_height=72, target_width=72)
    img = tf.image.random_crop(padded, size=[64, 64, 3])

    return img, label
train = train_list_ds.map(preproc_train, num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Validation
def preproc_valid(file_path):
    """Load one validation image without augmentation.

    Args:
        file_path: Scalar string tensor holding the path of an image file.
            The last '/'-separated component (the file name) is taken as the
            interim label.

    Returns:
        Tuple ``(img, label)``: ``img`` is a float32 tensor with static shape
        ``[None, None, 3]`` scaled by 1/255; ``label`` is the file name as a
        string tensor, to be converted to a class index in a later step.
    """
    # Get interim label (the file name)
    label = tf.strings.split(file_path, '/')[-1]

    # Get image
    img = tf.io.read_file(file_path)
    img = tf.image.decode_image(img, channels=3)

    # decode_image leaves the static shape completely unknown when traced in
    # graph mode (it may return 4-D for animated GIFs). Nothing below restores
    # it, so pin the rank and channel count here; otherwise dataset elements
    # have unknown rank, which breaks shape inference for downstream consumers.
    # NOTE(review): assumes no animated GIFs among the validation files.
    img.set_shape([None, None, 3])

    # Normalize images (0..1 range)
    img = tf.cast(img, tf.float32) / 255.

    return img, label
valid = valid_list_ds.map(preproc_valid, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [None]:
# Decode interim labels (filenames or class names)
# Helper variables
# The annotations file is read as tab-separated with no header row. Column 0
# is used as the validation image file name and column 1 as its class name.
val_label_df = pd.read_csv('tiny-imagenet-200/val/val_annotations.txt', sep='\t', header=None)

# Validation file name -> class name
val_name_to_label_dict = dict(val_label_df[[0,1]].values)

# Class name -> integer index, numbered in order of first appearance in the
# annotations file. The index range is sized from the data instead of a
# hard-coded 200, so zip() can never silently drop classes from the mapping.
_class_names = val_label_df[1].unique()
label_dict = dict(zip(_class_names, np.arange(len(_class_names))))

# Train set
def train_label(filename):
    """Look up the class index for a class-name string tensor.

    Runs eagerly inside ``tf.py_function``. Returns a one-element list
    because the caller declares a one-element list of output dtypes.
    """
    class_name = filename.numpy().decode()
    class_index = label_dict[class_name]
    return [class_index]


def train_label_encode(img, filename):
    """Replace the interim string label of a training sample with its index.

    Args:
        img: Image tensor, passed through unchanged.
        filename: Scalar string tensor with the class name.

    Returns:
        Tuple ``(img, y)`` where ``y`` is a scalar float32 tensor.
    """
    outputs = tf.py_function(func=train_label, inp=[filename], Tout=[tf.float32])
    y = outputs[0]
    # py_function outputs carry no static shape; declare the scalar explicitly
    y.set_shape([])
    return img, y
train = train.map(train_label_encode, num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Validation set
def valid_label(filename):
    """Look up the class index for a validation file-name string tensor.

    Runs eagerly inside ``tf.py_function``. The file name is first mapped to
    its class name, then the class name to its index. Returns a one-element
    list because the caller declares a one-element list of output dtypes.
    """
    image_name = filename.numpy().decode()
    class_name = val_name_to_label_dict[image_name]
    class_index = label_dict[class_name]
    return [class_index]


def valid_label_encode(img, filename):
    """Replace the interim file-name label of a validation sample with its index.

    Args:
        img: Image tensor, passed through unchanged.
        filename: Scalar string tensor with the image file name.

    Returns:
        Tuple ``(img, y)`` where ``y`` is a scalar float32 tensor.
    """
    outputs = tf.py_function(func=valid_label, inp=[filename], Tout=[tf.float32])
    y = outputs[0]
    # py_function outputs carry no static shape; declare the scalar explicitly
    y.set_shape([])
    return img, y
valid = valid.map(valid_label_encode, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [None]:
# Group samples into batches and prefetch, so that preparing the next batch
# overlaps with consumption of the current one. Prefetching changes neither
# the element values nor their structure.
# NOTE(review): batch_size is not defined in the cells shown here - it must be
# set before this cell runs.
train = train.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
valid = valid.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)