In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
from keras.utils import np_utils
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.layers import Activation, AvgPool2D
from tensorflow.keras.layers import BatchNormalization as BN
from tensorflow.keras.layers import Concatenate, Conv2D, Dense, Dropout, Flatten
from tensorflow.keras.layers import GaussianNoise as GN
from tensorflow.keras.layers import Input, MaxPooling2D

# Hyperparameters and input geometry shared by the data pipeline, the model
# builder and the training loop defined in this file.
BATCH_SIZE = 64  # samples per batch; also used to derive steps_per_epoch
DP_PROB = 0.5  # rate passed to the Dropout layer placed before the classifier
EPOCHS = 100  # epochs given to fit(); also the dataset repeat count and the decay horizon
GN_PROB = 0.3  # argument passed to GaussianNoise (Keras interprets it as a stddev, not a probability)
INPUT_SHAPE = (32, 32, 3)  # shape given to the model's Input layer
LEARNING_RATE = 1e-3  # initial Adam learning rate and the base value scaled by poly_decay
NUM_CLASSES = 10  # width of the one-hot labels and of the final Dense layer


@tf.function
def normalize(image, tag):
    """Convert an image to float32 and divide it by 255.

    Args:
        image: Image tensor; cast to ``tf.float32`` before scaling.
        tag: Label associated with the image; passed through untouched.

    Returns:
        A ``(scaled_image, tag)`` pair.
    """
    scaled = tf.cast(image, tf.float32) / 255
    return scaled, tag


@tf.function
def sample_beta_distribution(size, concentration_0=0.2, concentration_1=0.2):
    """Draw ``size`` samples by normalising one Gamma draw against another.

    Two independent Gamma variates X (shape ``concentration_1``) and Y (shape
    ``concentration_0``) are sampled and ``X / (X + Y)`` is returned, which is
    the standard construction of a Beta-distributed value.

    Args:
        size: Number of samples to draw (length of the returned vector).
        concentration_0: Shape parameter of the second Gamma draw.
        concentration_1: Shape parameter of the first Gamma draw.

    Returns:
        A 1-D tensor with ``size`` entries.
    """
    draw_shape = [size]
    # Keep the draw order (concentration_1 first) so the op sequence is unchanged.
    first_draw = tf.random.gamma(shape=draw_shape, alpha=concentration_1)
    second_draw = tf.random.gamma(shape=draw_shape, alpha=concentration_0)
    total = first_draw + second_draw
    return first_draw / total


@tf.function
def mix_up(ds_one, ds_two, alpha=0.2):
    """Blend two batches with per-sample MixUp weights.

    Args:
        ds_one: ``(images, labels)`` batch that receives weight ``lam``.
        ds_two: ``(images, labels)`` batch that receives weight ``1 - lam``.
        alpha: Concentration passed (for both parameters) to
            ``sample_beta_distribution`` when drawing the weights.

    Returns:
        A ``(images, labels)`` tuple holding the convex combination of the
        two input batches.
    """
    first_images, first_labels = ds_one
    second_images, second_labels = ds_two

    # The number of samples is read from the first batch at run time.
    n_samples = tf.shape(first_images)[0]

    # One mixing coefficient per sample, reshaped so that it broadcasts over
    # the image axes and over the label axis respectively.
    lam = sample_beta_distribution(n_samples, alpha, alpha)
    image_weight = tf.reshape(lam, (n_samples, 1, 1, 1))
    label_weight = tf.reshape(lam, (n_samples, 1))

    mixed_images = first_images * image_weight + second_images * (1 - image_weight)
    mixed_labels = first_labels * label_weight + second_labels * (1 - label_weight)

    return (mixed_images, mixed_labels)


@tf.function
def data_augmentation(image, label):
    """Apply random brightness, an optional random crop, and a random flip.

    The crop to 25x25x3 is applied only when a uniform draw exceeds 0.5; the
    image is always resized to 32x32 at the end, so the output size does not
    depend on whether the crop happened.

    Args:
        image: A single image tensor (not a batch).
        label: Label for the image; returned unchanged.

    Returns:
        An ``(augmented_image, label)`` pair.
    """
    augmented = tf.image.random_brightness(image, max_delta=0.7)

    crop_draw = tf.random.uniform((), minval=0.0, maxval=1.0, dtype=tf.float32)
    # Kept as a Python `if` so AutoGraph lowers it exactly as before.
    if crop_draw > 0.5:
        augmented = tf.image.random_crop(value=augmented, size=(25, 25, 3))

    augmented = tf.image.random_flip_left_right(augmented)
    augmented = tf.image.resize(augmented, size=[32, 32])
    return augmented, label


@tf.function
def random_cut_out(images, labels):
    """Apply ``tfa.image.random_cutout`` to a batch of images.

    Args:
        images: Batch of images handed to the cutout op.
        labels: Labels for the batch; returned unchanged.

    Returns:
        A ``(images_with_cutout, labels)`` pair; the cutout uses a (4, 4)
        mask size filled with the constant value 1.
    """
    masked = tfa.image.random_cutout(images, mask_size=(4, 4), constant_values=1)
    return masked, labels


def activation(x):
    """Apply the shared post-convolution stack: BatchNorm, GaussianNoise, ReLU.

    Args:
        x: Keras tensor produced by the preceding layer.

    Returns:
        The tensor after batch normalization, Gaussian noise (configured with
        ``GN_PROB``) and a ReLU activation, in that order.
    """
    normalized = BN()(x)
    noised = GN(GN_PROB)(normalized)
    return Activation("relu")(noised)


def inception_block(x, filters=(64, 128, 32, 16)):
    """Build one Inception-style block with four parallel branches.

    Branches, all stride 1 with "same" padding so spatial size is preserved:
      * 1x1 convolution followed by ``activation``
      * 3x3 convolution followed by ``activation``
      * 5x5 convolution followed by ``activation``
      * 3x3 max-pool followed by a 1x1 convolution with a built-in ReLU

    Args:
        x: Input Keras tensor.
        filters: Sequence of four filter counts, indexed as
            ``[1x1, 3x3, 5x5, pool-projection]``. The default is an immutable
            tuple so it cannot be mutated and shared between calls.

    Returns:
        The four branch outputs concatenated along the channel (last) axis.
    """
    conv1x1 = Conv2D(filters[0], kernel_size=1, strides=1, padding="same")(x)
    conv1x1 = activation(conv1x1)

    conv3x3 = Conv2D(filters[1], kernel_size=3, strides=1, padding="same")(x)
    conv3x3 = activation(conv3x3)

    # This branch uses a 5x5 kernel, so it is named accordingly.
    conv5x5 = Conv2D(filters[2], kernel_size=5, strides=1, padding="same")(x)
    conv5x5 = activation(conv5x5)

    # The pooling branch applies ReLU inside the projection conv and does not
    # go through the BatchNorm/GaussianNoise stack used by the other branches.
    max_pool = MaxPooling2D(3, strides=1, padding="same")(x)
    max_pool = Conv2D(
        filters[3], kernel_size=1, strides=1, padding="same", activation="relu"
    )(max_pool)

    return Concatenate(axis=-1)([conv1x1, conv3x3, conv5x5, max_pool])


def down_sample(x, filters):
    """Halve the spatial resolution with parallel conv and max-pool branches.

    Both branches use a window of 3, a stride of 2 and "valid" padding, so
    they produce the same spatial size and can be concatenated on channels.

    Args:
        x: Input Keras tensor.
        filters: Number of filters for the strided convolution branch.

    Returns:
        The convolution branch (after ``activation``) and the pooling branch
        concatenated along the channel (last) axis.
    """
    conv = Conv2D(filters, kernel_size=3, strides=2, padding="valid")(x)
    conv = activation(conv)
    # "valid" is the Keras default; spelled out to show it matches the conv branch.
    pool = MaxPooling2D(3, strides=2, padding="valid")(x)
    # Named `merged` rather than `input` to avoid shadowing the builtin.
    merged = Concatenate(axis=-1)([conv, pool])
    return merged


def build_network():
    """Assemble the Inception-style classifier used for training.

    Layout: a 3x3 stem convolution, then three stages of inception blocks
    separated by two ``down_sample`` steps, followed by 7x7 average pooling,
    dropout, and a softmax classifier over ``NUM_CLASSES`` outputs.

    With the 32x32 ``INPUT_SHAPE`` each ``down_sample`` (window 3, stride 2,
    valid padding) takes the feature map 32 -> 15 -> 7, so the 7x7 average
    pool reduces it to a single spatial position.

    Returns:
        An uncompiled ``tf.keras.models.Model`` named "inception".
    """
    # Filter configurations per stage, in the order the blocks are stacked.
    stage_one = ([32, 32, 32, 32], [32, 48, 48, 32])
    stage_two = (
        [112, 48, 32, 48],
        [96, 64, 32, 32],
        [80, 80, 32, 32],
        [48, 96, 32, 32],
        [112, 48, 32, 48],
    )
    stage_three = ([176, 160, 96, 96], [176, 160, 96, 96])

    inputs = Input(shape=INPUT_SHAPE)
    net = Conv2D(96, kernel_size=3, strides=1, padding="same")(inputs)

    for widths in stage_one:
        net = inception_block(net, widths)
    net = down_sample(net, 80)

    for widths in stage_two:
        net = inception_block(net, widths)
    net = down_sample(net, 96)

    for widths in stage_three:
        net = inception_block(net, widths)

    # Classification head.
    net = AvgPool2D(7)(net)
    net = Dropout(DP_PROB)(net)
    net = Flatten()(net)
    logits = Dense(NUM_CLASSES)(net)
    probabilities = Activation("softmax")(logits)

    return tf.keras.models.Model(
        inputs=inputs, outputs=probabilities, name="inception"
    )


def poly_decay(epoch):
    """Return the learning rate for ``epoch`` under a linear decay schedule.

    The rate starts at ``LEARNING_RATE`` for epoch 0 and shrinks in proportion
    to the fraction of ``EPOCHS`` already completed.

    Args:
        epoch: Zero-based epoch index supplied by ``LearningRateScheduler``.

    Returns:
        The learning rate to use for that epoch.
    """
    progress = epoch / float(EPOCHS)
    return LEARNING_RATE * (1 - progress)


# ---------------------------------------------------------------------------
# Data loading, input pipelines, model construction, training and evaluation.
# ---------------------------------------------------------------------------
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# One-hot encode the labels (required by categorical cross-entropy and by
# MixUp, which interpolates label vectors). tf.keras.utils is used so the
# script stays within the tf.keras namespace used everywhere else here.
y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)


def _augmented_train_stream():
    """Build one shuffled, augmented, batched stream over the training set.

    MixUp needs two such streams; each call creates an independent pipeline,
    so the two streams are shuffled and augmented independently.
    """
    return (
        tf.data.Dataset.from_tensor_slices((x_train, y_train))
        .map(normalize)
        .shuffle(100)
        .repeat(EPOCHS)
        .map(data_augmentation)  # per-image augmentation, before batching
        .batch(BATCH_SIZE)
        .map(random_cut_out)  # cutout operates on whole batches
    )


train_ds_one = _augmented_train_stream()
train_ds_two = _augmented_train_stream()

# Evaluation data: no augmentation, and no shuffle since the loss/accuracy
# reported by fit()/evaluate() do not depend on sample order.
test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(normalize)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Pair batches from the two streams and blend them with MixUp; prefetch lets
# input preparation overlap with the training step.
train = (
    tf.data.Dataset.zip((train_ds_one, train_ds_two))
    .map(
        lambda ds_one, ds_two: mix_up(ds_one, ds_two, alpha=0.2),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    .prefetch(tf.data.AUTOTUNE)
)

model = build_network()

model.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    metrics=["accuracy"],
)

model.summary()

# The training dataset is already repeated EPOCHS times, so steps_per_epoch
# tells fit() where one epoch ends.
model.fit(
    train,
    steps_per_epoch=len(x_train) // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=test,
    callbacks=[LearningRateScheduler(poly_decay)],
)

scores = model.evaluate(test, verbose=1)
print("Test loss:", scores[0])
print("Test accuracy:", scores[1])


2022-01-08 11:51:01.316855: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-08 11:51:01.326123: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-08 11:51:01.326678: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-08 11:51:01.328424: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 batch_normalization (BatchNorm  (None, 32, 32, 3)   12          ['input_1[0][0]']                
 alization)                                                                                       
                                                                                                  
 conv2d (Conv2D)                (None, 32, 32, 32)   896         ['batch_normalization[0][0]']    
                                                                                                  
 re_lu (ReLU)                   (None, 32, 32, 32)   0           ['conv2d[0][0]']             

2022-01-08 11:51:04.298133: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8202
2022-01-08 11:51:06.136261: W tensorflow/core/common_runtime/bfc_allocator.cc:275] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.54GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2022-01-08 11:51:06.136331: W tensorflow/core/common_runtime/bfc_allocator.cc:275] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.54GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2022-01-08 11:51:06.284983: W tensorflow/core/common_runtime/bfc_allocator.cc:275] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.44GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
 55/937 [>.............................] - ETA: 1:41 - loss: 1.4056 - accuracy: 0.4918

KeyboardInterrupt: 