<a href="https://colab.research.google.com/github/hellocybernetics/TensorFlow_Eager_Execution_Tutorials/blob/master/tutorials/02_intermediate/residual_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
tfk = tf.keras
L = tf.keras.layers
tfe = tf.contrib.eager

tf.enable_eager_execution()

In [3]:
# Hyper parameters
num_epochs = 10
num_classes = 10
batch_size = 256
learning_rate = 0.001

(x_train, y_train), (x_test, y_test) = tfk.datasets.cifar10.load_data()

print("training_data\n", x_train.shape)
print("test_data\n", x_test.shape)
print("training_label\n", y_train.shape)
print("test_label\n", y_test.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
training_data
 (50000, 32, 32, 3)
test_data
 (10000, 32, 32, 3)
training_label
 (50000, 1)
test_label
 (10000, 1)


In [4]:
x_train_ = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_train_ = tf.reshape(tf.one_hot(y_train, 10), (-1, 10))


print(x_train_.shape)
print(y_train_.shape)

(50000, 32, 32, 3)
(50000, 10)


In [18]:
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .batch(batch_size)
    .shuffle(10000)
)

train_dataset = (
    train_dataset.map(lambda x, y: 
                      (tf.div(tf.cast(x, tf.float32), 255.0), 
                       tf.reshape(tf.one_hot(y, 10), (-1, 10))))
)

print(train_dataset)

<MapDataset shapes: ((?, 32, 32, 3), (?, 10)), types: (tf.float32, tf.float32)>


In [19]:
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(1000)
    .shuffle(10000)
)
test_dataset = (
    test_dataset.map(lambda x, y: 
                      (tf.div(tf.cast(x, tf.float32), 255.0), 
                       tf.reshape(tf.one_hot(y, 10), (-1, 10))))
)

print(test_dataset)

<MapDataset shapes: ((?, 32, 32, 3), (?, 10)), types: (tf.float32, tf.float32)>


In [0]:
def conv3x3(out_channels, strides=1):
    return L.Conv2D(out_channels, kernel_size=3, 
                    strides=strides, padding='same', use_bias=False)

### training flag
`call` method of `L.BatchNormalization` need to have `traininig` flag because this method have different behavior between traning and evaluation.  

In [0]:
# Residual block
class ResidualBlock(tfk.Model):
    def __init__(self, out_channels, strides=1, downsample=None):
        super(ResidualBlock, self).__init__(name='ResidualBlock')
        self.conv1 = conv3x3(out_channels, strides)
        self.bn1 = L.BatchNormalization(axis=-1)
        self.relu = L.ReLU()
        self.conv2 = conv3x3(out_channels)
        self.bn2 = L.BatchNormalization(axis=-1)
        self.downsample = downsample
    

    def call(self, x, training=False):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out, training=training)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

### tf.keras.Sequential
`call` method of `tf.keras.Sequential` have `training` flag. This flag affects all layers included by the `tf.keras.Sequential` instance.

In [0]:
class ResNet(tfk.Model):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__(name='ResNet')
        self.in_channels = 16
        self.conv = conv3x3(16)
        self.bn = L.BatchNormalization(axis=-1)
        self.relu = L.ReLU()
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.avg_pool = L.AvgPool2D(8)
        self.flatten = L.Flatten()
        self.fc = L.Dense(num_classes)
        
    def make_layer(self, block, out_channels, blocks, strides=1):
        downsample = None
        if (strides != 1) or (self.in_channels != out_channels):
            downsample = tfk.Sequential([
                conv3x3(out_channels, strides=strides),
                L.BatchNormalization(axis=-1)])
        layers = []
        layers.append(block(out_channels, strides, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels))
        return tfk.Sequential(layers)
    
    def call(self, x, training=False):
        out = self.conv(x)
        out = self.bn(out, training=training)
        out = self.relu(out)
        out = self.layer1(out, training=training)
        out = self.layer2(out, training=training)
        out = self.layer3(out, training=training)
        out = self.avg_pool(out)
        out = self.flatten(out)
        out = self.fc(out)
        return out

In [0]:
model = ResNet(ResidualBlock, [2, 2, 2])

In [0]:
def loss_fn(y, y_pre):
    return tf.losses.softmax_cross_entropy(y, y_pre)

def accuracy(y, y_pre):
    return tfk.metrics.categorical_accuracy(y, y_pre)

optimizer = tf.train.AdamOptimizer(learning_rate)

In [34]:
for j in range(num_epochs):
    
    running_loss = 0
    running_acc = 0

    for i, (x_, y_) in enumerate(train_dataset):
        
        with tf.device("/gpu:0"):
            with tf.GradientTape() as tape:
                y_pre = model(x_, training=True)
                loss = loss_fn(y_, y_pre)
            acc = accuracy(y_, y_pre)
            grads = tape.gradient(loss, model.variables)
            optimizer.apply_gradients(zip(grads, model.variables))
            running_loss += loss
            running_acc += tf.reduce_mean(acc)
    
    print("-----epoch {} -----".format(j + 1))
    print("loss: ", running_loss.numpy()/(i + 1))
    print("acc: ", running_acc.numpy()/(i + 1))    

-----epoch 1 -----
loss:  1.4503074178890305
acc:  0.4708187336824378
-----epoch 2 -----
loss:  1.030255531778141
acc:  0.6327248008883729
-----epoch 3 -----
loss:  0.8536564573949698
acc:  0.6990194515306123
-----epoch 4 -----
loss:  0.7369005631427376
acc:  0.741896493094308
-----epoch 5 -----
loss:  0.6444133836395887
acc:  0.7760403691505899
-----epoch 6 -----
loss:  0.5748496152916733
acc:  0.7999720670738999
-----epoch 7 -----
loss:  0.5158630682497608
acc:  0.8208665653150908
-----epoch 8 -----
loss:  0.46397267555703925
acc:  0.8399194989885602
-----epoch 9 -----
loss:  0.405785541145169
acc:  0.8601921237244898
-----epoch 10 -----
loss:  0.36217514349489793
acc:  0.8758011642767458


In [42]:
test_accuracy = 0
for i, (x_, y_) in enumerate(test_dataset):
    y_pre = model(x_, training=False)
    test_accuracy += tf.reduce_mean(accuracy(y_, y_pre))
test_accuracy /= i + 1

print("test accuracy {:0.3f}".format(test_accuracy.numpy()))

test accuracy 0.723
