In [1]:
from tensorflow.keras.datasets import fashion_mnist

In [2]:
import tensorflow as tf

In [3]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1,3'

In [4]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [5]:
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

In [6]:
y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)

In [7]:
print(x_train.shape)
print(y_train.shape)

(60000, 28, 28, 1)
(60000, 10)


In [8]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  2


In [9]:
from tensorflow.keras.layers import Flatten, Dense, Input
from tensorflow.keras.models import Model

In [27]:
class Preprocess_Block(tf.keras.Model):
    def __init__(self):
        super(Preprocess_Block, self).__init__()
        self.conv_1 = tf.keras.layers.Conv2D(64, kernel_size=3, padding='same', strides=1, data_format='channels_last')
        self.conv_2 = tf.keras.layers.Conv2D(64, kernel_size=3, padding='same', strides=1, data_format='channels_last')
        self.conv_3 = tf.keras.layers.Conv2D(64, kernel_size=3, padding='same', strides=1, data_format='channels_last')
        self.max_pooling = tf.keras.layers.MaxPooling2D(pool_size=2, strides=1, padding='same')
    def call(self, x, training=None):
        x = self.conv_1(x)
        x = self.conv_2(x)
        x = self.conv_3(x)
        y = self.max_pooling(x)
        return y

In [43]:
class Bottleneck_Block(tf.keras.Model):
    def __init__(self, filters, stride=1, is_upsampling=False):
        super(Bottleneck_Block, self).__init__()
        filter_num_1,filter_num_2,filter_num_3 = filters
        self.is_upsampling = is_upsampling
        if self.is_upsampling:
            self.up_sampling = tf.keras.layers.Conv2D(filter_num_3, kernel_size=1, strides=stride, data_format='channels_last')
            
        self.conv_1 = tf.keras.layers.Conv2D(filter_num_1, kernel_size=1, strides=1, data_format='channels_last')
        self.bn_1 = tf.keras.layers.BatchNormalization()
        
        self.conv_2 = tf.keras.layers.Conv2D(filter_num_2,kernel_size=3, padding='same',strides=stride, data_format='channels_last')
        self.bn_2 = tf.keras.layers.BatchNormalization()
        
        self.conv_3 = tf.keras.layers.Conv2D(filter_num_3, kernel_size=1,padding= 'same', strides=1, data_format='channels_last')
        self.bn_3 = tf.keras.layers.BatchNormalization()
        
        self.relu = tf.keras.layers.Activation('relu')
        
    def call(self, x, training=None):
        
        if self.is_upsampling:
            origin = self.up_sampling(x)
        else:
            origin = x
        
        x = self.conv_1(x)
        x = self.bn_1(x)
        x = self.relu(x)
        
        x = self.conv_2(x)
        x = self.bn_2(x)
        x = self.relu(x)
        
        x = self.conv_3(x)
        x = self.bn_3(x)
        x += origin
        
        outputs = tf.nn.relu(x)
        return outputs
                                        

In [44]:
class Res_Block(tf.keras.Model):
    def __init__(self,filter_num, stride=1, block_number=1):
        super(Res_Block, self).__init__()
        self.block = tf.keras.models.Sequential()
        self.block.add(Bottleneck_Block(filters=filter_num, stride=stride, is_upsampling=True))
        for i in range(block_number-1):
            self.block.add(Bottleneck_Block(filters=filter_num, stride=1, is_upsampling=False))
    def call(self, x, training=None):
        output = self.block(x)
        return output

In [45]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

In [46]:
class ResNet(tf.keras.Model):
    def __init__(self, num_classes):
        super(ResNet, self).__init__()
        self.preprocess_block = Preprocess_Block()
        self.block_1 = Res_Block(filter_num=(64, 64, 256), stride=1, block_number=3)
        self.block_2 = Res_Block(filter_num=(128, 128, 512), stride=1, block_number=4)
        self.block_3 = Res_Block(filter_num=(256, 256, 1024), stride=1, block_number=6)
        self.block_4 =  Res_Block(filter_num=(512, 512, 2048), stride=1, block_number=6)
        self.avg_pooling_layer = tf.keras.layers.GlobalAveragePooling2D()
        self.fc_2048 = tf.keras.layers.Dense(2048, activation='relu')
        self.fc = tf.keras.layers.Dense(num_classes, activation='softmax')
        
    def call(self, inputs, training=None):
        x = self.preprocess_block(inputs)
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = self.block_4(x)
        x = self.avg_pooling_layer(x)
        x = self.fc_2048(x)
        y = self.fc(x)
        return y

In [47]:
def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32)/255
    y = tf.cast(y, dtype=tf.uint32)
    return x, y

In [49]:
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).map(preprocess).batch(10)

In [50]:
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).map(preprocess).batch(10)

In [51]:
model = ResNet(10)

In [52]:
loss_function = tf.keras.losses.CategoricalCrossentropy()

In [53]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

In [54]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

In [55]:
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

In [56]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [25]:
@tf.function
def train_per_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_function(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

In [26]:
@tf.function
def test_per_step(images, labels):
    predictions = model(images)
    t_loss = loss_function(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [27]:
import numpy as np

In [28]:
with tf.device('/GPU:0'):
    EPOCHS = 1
    for epoch in range(EPOCHS):
      # 在下一个epoch开始时，重置评估指标
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()
        step = np.ceil(x_train.shape[0] / 32)
        while(step>0):
            data = next(iter(train_db))
            train_per_step(data[0], data[1])
            step -= 1
        test_step = np.ceil(x_test.shape[0] / 32)
        while(test_step > 0):
            test_data = next(iter(test_db))
            test_per_step(test_data[0], test_data[1])
            test_step -= 1
        template = 'Epoch {}, Train Loss: {},Train Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print (template.format(epoch+1, train_loss.result(),train_accuracy.result()*100, test_loss.result(), test_accuracy.result()*100))

Epoch 1, Train Loss: 0.9463597536087036,Train Accuracy: 63.451663970947266, Test Loss: 0.901885986328125, Test Accuracy: 68.75


In [41]:
tb = tf.keras.callbacks.TensorBoard(log_dir='./log', histogram_freq=1)

In [None]:
model.fit_generator(train_db, callbacks=[tb], epochs=1, verbose=1)

