In [1]:
from keras.datasets import cifar100
import os
import sys
from six.moves import urllib
import gzip
import pickle
import numpy as np
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt



Using TensorFlow backend.


In [2]:
# size of MNIST
# print(x_train.shape)
# print(y_train.shape)
# print(x_test.shape)
# print(y_test.shape)

In [3]:
# # show data
# _, (ax1) = plt.subplots(1)
# sample_data = x_train[0]
# ax1.imshow(sample_data, cmap=plt.cm.Greys)

In [4]:
from typing import List, Tuple
from tensorflow.contrib.layers import xavier_initializer
from tensorflow.contrib.layers import l1_regularizer, l2_regularizer

class ResNet(object):
    regularizers = {
        'l1' : l1_regularizer,
        'l2' : l2_regularizer
    }
    eps = 1e-12
    
    def __init__(self, input: tf.Tensor = None, output : tf.Tensor = None, batch : int = 256, n_label: int = 100):
        '''
        :param input: Input Tensor. Use tf.placeholder. If not provided input layer for CIFAR-10 is used
        :param output: Output Tensor. Use tf.placeholder. If not provided output layer for CIFAR-10 is used
        :param batch: Batch Size
        :param n_label: The number of labels for classification
        '''
        
        self.batch = batch
        self.n_label = n_label
        self.x_ts = tf.placeholder(tf.float32, shape=[None, 32,32,3]) if input is None else input
        self.y_ts = tf.placeholder(tf.int32) if output is None else output
        
        self.sess = None
        self._names = dict()
        self.layers = list()
        self.layers.append(self.x_ts)
        
        self.saver = None
        
    def create_variable(self,name:str,shape:tuple,dtype=tf.float32,initializer=xavier_initializer(), regularizer:str=None):
        if regularizer is not None:
            regularizer = regularizer.lower()
            regularizer = self.regularizers[regularizer]()
            
        v = tf.get_variable(self._naming(name), shape = shape, dtype= dtype, initializer=initializer,regularizer=regularizer)
        return v
    
    def conv(self, input_layer, filter: List[int], channel: List[int], stride: int, padding: str = 'SAME') -> Tuple[tf.Tensor, tf.Tensor]:
        """
        :param input_layer: Previous layer or tensor
        :param filter: [filter_height, filter_width]
        :param channel: [in_channels, out_channels]
        :param stride:
        :param padding:
        :return: [conv_layer, filter]
        """

        filter_ts = self.create_variable('filter', shape=(*filter, *channel))
        conv = tf.nn.conv2d(input_layer, filter=filter_ts, strides=[1, stride, stride, 1], padding=padding)
        return conv, filter_ts
    
    def batch_norm(self, input_layer, dimension):
        mean,variance = tf.nn.moments(input_layer, [0,1,2], keep_dims=False)
        beta = self.create_variable('batch_beta', dimension, dtype=tf.float32,
                                   initializer=tf.constant_initializer(0.0,tf.float32))
        gamma = self.create_variable('batch_gamma', dimension,  dtype =tf.float32,
                                    initializer=tf.constant_initializer(1.0,tf.float32))
        bn_layer = tf.nn.batch_normalization(input_layer, mean, variance, beta, gamma, self.eps)
        return bn_layer
    
    def conv_bn(self, input_layer, filter : List[int], channel: List[int], stride : int):
        '''
        ResNet에서는 Convolution 다음에는 항상 Batch Normalization을 넣는다.
           "We adopt batch normalization (BN) right after each convolution and before activation"
        filter: [filter_height, filter_width]
        channel: [in_channels, out_channels]
        '''
        out_channel = channel[1]
        h, _filter = self.conv(input_layer, filter = filter, channel=channel,stride = stride, padding='SAME')
        h = self.batch_norm(h, out_channel)
        return h
    
    def init_block(self, filter:List[int]=(7,7), channel: List[int]= (3,16), stride : int =1,max_pool: bool=True) -> tf.Tensor:
        """
        input -> Conv -> ReLU -> output
        :param filter: [filter_height, filter_width]
        :param channel: [in_channels, out_channels]
        :param stride:
        """
        init_conv,_filter = self.conv(self.x_ts, filter=filter, channel=channel, stride = stride)
        init_conv = tf.nn.relu(init_conv)
        if max_pool:
            # MaxPooling
            # ksize: The size of the window for each dimension of the input tensor
            # strides: The stride of the sliding window for each dimension of the input tensor
            output = tf.nn.max_pool(init_conv, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
        else:
            output = init_conv
            
        self.layers.append(output)
        return output
    
    def max_pool(self, input_layer, kernel: List[int], stride: List[int], padding: str = 'SAME') -> tf.Tensor:
        """
        :param input_layer:
        :param kernel: [width, height] Kernel Size
        :param stride: [width, height] Stirde Size
        :param padding:
        :return:
        """
        k_height, k_width = kernel
        stride_width, stride_height = stride
        output = tf.nn.max_pool(input_layer,
                               ksize=[1,k_height,k_width,1],
                               strides=[1, stride_width, stride_height, 1], padding = padding)
        self.layers.append(output)
        return output
    
    def avg_pool(self, input_layer, kernel: List[int], stride: List[int], padding: str = 'SAME') -> tf.Tensor:
        """
        :param input_layer:
        :param kernel: [width, height] Kernel Size
        :param stride: [width, height] Stirde Size
        :param padding:
        :return:
        """
        k_height, k_width = kernel
        stride_width, stride_height = stride
        output = tf.nn.avg_pool(input_layer,
                                ksize=[1, k_height, k_width, 1],
                                strides=[1, stride_width, stride_height, 1], padding=padding)
        self.layers.append(output)
        return output
    
    def residual_block(self, input_layer, filter: List[int], channel: List[int], stride:int = 1) -> tf.Tensor:
        """
        input -> Conv -> BN -> ReLU -> Conv -> BN -> Addition -> ReLU -> output
        :param input_layer: Usually previous layer
        :param filter: (width<int>, height<int>) The size of the filter
        :param channel: [in_channels, out_channels]
        :param stride: The size of the s
        :return:
        """
        input_channel, output_channel = channel
        
        h = self.conv_bn(input_layer, filter = filter, channel=[input_channel, output_channel], stride = stride)
        h = tf.nn.relu(h)
        h = self.conv_bn(h, filter=filter, channel= [output_channel, output_channel], stride = stride)
        
        if input_channel != output_channel:
            # Input channel 과 output channel이 dimension이 다르기 때문에 projection 을 통해서 dimension을 맞춰준다.
            inp, _filter = self.conv(input_layer, filter=[1,1], channel=[input_channel,output_channel], stride = stride)
        else:
            inp = input_layer
        
        h = tf.add(h, inp)
        h = tf.nn.relu(h)
        self.layers.append(h)
        return h
    
    def fc(self,input_layer):
        global_pool = tf.reduce_mean(input_layer, axis = [1,2])
        fc_w = self.create_variable(name='fc_w',shape =[global_pool.shape[-1], self.n_label])
        fc_b = self.create_variable(name='fc_b', shape =[self.n_label])
        
        output = tf.matmul(global_pool, fc_w) + fc_b
        self.layers.append(output)
        return output
    
    def loss(self):
        loss_f = tf.nn.sparse_softmax_cross_entropy_with_logits
        cross_entropy = loss_f(logits=self.last_layer, labels=self.y_ts, name='cross_entropy')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean')
        return cross_entropy_mean
    
    def compile(self, target=None) -> tf.Session:
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        self.sess = sess
        return sess
    
    def save(self, path='/resnet_.ckpt'):
        if self.saver is None:
            self.saver = tf.train.Saver()
        self.saver.save(self.sess, path)
        
    def restore(self, path='/resnet_.ckpt'):
        if self.saver is None:
            self.saver = tf.train.Saver()
        print(f'Restoring "{path}" model')
        self.saver.restore(self.sess, path)
        
    @property
    def last_layer(self) -> tf.Tensor:
        return self.layers[-1]

    def _naming(self, name=None):
        if name is None or not name:
            name = 'variable'
        name = name.lower()
        self._names.setdefault(name, 0)
        self._names[name] += 1
        count = self._names[name]
        return f'{name}_{count:02}'

In [5]:
def model(resnet: ResNet):
    with tf.variable_scope('input_scope'):
        h = resnet.init_block(filter=[7, 7], channel=[3, 64], max_pool=False)
        
    with tf.variable_scope('residual01'),tf.device('/device:GPU:0'):
        h = resnet.residual_block(h, filter=[3, 3], channel=[64, 64])
        h = resnet.residual_block(h, filter=[3, 3], channel=[64, 64])
        h = resnet.residual_block(h, filter=[3, 3], channel=[64, 64])
        h = resnet.residual_block(h, filter=[3, 3], channel=[64, 64])
        h = resnet.residual_block(h, filter=[3, 3], channel=[64, 64])
        h = resnet.residual_block(h, filter=[3, 3], channel=[64, 64])
        
    with tf.variable_scope('residual02'),tf.device('/device:GPU:0'):
        h = resnet.max_pool(h, kernel=[2, 2], stride=[2, 2])
        h = resnet.residual_block(h, filter=[3, 3], channel=[64, 128])
        h = resnet.residual_block(h, filter=[3, 3], channel=[128, 128])
        h = resnet.residual_block(h, filter=[3, 3], channel=[128, 128])
        h = resnet.residual_block(h, filter=[3, 3], channel=[128, 128])
        h = resnet.residual_block(h, filter=[3, 3], channel=[128, 128])
        h = resnet.residual_block(h, filter=[3, 3], channel=[128, 128])
        h = resnet.residual_block(h, filter=[3, 3], channel=[128, 128])
        h = resnet.residual_block(h, filter=[3, 3], channel=[128, 128])
        
    with tf.variable_scope('residual03'),tf.device('/device:GPU:0'):
        h = resnet.max_pool(h, kernel=[2, 2], stride=[2, 2])
        h = resnet.residual_block(h, filter=[3, 3], channel=[128, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 256])
    
    with tf.variable_scope('residual04'),tf.device('/device:GPU:0'):
        h = resnet.max_pool(h, kernel=[2, 2], stride=[2, 2])
        h = resnet.residual_block(h, filter=[3, 3], channel=[256, 512])
        h = resnet.residual_block(h, filter=[3, 3], channel=[512, 512])
        h = resnet.residual_block(h, filter=[3, 3], channel=[512, 512])
        h = resnet.residual_block(h, filter=[3, 3], channel=[512, 512])
        h = resnet.residual_block(h, filter=[3, 3], channel=[512, 512])
        h = resnet.residual_block(h, filter=[3, 3], channel=[512, 512])
    
    with tf.variable_scope('fc'),tf.device('/device:GPU:0'):
        h = resnet.avg_pool(h, kernel=[2, 2], stride=[2, 2])
        h = resnet.fc(h)
    return h

In [6]:
epoch_ = 10
batch = 100
save_interval = 5000
visualize_interval = 20

In [7]:
from keras.preprocessing.image import ImageDataGenerator
from queue import deque

def train(resnet, interval = visualize_interval):
    with tf.variable_scope('fc'):
        loss = resnet.loss()
        adam = tf.train.AdamOptimizer()
        train_op = adam.minimize(loss)
        resnet.compile()

    # Get Data
    (train_x, train_y), (test_x, test_y) = tf.keras.datasets.cifar100.load_data(label_mode='fine')

    # Image Augmentation
    datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, featurewise_center=True,
                                 featurewise_std_normalization=True)
    datagen.fit(train_x)

    iter_count = 0
    _losses = deque(maxlen=interval)
    for epoch in range(1, epoch_ + 1):
        sample_count = 0

        for i, (sample_x, sample_y) in enumerate(datagen.flow(train_x, train_y, batch_size=batch)):
            print(sample_x.shape, sample_y.shape)
            feed_dict = {resnet.x_ts: sample_x, resnet.y_ts: sample_y}
            _loss, _ = resnet.sess.run([loss, train_op], feed_dict=feed_dict)

            # Visualize
            _losses.append(_loss)
            iter_count += 1
            sample_count += 1
            if i % interval == 0:
                _loss = np.mean(_losses)
                print(f'[epoch:{epoch:02}] loss:{_loss:<7.4}  '
                      f'sample_count:{sample_count:<5}  '
                      f'iteration:{iter_count:<5}  ')

            # Save
            if iter_count % save_interval == 0:
                print(f'Model has been successfully saved at iteration = {iter_count}')
                resnet.save()


def evaluate(resnet, batch_size= batch):
    correct_prediction = tf.equal(tf.argmax(resnet.last_layer, 1), y=resnet.y_ts)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    resnet.compile()

    # Get Data
    (train_x, train_y), (test_x, test_y) = tf.keras.datasets.cifar100.load_data(label_mode='fine')

    accuracies = list()
    for i in range(0, 10000, batch_size):
        if i + batch_size < 10000:
            _acc = resnet.sess.run(accuracy, feed_dict={
                resnet.x_ts: test_x[i:i + batch_size],
                resnet.y_ts: test_y[i:i + batch_size]})
            accuracies.append(_acc)

    print('Accuracy', np.mean(accuracies))


def main():
    resnet = ResNet(batch= batch)
    model(resnet)

    print('Start Training')
    train(resnet)
#     if mode == 'train':
#         print('Start Training')
#         train(resnet)
#     elif mode == 'test':
#         print('Start Evaluation')
#         evaluate(resnet)


if __name__ == '__main__':
    main()

Start Training
(100, 32, 32, 3) (100, 1)


InvalidArgumentError: labels must be 1-D, but got shape [100,1]
	 [[node fc_1/cross_entropy/cross_entropy (defined at <ipython-input-4-e21163978f52>:164)  = SparseSoftmaxCrossEntropyWithLogits[T=DT_FLOAT, Tlabels=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](fc/add, _arg_Placeholder_1_0_1/_783)]]
	 [[{{node fc_1/cross_entropy_mean/_789}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_14244_fc_1/cross_entropy_mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'fc_1/cross_entropy/cross_entropy', defined at:
  File "/home/pirl/.conda/envs/tf/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/gen.py", line 1080, in __init__
    self.run()
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2843, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2869, in _run_cell
    return runner(coro)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3044, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3209, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3291, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-fb7d8e7566e4>", line 79, in <module>
    main()
  File "<ipython-input-7-fb7d8e7566e4>", line 69, in main
    train(resnet)
  File "<ipython-input-7-fb7d8e7566e4>", line 6, in train
    loss = resnet.loss()
  File "<ipython-input-4-e21163978f52>", line 164, in loss
    cross_entropy = loss_f(logits=self.last_layer, labels=self.y_ts, name='cross_entropy')
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 2049, in sparse_softmax_cross_entropy_with_logits
    precise_logits, labels, name=name)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 7520, in sparse_softmax_cross_entropy_with_logits
    labels=labels, name=name)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/pirl/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): labels must be 1-D, but got shape [100,1]
	 [[node fc_1/cross_entropy/cross_entropy (defined at <ipython-input-4-e21163978f52>:164)  = SparseSoftmaxCrossEntropyWithLogits[T=DT_FLOAT, Tlabels=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](fc/add, _arg_Placeholder_1_0_1/_783)]]
	 [[{{node fc_1/cross_entropy_mean/_789}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_14244_fc_1/cross_entropy_mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [None]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()
tf.device('/device:GPU:0')