In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf #tensorflow
import numpy as np #numpy > save loss .. 
from collections import OrderedDict #layer ..
import os, random #dir, random..
import pickle #save & load
import math #xavier..
%matplotlib inline
from matplotlib import pyplot as plt
import datetime

print(tf.__version__)

In [None]:
# Paths. Everything produced by a run is tagged with today's day-of-month.
day = str(datetime.date.today().day)

# CIFAR-10 binary batches (the python-pickle variant would live in
# './cifar10/cifar-10-batches-py/').
DATA_PATH = './cifar10/cifar-10-batches-bin/'

CHECKPOINT = ''.join(['simple_res_net_train_', day, '.ckpt'])  # checkpoint file prefix
CHECKPOINT_PATH = ''.join(['./checkpoint/res_', day, '/'])     # checkpoint directory
BOARD_PATH = ''.join(['./tensorboard/board_res_', day, '/'])   # TensorBoard log directory

for _configured_path in (DATA_PATH, CHECKPOINT, CHECKPOINT_PATH, BOARD_PATH):
    print(_configured_path)

In [None]:
# Hyper-parameters and run-wide constants.

# --- input geometry / optimisation ---
INPUT_SIDE = 32
INPUT_SIZE = INPUT_SIDE ** 2
N_CHANNEL = 3
BATCH_SIZE = 64
EPOCHS = 500
LR = 0.1
LR_DECAY_RATE = 0.5

# --- dataset layout ---
LABEL_BYTES = 1
NUM_EXAMPLES_PER_EPOCH = 50000  # number of training examples
NUM_TESTSET_PER_EPOCH = 10000   # number of test examples
# True division (see the __future__ import): these are floats, callers wrap them in int().
NUM_BATCHES_PER_EPOCH = NUM_EXAMPLES_PER_EPOCH / BATCH_SIZE
NUM_TEST_BATCHES_PER_EPOCH = NUM_TESTSET_PER_EPOCH / BATCH_SIZE

LIST_CLASS=['airplane', 'automobile', 'birds', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
N_CLASSES = len(LIST_CLASS)

# Boolean switch fed at run time to select the train / inference branch of batch norm.
training = tf.placeholder(tf.bool)

# --- loss & accuracy log files (opened for writing, i.e. truncated, right here) ---
TRAIN_OUT_FILE_NAME = 'RES_NET_CIFAR10_'+day+'.log'
TEST_OUT_FILE_NAME = 'RES_NET_CIFAR10_'+day+'.log'
train_loss_out, train_accr_out = [
    open(_prefix+TRAIN_OUT_FILE_NAME, 'w') for _prefix in ('TRAIN_LOSS', 'TRAIN_ACCURACY')
]
test_accr_out = open('TEST_ACCURACY'+TEST_OUT_FILE_NAME, 'w')

# --- summary of the configuration ---
print("input image size : {}".format(INPUT_SIZE))
print("image channel : {}".format(N_CHANNEL))
print("batch size : {}".format(BATCH_SIZE))
print("num of class : {}".format(N_CLASSES))
print("training epochs : {}".format(EPOCHS))
print("learning rate : {}".format(LR))
print("learning decay rate : {}".format(LR_DECAY_RATE))

In [None]:
def batch_norm_for_dense_layer(x, n_out, phase_train):
    """
    Batch normalization for the output of a dense (fully connected) layer.

    Moments are taken over axis 0 of `x`. When `phase_train` evaluates to True
    the batch statistics are used and folded into an exponential moving
    average (decay 0.5); otherwise the moving averages are used instead.

    Note: beta/gamma are created with tf.Variable, so every call creates a
    fresh set of variables rather than reusing existing ones.

    Args:
        x:           Tensor to normalize (presumably [batch, n_out] -- confirm
                     against callers)
        n_out:       integer, length of the beta and gamma vectors
        phase_train: boolean tensor used as the tf.cond predicate
    Return:
        normed:      output of tf.nn.batch_normalization (epsilon 1e-3)
    """
    with tf.variable_scope('bn'):
        # Learnable shift (beta, starts at 0) and scale (gamma, starts at 1).
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
                                     name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
                                      name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.5)

        def mean_var_with_update():
            # Training branch: update the moving averages, then hand back the
            # batch statistics (identity under the control dependency forces
            # the update to run first).
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        # Inference branch reads the moving averages instead of batch statistics.
        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
    return normed
    
def batch_norm(x, n_out, phase_train):
    """
    Batch normalization on convolutional maps.
    Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow

    Moments are taken over axes [0, 1, 2]. When `phase_train` evaluates to
    True the batch statistics are used and folded into an exponential moving
    average (decay 0.5); otherwise the moving averages are used instead.

    Note: beta/gamma are created with tf.Variable, so every call creates a
    fresh set of variables rather than reusing existing ones.

    Args:
        x:           Tensor, 4D BHWD input maps
        n_out:       integer, depth of input maps
        phase_train: boolean tensor used as the tf.cond predicate; true
                     indicates training phase
    Return:
        normed:      batch-normalized maps
    """
    with tf.variable_scope('bn'):
        # Learnable per-channel shift (beta, starts at 0) and scale (gamma, starts at 1).
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
                                     name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
                                      name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0,1,2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.5)

        def mean_var_with_update():
            # Training branch: update the moving averages, then hand back the
            # batch statistics (identity under the control dependency forces
            # the update to run first).
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        # Inference branch reads the moving averages instead of batch statistics.
        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
    return normed

# Xavier (Glorot) initialization
def xavier_init(n_inputs, n_outputs, uniform=True):
    """Return a Xavier/Glorot weight initializer.

    The scale depends only on ``n_inputs + n_outputs`` so that gradient
    magnitudes stay roughly constant from layer to layer.
    Reference: Xavier Glorot and Yoshua Bengio (2010), "Understanding the
    difficulty of training deep feedforward neural networks", AISTATS.

    Args:
    n_inputs: The number of input nodes into each output.
    n_outputs: The number of output nodes for each input.
    uniform: If true use a uniform distribution, otherwise a truncated normal.
    Returns:
    An initializer.
    """
    fan_total = n_inputs + n_outputs
    if not uniform:
        # The truncated normal re-draws samples beyond 2 standard deviations,
        # so a numerator of 3 yields roughly the same limits as the uniform case.
        return tf.truncated_normal_initializer(stddev=math.sqrt(3.0 / fan_total))
    # Uniform case: the paper's limit is sqrt(6 / (fan_in + fan_out)).
    limit = math.sqrt(6.0 / fan_total)
    return tf.random_uniform_initializer(-limit, limit)

# Read a CIFAR record
def read_dataset(filename_queue):
    """Read one fixed-length CIFAR record from `filename_queue`.

    Each record is LABEL_BYTES label bytes followed by the image bytes stored
    depth-major (channel x height x width).

    Returns:
        An object with attributes height, width, depth (ints), key (record
        key tensor), label (int32 tensor of LABEL_BYTES elements) and
        uint8image (uint8 tensor laid out height x width x channel).
    """
    class DataRecord(object):
        pass

    record = DataRecord()
    record.height = INPUT_SIDE
    record.width = INPUT_SIDE
    record.depth = N_CHANNEL

    n_label_bytes = LABEL_BYTES
    n_image_bytes = record.height * record.width * record.depth

    reader = tf.FixedLengthRecordReader(record_bytes=n_label_bytes + n_image_bytes)
    record.key, raw_value = reader.read(filename_queue)
    print (raw_value)
    raw_bytes = tf.decode_raw(raw_value, tf.uint8)

    # Bytes [0, n_label_bytes) hold the label.
    label_slice = tf.strided_slice(raw_bytes, [0], [n_label_bytes])
    record.label = tf.cast(label_slice, tf.int32)

    # The remaining bytes hold the image as channel x height x width.
    image_slice = tf.strided_slice(raw_bytes, [n_label_bytes],
                                   [n_label_bytes + n_image_bytes])
    depth_major = tf.reshape(image_slice,
                             [record.depth, record.height, record.width])

    # channel x height x width  ->  height x width x channel
    record.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return record

# Batch up images and labels
def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size, shuffle):
    """Turn a single (image, label) example stream into queued batches.

    Args:
      image: 3-D Tensor of [height, width, 3] of type.float32.
      label: 1-D Tensor of type.int32
      min_queue_examples: int32, minimum number of samples to retain
        in the queue that provides of batches of examples.
      batch_size: Number of images per batch.
      shuffle: boolean indicating whether to use a shuffling queue.
    Returns:
      images: Images. 4D tensor of [batch_size, height, width, 3] size.
      labels: Labels. 1D tensor of [batch_size] size.
    """
    num_preprocess_threads = 16  # number of enqueueing threads
    queue_capacity = min_queue_examples + 3 * batch_size

    if not shuffle:
        # Plain FIFO batching.
        images, label_batch = tf.train.batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=queue_capacity)
    else:
        # Shuffling queue; min_after_dequeue (the minimum number of elements
        # left in the queue after a dequeue) ensures a level of mixing and is
        # the only argument that differs from the plain batch call.
        images, label_batch = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=queue_capacity,
            min_after_dequeue=min_queue_examples)

    # Display the batched images in the TensorBoard visualizer.
    tf.summary.image('images', images)

    flat_labels = tf.reshape(label_batch, [batch_size])
    return images, flat_labels


def distorted_inputs(data_dir, batch_size):
    """Build the randomly distorted (augmented) training input pipeline.

    Args:
      data_dir: Path to the CIFAR-10 data directory.
      batch_size: Number of images per batch.
    Returns:
      images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
      labels: Labels. 1D tensor of [batch_size] size.
    Raises:
      ValueError: if one of data_batch_1.bin .. data_batch_5.bin is missing.
    """
    filenames = []
    for batch_no in range(1, 6):
        filenames.append(os.path.join(data_dir, 'data_batch_%d.bin' % batch_no))
    for path in filenames:
        if not tf.gfile.Exists(path):
            raise ValueError('Failed to find file: ' + path)

    # Queue of file names -> one decoded record at a time.
    filename_queue = tf.train.string_input_producer(filenames)
    read_input = read_dataset(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height, width = INPUT_SIDE, INPUT_SIDE
    image_shape = [height, width, 3]

    # Random distortions. They are not commutative, so the order matters
    # (randomizing the order would be a further augmentation).
    distorted_image = tf.random_crop(reshaped_image, image_shape)       # random h x w x c crop
    distorted_image = tf.image.random_flip_left_right(distorted_image)  # random horizontal flip
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)

    # Per-image standardization of the distorted pixels.
    float_image = tf.image.per_image_standardization(distorted_image)

    # Pin the static shapes so the batching queue knows them.
    float_image.set_shape(image_shape)
    read_input.label.set_shape([1])

    # Keep a large fraction of an epoch in the queue for good shuffling.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH * min_fraction_of_examples_in_queue)
    print('Filling queue with %d CIFAR images before starting to train. '
         'This will take a few minutes.' % min_queue_examples)

    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples, batch_size,
                                           shuffle=True)


def inputs(eval_data, data_dir, batch_size):
    """Construct undistorted input for CIFAR evaluation using the Reader ops.

    Args:
    eval_data: bool; True reads the test batch ('test_batch.bin'), False reads
        the five training batches.
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.
    Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
    Raises:
    ValueError: if an expected data file does not exist.
    """
    if not eval_data:
        # `range`, not `xrange`: the rest of the notebook requires Python 3.
        filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                     for i in range(1, 6)]
        num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH
    else:
        filenames = [os.path.join(data_dir, 'test_batch.bin')]
        num_examples_per_epoch = NUM_TESTSET_PER_EPOCH

    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames)

    # Read examples from files in the filename queue.
    read_input = read_dataset(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = INPUT_SIDE
    width = INPUT_SIDE

    # Image processing for evaluation.
    # Crop (or pad) to the central [height, width] of the image.
    resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
                                                         height, width)

    # Per-image standardization, matching the training pipeline.
    float_image = tf.image.per_image_standardization(resized_image)

    # Set the shapes of tensors.
    float_image.set_shape([height, width, 3])
    read_input.label.set_shape([1])

    # Size the queue from the data set actually being read (train or test);
    # with shuffle=False this only determines the queue capacity.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(num_examples_per_epoch * min_fraction_of_examples_in_queue)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=False)


print("FUNCTIONS READY")

In [None]:
#NETWORK PARAMETERS

stddev = 0.1  # std-dev of the random-normal bias initialisation

# (layer name, weight shape) in creation order. Conv filters are
# [height, width, in_channels, out_channels]; dense matrices are [in, out].
_LAYER_SHAPES = [
    ('conv', [3, 3, N_CHANNEL, 16]),
    ('conv1_1x1', [1, 1, 16, 4]),
    ('conv1_3x3', [3, 3, 4, 4]),
    ('conv1_1x1_16', [1, 1, 4, 16]),
    ('conv2_1x1', [1, 1, 16, 4]),
    ('conv2_3x3', [3, 3, 4, 4]),
    ('conv2_1x1_16', [1, 1, 4, 16]),

    # stem conv (16) concatenated with conv2 (16) = 32 filters
    ('conv3_1x1', [1, 1, 32, 8]),
    ('conv3_3x3', [3, 3, 8, 8]),
    ('conv3_1x1_32', [1, 1, 8, 32]),
    ('conv4_1x1', [1, 1, 32, 8]),
    ('conv4_3x3', [3, 3, 8, 8]),
    ('conv4_1x1_32', [1, 1, 8, 32]),

    # output after conv2 (32) concatenated with conv4 (32) = 64
    ('conv5_1x1', [1, 1, 64, 16]),
    ('conv5_3x3', [3, 3, 16, 16]),
    ('conv5_1x1_64', [1, 1, 16, 64]),
    ('conv6_1x1', [1, 1, 64, 16]),
    ('conv6_3x3', [3, 3, 16, 16]),
    ('conv6_1x1_64', [1, 1, 16, 64]),

    # output after conv4 (64) concatenated with conv6 (64) = 128
    ('dense1', [16*16*128, 1000]),
    ('dense2', [1000, N_CLASSES]),
]

# Xavier-initialised weights; the initializer is given the last two dims
# of each shape (in, out) as its fan-in / fan-out.
weights = {
    layer: tf.get_variable(name=layer, shape=shape,
                           initializer=xavier_init(shape[-2], shape[-1]))
    for layer, shape in _LAYER_SHAPES
}

# One bias vector per layer, sized by the layer's output dimension.
# Only the dense biases are read by the network; the conv biases are currently unused.
# The `name` goes to the random_normal op, not to the Variable itself.
biases = {
    layer: tf.Variable(tf.random_normal([shape[-1]], stddev=stddev, name=layer + '_b'))
    for layer, shape in _LAYER_SHAPES
}

In [None]:
#model
#http://laonple.blog.me/220764986252 - bottleneck
#conv 3x3
#conv (1x1, 3x3, 1x1) - relu > 16
#conv (1x1, 3x3, 1x1) relu > 32
#conv (1x1, 3x3, 1x1) relu > 64
#avg pooling
#fc
#softmax
def ResNet(img_width, img_height, img_channel, _x, _w, _b, scope='ResNet', training=None, reuse=None):
    """Build the bottleneck network graph and return its named activations.

    Layout: a 3x3 stem conv, then six bottleneck groups (1x1 -> 3x3 -> 1x1
    convs, batch norm on the last conv). After groups 2, 4 and 6 the group
    output is concatenated along the channel axis with an earlier activation
    (concatenation, not addition), giving 32, 64 and 128 channels. A 2x2
    average pool, a 1000-unit dense layer with batch norm and the final
    logit layer follow.

    Args:
        img_width, img_height, img_channel: used to reshape `_x` to
            [-1, img_width, img_height, img_channel].
        _x: input tensor.
        _w: dict of weight variables keyed by layer name.
        _b: dict of bias variables; only 'dense1' and 'dense2' are read.
        scope: name of the enclosing variable scope. The name is rebound by
            the `as scope` clauses below, after the variable scope is entered.
        training: boolean tensor forwarded to the batch-norm helpers.
        reuse: not used by this function.
    Returns:
        OrderedDict mapping layer names to tensors, ending with 'logit'.

    Note: the name scopes 'conv2'..'conv7' are offset by one from the layer
    keys conv1..conv6 that they contain.
    """
    network = OrderedDict() #network layers

    # Reshape the input to a 4-D batch of images
    _x_r = tf.reshape(_x, shape=[-1,img_width,img_height, img_channel])
    
    with tf.variable_scope(scope):
        # Stem: 3x3 conv -> BN -> ReLU, 16 channels
        with tf.name_scope('conv') as scope:
            conv = tf.nn.conv2d(_x_r, _w['conv'], strides=[1, 1, 1, 1], padding='SAME')
            conv = batch_norm(conv, 16, training)
            #conv = tf.layers.batch_normalization(conv, training=training, name='bn_conv')
            conv = tf.nn.relu(conv)
            network['conv'] = conv
        
        # Bottleneck 1: 16 -> 4 -> 4 -> 16, no skip connection
        with tf.name_scope('conv2') as scope:
            conv1_1x1 = tf.nn.conv2d(conv, _w['conv1_1x1'], strides=[1, 1, 1, 1], padding='SAME')
            conv1_1x1 = tf.nn.relu(conv1_1x1)
            conv1_3x3 = tf.nn.conv2d(conv1_1x1, _w['conv1_3x3'], strides=[1, 1, 1, 1], padding='SAME')
            conv1_3x3 = tf.nn.relu(conv1_3x3)
            conv1_1x1_16 = tf.nn.conv2d(conv1_3x3, _w['conv1_1x1_16'], strides=[1, 1, 1, 1], padding='SAME')
            #conv1_1x1_16 = tf.layers.batch_normalization(conv1_1x1_16, training=training, name='bn_conv1')
            conv1_1x1_16 = batch_norm(conv1_1x1_16, 16, training)
            conv1_1x1_16 = tf.nn.relu(conv1_1x1_16)
            network['conv1_1x1_16'] = conv1_1x1_16
        
        # Bottleneck 2: 16 -> 4 -> 4 -> 16, then concatenated with the stem output
        with tf.name_scope('conv3') as scope:
            conv2_1x1 = tf.nn.conv2d(conv1_1x1_16, _w['conv2_1x1'], strides=[1, 1, 1, 1], padding='SAME')
            conv2_1x1 = tf.nn.relu(conv2_1x1)
            conv2_3x3 = tf.nn.conv2d(conv2_1x1, _w['conv2_3x3'], strides=[1, 1, 1, 1], padding='SAME')
            conv2_3x3 = tf.nn.relu(conv2_3x3)
            conv2_1x1_16 = tf.nn.conv2d(conv2_3x3, _w['conv2_1x1_16'], strides=[1, 1, 1, 1], padding='SAME')
            #conv2_1x1_16 = tf.layers.batch_normalization(conv2_1x1_16, training=training, name='bn_conv2')
            conv2_1x1_16 = batch_norm(conv2_1x1_16, 16, training)
            # 16 (stem) + 16 = 32 channels; despite the name this tensor is 32 deep from here on
            conv2_1x1_16 = tf.concat([conv, conv2_1x1_16], 3)
            conv2_1x1_16 = tf.nn.relu(conv2_1x1_16)
            network['conv2_1x1_16'] = conv2_1x1_16
       
        # Bottleneck 3: 32 -> 8 -> 8 -> 32, no skip connection
        with tf.name_scope('conv4') as scope:
            conv3_1x1 = tf.nn.conv2d(conv2_1x1_16, _w['conv3_1x1'], strides=[1, 1, 1, 1], padding='SAME')
            conv3_1x1 = tf.nn.relu(conv3_1x1)
            conv3_3x3 = tf.nn.conv2d(conv3_1x1, _w['conv3_3x3'], strides=[1, 1, 1, 1], padding='SAME')
            conv3_3x3 = tf.nn.relu(conv3_3x3)
            conv3_1x1_32 = tf.nn.conv2d(conv3_3x3, _w['conv3_1x1_32'], strides=[1, 1, 1, 1], padding='SAME')
            #conv3_1x1_32 = tf.layers.batch_normalization(conv3_1x1_32, training=training, name='bn_conv3')
            conv3_1x1_32 = batch_norm(conv3_1x1_32, 32, training)
            conv3_1x1_32 = tf.nn.relu(conv3_1x1_32)
            network['conv3_1x1_32'] = conv3_1x1_32
        
        # Bottleneck 4: 32 -> 8 -> 8 -> 32, then concatenated with the bottleneck-2 output
        with tf.name_scope('conv5') as scope:
            conv4_1x1 = tf.nn.conv2d(conv3_1x1_32, _w['conv4_1x1'], strides=[1, 1, 1, 1], padding='SAME')
            conv4_1x1 = tf.nn.relu(conv4_1x1)
            conv4_3x3 = tf.nn.conv2d(conv4_1x1, _w['conv4_3x3'], strides=[1, 1, 1, 1], padding='SAME')
            conv4_3x3 = tf.nn.relu(conv4_3x3)
            conv4_1x1_32 = tf.nn.conv2d(conv4_3x3, _w['conv4_1x1_32'], strides=[1, 1, 1, 1], padding='SAME')
            #conv4_1x1_32 = tf.layers.batch_normalization(conv4_1x1_32, training=training, name='bn_conv4')
            conv4_1x1_32 = batch_norm(conv4_1x1_32, 32, training)

            # 32 + 32 = 64 channels
            conv4_1x1_32 = tf.concat([conv2_1x1_16, conv4_1x1_32 ], 3)
            conv4_1x1_32 = tf.nn.relu(conv4_1x1_32)
            network['conv4_1x1_32'] = conv4_1x1_32
    
        # Bottleneck 5: 64 -> 16 -> 16 -> 64, no skip connection
        with tf.name_scope('conv6') as scope:
            conv5_1x1 = tf.nn.conv2d(conv4_1x1_32, _w['conv5_1x1'], strides=[1, 1, 1, 1], padding='SAME')
            conv5_1x1 = tf.nn.relu(conv5_1x1)
            conv5_3x3 = tf.nn.conv2d(conv5_1x1, _w['conv5_3x3'], strides=[1, 1, 1, 1], padding='SAME')
            conv5_3x3 = tf.nn.relu(conv5_3x3)
            conv5_1x1_64 = tf.nn.conv2d(conv5_3x3, _w['conv5_1x1_64'], strides=[1, 1, 1, 1], padding='SAME')
            #conv5_1x1_64 = tf.layers.batch_normalization(conv5_1x1_64, training=training, name='bn_conv5')
            conv5_1x1_64 = batch_norm(conv5_1x1_64, 64, training)
            conv5_1x1_64 = tf.nn.relu(conv5_1x1_64)
            network['conv5_1x1_64'] = conv5_1x1_64
        
        # Bottleneck 6: 64 -> 16 -> 16 -> 64, then concatenated with the bottleneck-4 output
        with tf.name_scope('conv7') as scope:
            conv6_1x1 = tf.nn.conv2d(conv5_1x1_64, _w['conv6_1x1'], strides=[1, 1, 1, 1], padding='SAME')
            conv6_1x1 = tf.nn.relu(conv6_1x1)
            conv6_3x3 = tf.nn.conv2d(conv6_1x1, _w['conv6_3x3'], strides=[1, 1, 1, 1], padding='SAME')
            conv6_3x3 = tf.nn.relu(conv6_3x3)
            conv6_1x1_64 = tf.nn.conv2d(conv6_3x3, _w['conv6_1x1_64'], strides=[1, 1, 1, 1], padding='SAME')
            #conv6_1x1_64 = tf.layers.batch_normalization(conv6_1x1_64, training=training, name='bn_conv6')
            conv6_1x1_64 = batch_norm(conv6_1x1_64, 64, training)
            
            # 64 + 64 = 128 channels
            conv6_1x1_64 = tf.concat([conv4_1x1_32, conv6_1x1_64],3)
            conv6_1x1_64 = tf.nn.relu(conv6_1x1_64)
            network['conv6_1x1_64'] = conv6_1x1_64
        
        # 2x2 average pooling with stride 2
        with tf.name_scope('pool') as scope:
            pool = tf.nn.avg_pool(conv6_1x1_64, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
            network['pool'] = pool
            
        # Dropout helpers; currently unused because the tf.cond below is commented out.
        def dropout(pool):
            return tf.nn.dropout(pool, 0.5, name='dropout')
        def none(pool):
            return pool
        
        #with tf.name_scope('dropout') as scope:      
            #pool = tf.cond(training, lambda: dropout(pool), lambda: none(pool))
        
        # Flatten to the input width of the dense1 weight matrix, then dense -> BN -> ReLU
        with tf.name_scope('dense1') as scope:
            dense = tf.reshape(pool, [-1, _w['dense1'].get_shape().as_list()[0]])
            dense1 = tf.add(tf.matmul(dense, _w['dense1']), _b['dense1'])
            #dense1 =  tf.layers.batch_normalization(dense1, training=training, name='bn_dense')
            dense1 = batch_norm_for_dense_layer(dense1, 1000, training)
            dense1 = tf.nn.relu(dense1)
            network['dense1'] = dense1
            
        # Class scores (no softmax applied here)
        with tf.name_scope('logit') as scope:
            logit = tf.add(tf.matmul(dense1, _w['dense2']), _b['dense2'])
            network['logit'] = logit
            
        
    return network

# TRAIN

In [None]:
# Build the training graph: augmented input queue -> one-hot labels -> network.
images, label_ids = distorted_inputs(DATA_PATH, BATCH_SIZE)
labels = tf.one_hot(indices=label_ids, depth=N_CLASSES, on_value=1, off_value=0, axis=1)
print(labels.shape)

out = ResNet(INPUT_SIDE, INPUT_SIDE, N_CHANNEL, images, weights, biases, 'ResNet', training)

# List every named activation of the network.
for layer_name in out:
    print (layer_name, out[layer_name])

In [None]:
# Loss: mean softmax cross-entropy between the one-hot labels and the logits.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=out['logit']))
tf.summary.scalar("loss", loss) #save loss 
print("LOSS FUNCTION")

# `learning_rate` stays bound to the placeholder so that feeding it goes
# through the lower bound below. Previously the name was rebound to the
# tf.maximum output, and feeding that tensor directly overrode the clamp.
learning_rate = tf.placeholder(dtype=tf.float32)
clamped_learning_rate = tf.maximum(learning_rate, 0.0001)
tf.summary.scalar("learning_rate", clamped_learning_rate) #learning rate
print("LERANING RATE : {}".format(clamped_learning_rate))

# Optimizers: both train ops are built; the training loop picks one per epoch.
adam = tf.train.AdamOptimizer(learning_rate = clamped_learning_rate).minimize(loss)
sgd = tf.train.GradientDescentOptimizer(learning_rate = clamped_learning_rate).minimize(loss)

# Accuracy: fraction of the batch whose arg-max logit equals the label.
corr = tf.equal(tf.argmax(out['logit'], 1), tf.argmax(labels,1))
accr = tf.reduce_mean(tf.cast(corr, "float"))
tf.summary.scalar("accuracy", accr) #save accuracy

In [None]:
# Session: create it and initialize every variable defined so far.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Saver: keep only the 3 most recent checkpoints.
saver = tf.train.Saver(max_to_keep=3)
save_step = 100 # NOTE(review): not referenced in the visible cells; the loop saves every 10 epochs
if not os.path.exists(CHECKPOINT_PATH):
    os.makedirs(CHECKPOINT_PATH)
print(CHECKPOINT_PATH)
    
# Look up the latest checkpoint; it is only printed, the restore call is disabled.
checkpoint = tf.train.latest_checkpoint(CHECKPOINT_PATH)
if checkpoint is not None:
    print(checkpoint)
    #saver.restore(sess, checkpoint)
    
# TensorBoard: merge all summaries and write the graph to BOARD_PATH.
summary_op = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(BOARD_PATH, sess.graph)

# Start the input-queue threads that feed the data pipeline.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)

print("initialized")

In [None]:
# Training loop.
# Series collected for the plots further down (one point per 100 batches).
train_loss_for_plot = []
train_acc_for_plot = []
test_loss_for_plot = []
test_acc_for_plot = []

# Working copy of the learning rate: the LR constant stays at its configured
# value, so re-running this cell starts from the same rate.
current_lr = LR

print('start')
for epoch in range(EPOCHS):
    print(epoch)
    # Adam for the first half of training, plain SGD afterwards.
    train_op = adam if epoch < EPOCHS * 0.5 else sgd
    nNumBatch = 0 # number of batches processed in this epoch
    AvgBatchCost = 0 # running sum of the batch losses
    for i in range(int(NUM_BATCHES_PER_EPOCH)):
        nNumBatch += 1
        _, tmp_cost = sess.run([train_op, loss],
                               feed_dict={training: True, learning_rate : current_lr})
        AvgBatchCost += tmp_cost

        if nNumBatch % 100 == 0: # progress report
            # Running `accr` dequeues a further batch from the input queue.
            train_acc = sess.run(accr, feed_dict={training: True, learning_rate : current_lr})
            print('\t[%d nNumBatch] train cost = %g, acc = %g' %(nNumBatch, AvgBatchCost/nNumBatch, train_acc ))
            print('\t\t learning rate = %g'%(current_lr))
            train_loss_for_plot.append(AvgBatchCost/nNumBatch)
            train_acc_for_plot.append(train_acc)

    # Learning-rate decay every 350 epochs. Epoch 0 is excluded: 0 % 350 == 0
    # used to halve the rate right after the very first epoch.
    if epoch > 0 and epoch % 350 == 0:
        current_lr = max(current_lr*LR_DECAY_RATE, 0.0001)

    if epoch % 10 == 0: # checkpoint every 10 epochs
        save_path = saver.save(sess, CHECKPOINT_PATH + CHECKPOINT, global_step=epoch)
        print(save_path)

In [None]:
# Ask the queue-runner threads to stop, wait for them to finish, then release the session.
coord.request_stop()
coord.join(threads)
print('thread is stopped')
sess.close()
print('session closed')

In [None]:
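# 1. Inputs must be fed through feed_dict. This means changing everything from image loading through corr and accuracy.
# 2. After doing all of that and running it, if it does not seem to work, switch from nn to layers.batch_norm.
# 3. Once testing is done, train for roughly 2000~3000 epochs, take the ckpt and visualize it.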

In [None]:
# Dump the collected training curves to the log files opened in the
# parameters cell, and flush so the data reaches disk while the kernel
# (and the still-open file handles) stay alive.
print(train_loss_for_plot, file=train_loss_out)
print(train_acc_for_plot, file=train_accr_out)
train_loss_out.flush()
train_accr_out.flush()

In [None]:
# Training loss curve (one point per 100 training batches).
# `y` must be bound before it is used to size `x`; the series collected by
# the training loop is `train_loss_for_plot`.
y = train_loss_for_plot
x = np.linspace(0, len(y), len(y))
plt.plot(x,y, color="red")
plt.xlabel("batch")
plt.ylabel("loss")
plt.title("LOSS")

In [None]:
# Training accuracy curve (one point per 100 training batches).
# `y_ac` must be bound before it is used to size `x`; the series collected by
# the training loop is `train_acc_for_plot`.
y_ac = train_acc_for_plot
x = np.linspace(0, len(y_ac), len(y_ac))
plt.plot(x,y_ac, color="blue")
plt.xlabel("batch")
plt.ylabel("accuracy")
plt.title("ACCURACY")

# TRAIN END

# TEST

In [None]:
# Test input pipeline: undistorted test batches plus one-hot labels.
images_test, label_ids_test = inputs(eval_data=True, data_dir=DATA_PATH, batch_size=BATCH_SIZE )
labels_test = tf.one_hot(indices=label_ids_test, depth=N_CLASSES,
                         on_value=1, off_value=0, axis=1)

In [None]:
# Session initialize: a fresh session whose variables are (re)initialized
# to their initial values, i.e. nothing trained is carried over here.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Start the input-queue threads for the new session.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)

In [None]:
# Restore the trained weights into the test session.
saver = tf.train.Saver()

CHECKPOINT_PATH = './checkpoint'
# example checkpoint name: ./checkpoint/res_13simple_res_net_train_13.ckpt-490
checkpoint = tf.train.latest_checkpoint(CHECKPOINT_PATH)
if checkpoint is not None:
    print(checkpoint)
    saver.restore(sess, checkpoint)
    print('restored')
else:
    # Make the failure visible: without a restore, everything below runs on
    # freshly initialized (untrained) weights.
    print('WARNING: no checkpoint found in %s; continuing with untrained weights'
          % CHECKPOINT_PATH)

In [None]:
# Build a second copy of the network on the test input queue. It shares the
# `weights` / `biases` variables with the training graph.
# NOTE(review): the batch-norm helpers create their variables with tf.Variable
# on every call, so this call adds new beta/gamma/moving-average variables
# after the initializer and the restore above have already run. They look
# neither initialized nor restored from the trained values; confirm before
# trusting the test accuracy.
test = ResNet(INPUT_SIDE, INPUT_SIDE, N_CHANNEL, images_test, weights, biases, 'ResNet', training)

In [None]:
# Test accuracy: fraction of the batch whose arg-max logit equals the arg-max of the one-hot label.
corr_test = tf.equal(tf.argmax(test['logit'], 1), tf.argmax(labels_test,1))
accr_test = tf.reduce_mean(tf.cast(corr_test, "float"))
tf.summary.scalar("test_accuracy", accr_test) #save accuracy

In [None]:
# Test: evaluate accuracy on successive batches from the test queue.
test_acc_for_plot = []


print('start')
# NOTE(review): each iteration evaluates one batch of BATCH_SIZE test images,
# so the '[%d EPOCH]' label is really a batch index. The bound is EPOCHS/10
# iterations rather than NUM_TEST_BATCHES_PER_EPOCH; confirm which is intended.
for step in range(int(EPOCHS/10)):
    test_acc = sess.run(accr_test, feed_dict={training: False})
    print('[%d EPOCH] TEST ACC = %g%%' %(step, test_acc*100))
    test_acc_for_plot.append(test_acc)
    
print('end')

In [None]:
# Dump the collected test accuracies to the log file opened in the parameters
# cell, and flush so the data reaches disk while the handle stays open.
print(test_acc_for_plot, file=test_accr_out)
test_accr_out.flush()

In [None]:
# Test accuracy per evaluated batch. The axis label and title previously
# said "loss" / "LOSS" although the plotted series is accuracy.
y = test_acc_for_plot
x = np.linspace(0, len(y), len(y))

plt.plot(x,y, color="red")
plt.xlabel("step")
plt.ylabel("accuracy")
plt.title("TEST ACCURACY")

# SHOW TRAINED FEATURE

In [None]:
# Ask the queue-runner threads to stop, wait for them to finish, then release the session.
coord.request_stop()
coord.join(threads)
print('thread is stopped')
sess.close()
print('session closed')

In [None]:
# Session initialize: fresh session for the visualizations below.
init = tf.global_variables_initializer()
# NOTE(review): init_local is created but never run in the visible cells.
init_local = tf.local_variables_initializer()
sess = tf.Session()
sess.run(init)

In [None]:
# Restore the trained weights into the visualization session.
saver = tf.train.Saver()

CHECKPOINT_PATH = './checkpoint'
# example checkpoint name: ./checkpoint/res_13simple_res_net_train_13.ckpt-490
checkpoint = tf.train.latest_checkpoint(CHECKPOINT_PATH)
if checkpoint is not None:
    print(checkpoint)
    saver.restore(sess, checkpoint)
    print('restored')
else:
    # Make the failure visible: without a restore, the features and weights
    # shown below come from freshly initialized (untrained) variables.
    print('WARNING: no checkpoint found in %s; continuing with untrained weights'
          % CHECKPOINT_PATH)

In [None]:
# Input: python-pickle version of the CIFAR-10 test batch.
datapath = 'cifar10/cifar-10-batches-py/'
test_file=['test_batch']

def unpickle(file):
    """Load a pickled CIFAR batch file and return its contents.

    The file is read with ``encoding='bytes'``, so string keys of the
    returned dictionary are ``bytes`` (e.g. ``b'labels'``).

    Args:
        file: path of the pickle file to read.
    Returns:
        The unpickled object.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only use this on trusted data files.
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch

In [None]:
# Pick one test image, print its metadata and prepare it as network input.
data_num = 105
data = unpickle(datapath+test_file[0])
print(type(data))
for val in data.keys():
    print(val)
print(data[b'labels'][data_num])
print(LIST_CLASS[data[b'labels'][data_num]])
print(data[b'filenames'][data_num])
print(data[b'data'][data_num])

# Each row is a flat, channel-major (channel x height x width) image;
# convert it to height x width x channel.
reshape_img = np.reshape(data[b'data'][data_num], [N_CHANNEL, INPUT_SIDE, INPUT_SIDE])
img = np.transpose(reshape_img, (1,2,0))

# Feed the network the HWC image (ResNet reshapes its input as
# [-1, width, height, channel], so the flat channel-major row would be
# misinterpreted), standardized like the training / evaluation pipelines.
input_img = tf.image.per_image_standardization(tf.cast(img, tf.float32))

plt.imshow(img)

In [None]:
# Build the network on the single sample image.
# Building it creates new batch-norm variables. Initialize ONLY those:
# re-running the global `init` op here would overwrite the weights restored
# from the checkpoint and still leave the new variables uninitialized, since
# `init` was created before they existed.
# NOTE(review): the new batch-norm variables start from their initial values,
# not from trained ones; confirm this is acceptable for the visualization.
existing_var_names = {v.name for v in tf.global_variables()}
Res = ResNet(INPUT_SIDE, INPUT_SIDE, N_CHANNEL, input_img, weights, biases, 'ResNet', training)
new_vars = [v for v in tf.global_variables() if v.name not in existing_var_names]
sess.run(tf.variables_initializer(new_vars))

# SHOW FEATURE 

In [None]:
# Show every feature map of every layer up to (not including) the dense layers.
for layer_name, layer_tensor in Res.items():
    if layer_name.startswith('dense'):
        break
    print(layer_name)
    feature = sess.run(layer_tensor, feed_dict={training: False})
    print(feature.shape)
    n_maps = feature.shape[3]  # number of feature maps (channels)
    for no in range(n_maps):
        plt.matshow(feature[0, :, :, no], cmap=plt.cm.gray_r)
        plt.show()

In [None]:
# Show a single feature map (channel index 1) for each layer before the dense layers.
for layer_name in Res:
    if layer_name.startswith('dense'):
        break
    print(layer_name)
    activation = sess.run(Res[layer_name], feed_dict={training: False})
    plt.matshow(activation[0, :, :, 1], cmap=plt.cm.gray)
    plt.show()

# SHOW WEIGHTS

In [None]:
# Dump the convolution weights: 1x1 filters are printed as scalars,
# larger filters are drawn as images (one per input/output channel pair).
weight = sess.run(weights)
pointwise_suffixes = ('1x1', '16', '32', '64')  # key endings of the 1x1 conv layers
for key, value in weight.items():
    if key.startswith('dense'):
        continue
    print(key)
    print(value.shape)

    channel_pairs = [(i, o)
                     for i in range(value.shape[2])
                     for o in range(value.shape[3])]
    if key.endswith(pointwise_suffixes):
        for i, o in channel_pairs:
            print("%d , %d : %g" %(i,o,value[0,0,i,o]))
    else:
        for i, o in channel_pairs:
            plt.imshow(value[:,:,i,o], cmap=plt.cm.gray)
            plt.show()