In [1]:
import tensorflow as tf
# Shared session that the cells below use to evaluate tensors via sess.run(...).
sess = tf.Session()

* example of tf.nn.conv2d

In [2]:
# A batch of two 2x2 inputs with a single value per pixel: shape (2, 2, 2, 1).
input_batch = tf.constant([
        [
            # first input
            [[0.0], [1.0]],
            [[2.0], [3.0]]
        ],
        [
            # second input
            [[2.0], [4.0]],
            [[6.0], [8.0]]
        ]
    ])

# Kernel of shape (1, 1, 1, 2): two weights (1.0 and 2.0) applied to the single
# input value of each pixel.
kernel = tf.constant([
        [
            [[1.0, 2.0]]
        ]
    ])

In [3]:
# Convolve the batch with the kernel using a stride of 1 in every dimension and 'SAME' padding.
conv2d = tf.nn.conv2d(input_batch, kernel, strides=[1, 1, 1, 1], padding='SAME')

In [5]:
# Evaluate the convolution; in the output below every input value x becomes the pair [x, 2x].
sess.run(conv2d)

array([[[[  0.,   0.],
         [  1.,   2.]],

        [[  2.,   4.],
         [  3.,   6.]]],


       [[[  2.,   4.],
         [  4.,   8.]],

        [[  6.,  12.],
         [  8.,  16.]]]], dtype=float32)

In [6]:
# An input pixel and the pixel produced for it by the tf.nn.conv2d filter share the same index.
input_values, conv_values = sess.run([input_batch, conv2d])

lower_right_image_pixel = input_values[0][1][1]
lower_right_kernel_pixel = conv_values[0][1][1]

lower_right_image_pixel, lower_right_kernel_pixel

(array([ 3.], dtype=float32), array([ 3.,  6.], dtype=float32))

In [4]:
# Example: applying a 3x3x1 kernel to a 6x6x1 image.
input_batch = tf.constant([
        [  # First Input (6x6x1)
            [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0]],
            [[0.1], [1.1], [2.1], [3.1], [4.1], [5.1]],
            [[0.2], [1.2], [2.2], [3.2], [4.2], [5.2]],
            [[0.3], [1.3], [2.3], [3.3], [4.3], [5.3]],
            [[0.4], [1.4], [2.4], [3.4], [4.4], [5.4]],
            [[0.5], [1.5], [2.5], [3.5], [4.5], [5.5]],
        ],
    ])

# Only the middle column of the kernel is non-zero (0.5, 1.0, 0.5).
kernel = tf.constant([  # Kernel (3x3x1)
        [[[0.0]], [[0.5]], [[0.0]]],
        [[[0.0]], [[1.0]], [[0.0]]],
        [[[0.0]], [[0.5]], [[0.0]]]
    ])

# NOTE: the strides are now 3 along the two middle dimensions, so the 6x6 input
# produces the 2x2 output shown below.
conv2d = tf.nn.conv2d(input_batch, kernel, strides=[1, 3, 3, 1], padding='SAME')
sess.run(conv2d)

array([[[[ 2.20000005],
         [ 8.19999981]],

        [[ 2.79999995],
         [ 8.80000019]]]], dtype=float32)

* activation functions

In [5]:
# ReLU example: in the output below negative inputs become 0, the rest pass through.
features = tf.range(-2, 3)
relu_features = tf.nn.relu(features)

sess.run([features, relu_features])

[array([-2, -1,  0,  1,  2], dtype=int32), array([0, 0, 0, 1, 2], dtype=int32)]

In [6]:
# Sigmoid example on the float inputs -1..2.
features = tf.to_float(tf.range(-1, 3))
sigmoid_features = tf.sigmoid(features)

sess.run([features, sigmoid_features])

[array([-1.,  0.,  1.,  2.], dtype=float32),
 array([ 0.26894143,  0.5       ,  0.7310586 ,  0.88079703], dtype=float32)]

In [7]:
# Tanh example on the float inputs -1..2.
features = tf.to_float(tf.range(-1, 3))
tanh_features = tf.tanh(features)

sess.run([features, tanh_features])

[array([-1.,  0.,  1.,  2.], dtype=float32),
 array([-0.76159418,  0.        ,  0.76159418,  0.96402758], dtype=float32)]

In [24]:
# Dropout example with keep_prob=0.5. No seed is set, so which entries are
# zeroed differs between runs; in the recorded output the kept entries are doubled.
features = tf.constant([-0.1, 0.0, 0.1, 0.2])
dropped_features = tf.nn.dropout(features, keep_prob=0.5)

sess.run([features, dropped_features])

[array([-0.1,  0. ,  0.1,  0.2], dtype=float32),
 array([-0.2       ,  0.        ,  0.        ,  0.40000001], dtype=float32)]

* pooling layers

In [25]:
# max_pool: the pooling window has the same size as the whole input, so the
# result is the single largest value of the 3x3 grid.
batch_size = 1
input_height = 3
input_width = 3
input_channels = 1

layer_input = tf.constant([[
    [[1.0], [0.2], [1.5]],
    [[0.1], [1.2], [1.4]],
    [[1.1], [0.4], [0.4]],
]])

kernel = [batch_size, input_height, input_width, input_channels]
max_pool = tf.nn.max_pool(
    layer_input,
    ksize=kernel,
    strides=[1, 1, 1, 1],
    padding="VALID")

sess.run(max_pool)

array([[[[ 1.5]]]], dtype=float32)

In [26]:
# avg_pool: the pooling window has the same size as the whole input, so the
# result is the mean of all nine values of the 3x3 grid.
batch_size = 1
input_height = 3
input_width = 3
input_channels = 1

layer_input = tf.constant([[
    [[1.0], [1.0], [1.0]],
    [[1.0], [0.5], [0.0]],
    [[0.0], [0.0], [0.0]],
]])

kernel = [batch_size, input_height, input_width, input_channels]
avg_pool = tf.nn.avg_pool(
    layer_input,
    ksize=kernel,
    strides=[1, 1, 1, 1],
    padding="VALID")

sess.run(avg_pool)

array([[[[ 0.5]]]], dtype=float32)

* normalization

In [4]:
# tf.nn.local_response_normalization
# Input layout: [batch, image_height, image_width, image_channel]
# The literal below has shape (1, 3, 1, 1).
layer_input = tf.constant([
        [[[1.0]], [[2.]], [[3.]]]
    ])

# Normalize with the function's default parameters and show input and result side by side.
lrn = tf.nn.local_response_normalization(layer_input)
sess.run([layer_input, lrn])

[array([[[[ 1.]],
 
         [[ 2.]],
 
         [[ 3.]]]], dtype=float32), array([[[[ 0.70710677]],
 
         [[ 0.89442718]],
 
         [[ 0.94868326]]]], dtype=float32)]

* high level layers

In [8]:
# One 3x3 image with three values per pixel: shape (1, 3, 3, 3).
image_input = tf.constant([
        [
            [[0., 0., 0.], [255., 255., 255.], [254., 0., 0.]],
            [[0., 191., 0.], [3., 108., 233.], [0., 191., 0.]],
            [[254., 0., 0.], [255., 255., 255.], [0., 0., 0.]]
        ]
    ])

# Higher-level version of tf.nn.conv2d.
conv2d = tf.contrib.layers.convolution2d(
            image_input,
            num_outputs=4,
            kernel_size=(1,1), # filter height, width
            activation_fn=tf.nn.relu,
            stride=(1,1), # skips the stride values for image_batch and input_channels
            trainable=True)

# The layer's variables must be initialized before the layer can be evaluated.
sess.run(tf.initialize_all_variables())
sess.run(conv2d)

array([[[[   0.        ,    0.        ,    0.        ,    0.        ],
         [ 319.33734131,    0.        ,    5.65779114,  104.56098938],
         [ 151.72480774,   16.78249741,    0.        ,  158.22753906]],

        [[ 131.75291443,    0.        ,  135.71949768,    0.        ],
         [  68.17230988,    0.        ,    0.        ,    9.21641922],
         [ 131.75291443,    0.        ,  135.71949768,    0.        ]],

        [[ 151.72480774,   16.78249741,    0.        ,  158.22753906],
         [ 319.33734131,    0.        ,    5.65779114,  104.56098938],
         [   0.        ,    0.        ,    0.        ,    0.        ]]]], dtype=float32)

In [10]:
# tf.contrib.layers.fully_connected
# Input of shape (1, 2, 1): one batch entry holding two single-value features.
features = tf.constant([
        [[1.2], [3.4]]
    ])

# Map each feature to 2 outputs; the layer's variables are initialized before evaluation.
fc = tf.contrib.layers.fully_connected(features, num_outputs=2)
sess.run(tf.initialize_all_variables())
sess.run(fc)

array([[[ 0.        ,  0.06500373],
        [ 0.        ,  0.18417723]]], dtype=float32)

* image loading 후 TFRecord로 저장하기

In [2]:
# NOTE(review): absolute, machine-specific path; this cell only runs where the file exists.
image_filename = "/Users/jackleg/study/tensorflow_for_mi/test-input-image-large.jpg"
# Queue of filenames that feeds the reader below.
filename_queue = tf.train.string_input_producer([image_filename])

# Read the file's whole contents as one value and decode it as a JPEG image tensor.
image_reader = tf.WholeFileReader()
_, image_file = image_reader.read(filename_queue)
image = tf.image.decode_jpeg(image_file)

In [10]:
# NOTE: this cell has repeatedly hung when run; the cause has not been identified.
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        # Save the image in TFRecord format.
        image_label = b'\x01'

        image_loaded = sess.run(image)
        image_bytes = image_loaded.tobytes()
        # The shape is kept because the cell that reads the record back needs it
        # to reshape the raw bytes into an image again.
        image_height, image_width, image_channels = image_loaded.shape

        example = tf.train.Example(features=tf.train.Features(feature={
                    'label': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_label])),
                    'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes]))
                }))

        writer = tf.python_io.TFRecordWriter("./output/training-image.tfrecords")
        try:
            writer.write(example.SerializeToString())
        finally:
            # Close the writer even if the write fails.
            writer.close()
    finally:
        # Ask the queue-runner threads to stop and wait for them, so they are not
        # left running against a session that is about to be closed.
        coord.request_stop()
        coord.join(threads)
    

In [12]:
# Queue holding the TFRecord file written by the previous cell. It is created
# before start_queue_runners() is called below.
filename_queue = tf.train.string_input_producer(["./output/training-image.tfrecords"])

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        # Read the saved TFRecord.
        tf_record_reader = tf.TFRecordReader()
        _, tf_record_serialized = tf_record_reader.read(filename_queue)

        # Both features were stored as byte strings.
        tf_record_features = tf.parse_single_example(
                                tf_record_serialized,
                                features={
                                    'label': tf.FixedLenFeature([], tf.string),
                                    'image': tf.FixedLenFeature([], tf.string)
                                })

        # Turn the raw bytes back into a uint8 image, using the shape recorded
        # when the image was written.
        tf_record_image = tf.decode_raw(tf_record_features['image'], tf.uint8)
        tf_record_image = tf.reshape(tf_record_image,
                                     [image_height, image_width, image_channels])

        tf_record_label = tf.cast(tf_record_features['label'], tf.string)

        # Check whether the original image and the image read from the TFRecord are equal.
        print(sess.run(tf.equal(image, tf_record_image)))
        print(sess.run(tf_record_label))
    finally:
        # Ask the queue-runner threads to stop and wait for them, so they are not
        # left running against a session that is about to be closed.
        coord.request_stop()
        coord.join(threads)
    


[[[ True  True  True]
  [ True  True  True]
  [ True  True  True]
  ..., 
  [ True  True  True]
  [ True  True  True]
  [ True  True  True]]

 [[ True  True  True]
  [ True  True  True]
  [ True  True  True]
  ..., 
  [ True  True  True]
  [ True  True  True]
  [ True  True  True]]

 [[ True  True  True]
  [ True  True  True]
  [ True  True  True]
  ..., 
  [ True  True  True]
  [ True  True  True]
  [ True  True  True]]

 ..., 
 [[ True  True  True]
  [ True  True  True]
  [ True  True  True]
  ..., 
  [ True  True  True]
  [ True  True  True]
  [ True  True  True]]

 [[ True  True  True]
  [ True  True  True]
  [ True  True  True]
  ..., 
  [ True  True  True]
  [ True  True  True]
  [ True  True  True]]

 [[ True  True  True]
  [ True  True  True]
  [ True  True  True]
  ..., 
  [ True  True  True]
  [ True  True  True]
  [ True  True  True]]]



* crop image

In [10]:
# Crop the central region of the image (fraction 0.1) and print it.
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        print(sess.run(tf.image.central_crop(image, 0.1)))
    finally:
        # Stop and join the queue-runner threads before the session closes.
        coord.request_stop()
        coord.join(threads)

[[[  4 109 234]]]


In [6]:
# Crop a 2 (height) x 1 (width) box starting at the top-left corner of the image.
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        # Evaluate the image first; the crop is applied to the resulting array.
        real_image = sess.run(image)

        bounding_crop = tf.image.crop_to_bounding_box(
                            real_image, offset_height=0, offset_width=0, target_height=2, target_width=1)

        print(sess.run(bounding_crop))
    finally:
        # Stop and join the queue-runner threads before the session closes.
        coord.request_stop()
        coord.join(threads)

[[[0 0 0]]

 [[0 0 0]]]


* padding

In [15]:
# Pad the image to 101x101, keeping the original content at offset (0, 0).
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        # Evaluate the image first; the padding is applied to the resulting array.
        real_image = sess.run(image)

        pad = tf.image.pad_to_bounding_box(
                        real_image, offset_height=0, offset_width=0, target_height=101, target_width=101)

        print(sess.run(pad))
    finally:
        # Stop and join the queue-runner threads before the session closes.
        coord.request_stop()
        coord.join(threads)

[[[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ..., 
  [254   0   0]
  [  0   0   0]
  [  0   0   0]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ..., 
  [254   0   0]
  [  0   0   0]
  [  0   0   0]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ..., 
  [254   0   0]
  [  0   0   0]
  [  0   0   0]]

 ..., 
 [[254   0   0]
  [254   0   0]
  [254   0   0]
  ..., 
  [  0   0   0]
  [  0   0   0]
  [  0   0   0]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ..., 
  [  0   0   0]
  [  0   0   0]
  [  0   0   0]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ..., 
  [  0   0   0]
  [  0   0   0]
  [  0   0   0]]]


* tf.image.resize_image_with_crop_or_pad()를 사용하면 이미지를 crop하고 난 후에 padding으로 resizing할 수 있음.

* flipping
 * 이미지 중 일부를 좌우로 flipping한 후 다시 위아래로 flipping.
 * tf.image.random_flip_left_right, tf.image.random_flip_up_down을 사용하면 랜덤하게 이미지를 flip할 수 있음.
 * 좀 더 다양한 training 데이터를 만들기 위함.

In [17]:
# Flip the top-left 2x2 patch of the image left-right, then up-down.
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        # 2x2 patch (all 3 values of each pixel) starting at the top-left corner.
        top_left_pixels = tf.slice(image, [0, 0, 0], [2, 2, 3])

        flip_horizon = tf.image.flip_left_right(top_left_pixels)
        flip_vertical = tf.image.flip_up_down(flip_horizon)

        print(sess.run([top_left_pixels, flip_vertical]))
    finally:
        # Stop and join the queue-runner threads before the session closes.
        coord.request_stop()
        coord.join(threads)

[array([[[0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0]]], dtype=uint8), array([[[0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0]]], dtype=uint8)]


* control saturation, hue, contrast, brightness
 * tf.image.adjust_contrast, tf.image.adjust_hue, tf.image.adjust_saturation 등의 함수를 사용하면 된다.

In [19]:
# Brighten the image by 0.2.
example_red_pixel = tf.constant([254., 2., 15.])
adjust_brightness = tf.image.adjust_brightness(example_red_pixel, 0.2)

# No input queue is involved here, so a plain session is enough.
with tf.Session() as sess:
    brightened_pixel = sess.run(adjust_brightness)

print(brightened_pixel)

[ 254.19999695    2.20000005   15.19999981]


* 이미지의 color scheme을 gray scale로 전환하기.
 * tf.image.rgb_to_grayscale()을 사용
 
* rgb를 hsv로 변경하기.
 * tf.image.rgb_to_hsv()를 사용.
 * hsv는 실수값을 사용하기 때문에 rgb의 경우 tf.image.convert_image_dtype을 이용해 값을 tf.float32로 변경해서 사용.
 
* 위 함수들과 반대로 tf.image.xxx_to_rgb 함수도 존재함.

1. download images from http://vision.stanford.edu/aditya86/ImageNetDogs/

2. convert images to TFRecords

In [2]:
import glob
from itertools import groupby
from collections import defaultdict

# Only a subset of the data (breed directories matching n02085*) is used, as a sample.
# The list is sorted because itertools.groupby only groups *adjacent* items and
# glob returns files in no particular order. Sorting keeps every breed in one
# contiguous run and makes the train/test split identical on every run.
image_filenames = sorted(glob.glob("./images/n02085*/*.jpg"))

training_dataset = defaultdict(list)
testing_dataset = defaultdict(list)

# Split up the filename into its breed and corresponding filename. The breed is found by taking the directory name.
image_filename_with_breed = map(lambda filename: (filename.split("/")[2], filename), image_filenames)

# Group each image by the breed, which is the 0th element in the tuple returned above.
for dog_breed, breed_images in groupby(image_filename_with_breed, lambda x: x[0]):
    # Enumerate each breed's images and send every 5th one (~20%) to the testing set.
    for i, breed_image in enumerate(breed_images):
        if i % 5 == 0:
            testing_dataset[dog_breed].append(breed_image[1])
        else:
            training_dataset[dog_breed].append(breed_image[1])

    # Check that each breed includes at least 18% of the images for testing.
    breed_training_count = len(training_dataset[dog_breed])
    breed_testing_count = len(testing_dataset[dog_breed])

    assert round(breed_testing_count*1.0 / (breed_training_count + breed_testing_count), 2) > 0.18, "Not enough testing images."

In [4]:
def write_records_file(dataset, record_location, session=None):
    """
    Fill TFRecords files with the images found in `dataset` and include their category.

    Each image is decoded as JPEG, converted to grayscale, resized to 250x151 and
    stored as raw uint8 bytes next to its label. A new output file named
    `<record_location>-<index>.tfrecords` is started every 100 images to avoid a
    slowdown in writing. Images that TensorFlow cannot decode are skipped and
    their filename is printed.

    Parameters
    ----------
    dataset: dict(list)
        Dictionary with each key being a label for the list of image filenames of its value.
    record_location: str
        Location (path prefix) to store the TFRecord output.
    session: tf.Session, optional
        Session used to run the image pipeline. Defaults to the notebook-level `sess`.
    """
    # Nothing to write: return before building any ops or creating any file.
    if not any(dataset.values()):
        return

    if session is None:
        session = sess

    # Build the decode/resize pipeline ONCE and feed each filename through a
    # placeholder. Creating fresh ops per image would grow the graph with every
    # file and make each run slower than the previous one.
    filename_input = tf.placeholder(tf.string)
    decoded_image = tf.image.decode_jpeg(tf.read_file(filename_input))
    # Converting to grayscale saves processing and memory but isn't required.
    grayscale_image = tf.image.rgb_to_grayscale(decoded_image)
    resized_image = tf.image.resize_images(grayscale_image, [250, 151])
    # tf.cast is used here because the resized images are floats but haven't been converted into
    # image floats where an RGB value is between [0, 1).
    image_as_uint8 = tf.cast(resized_image, tf.uint8)

    writer = None

    # The running index is used to break up the files once they reach 100 images.
    current_index = 0
    try:
        for breed, images_filenames in dataset.items():
            print("processing %s..." % breed)

            # Instead of using the label as a string, it'd be more efficient to turn it into either an
            # integer index or a one-hot encoded rank one tensor.
            # https://en.wikipedia.org/wiki/One-hot
            image_label = breed.encode("utf-8")

            for image_filename in images_filenames:
                if current_index % 100 == 0:
                    if writer:
                        writer.close()

                    record_filename = "{record_location}-{current_index}.tfrecords".format(
                                            record_location=record_location,
                                            current_index=current_index)

                    writer = tf.python_io.TFRecordWriter(record_filename)
                current_index += 1

                # In ImageNet dogs, there are a few images which TensorFlow doesn't recognize as JPEGs.
                # Decoding happens when the op is *run*, not when it is built, so the
                # run call is what has to be guarded for those images to be skipped.
                try:
                    image_bytes = session.run(
                        image_as_uint8, feed_dict={filename_input: image_filename}).tobytes()
                except tf.errors.InvalidArgumentError:
                    print(image_filename)
                    continue

                example = tf.train.Example(features=tf.train.Features(feature={
                            'label': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_label])),
                            'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes]))
                        }))

                writer.write(example.SerializeToString())
    finally:
        # Close the last file even if processing failed part-way through.
        if writer:
            writer.close()

# Write the testing split first, then the training split, each under its own path prefix.
record_jobs = [
    ("write testing files.", testing_dataset, "./output/testing-images/testing-image"),
    ("write training files.", training_dataset, "./output/training-images/training-image"),
]

for banner, dataset_split, record_prefix in record_jobs:
    print(banner)
    write_records_file(dataset_split, record_prefix)

write testing files.
processing n02085620-Chihuahua...
processing n02085936-Maltese_dog...
processing n02085782-Japanese_spaniel...
write training files.
processing n02085620-Chihuahua...
processing n02085936-Maltese_dog...
processing n02085782-Japanese_spaniel...


3. load images
 * TFRecord 포맷으로 저장한 이미지 파일 읽기

In [2]:
# Queue every training TFRecord file that matches the pattern.
training_record_files = tf.train.match_filenames_once("./output/training-images/*.tfrecords")
filename_queue = tf.train.string_input_producer(training_record_files)

reader = tf.TFRecordReader()
_, serialized = reader.read(filename_queue)

# Each record carries two string features: the label and the raw image bytes.
record_schema = {
    'label': tf.FixedLenFeature([], tf.string),
    'image': tf.FixedLenFeature([], tf.string),
}
features = tf.parse_single_example(serialized, features=record_schema)

record_image = tf.decode_raw(features['image'], tf.uint8)

# Reshaping the flat bytes into [250, 151, 1] organizes them like an image
# again, which helps both training and visualizing the output.
image = tf.reshape(record_image, [250, 151, 1])

label = tf.cast(features['label'], tf.string)

# Shuffled mini-batches of 3 (image, label) pairs.
batch_size = 3
min_after_dequeue = 10
capacity = min_after_dequeue + 3 * batch_size

image_batch, label_batch = tf.train.shuffle_batch(
    [image, label],
    batch_size=batch_size,
    capacity=capacity,
    min_after_dequeue=min_after_dequeue)

4. Model
 * mnist convolution example

In [3]:
# Convert the images to floats in [0, 1) to match the input convolution2d expects.
float_image_batch = tf.image.convert_image_dtype(image_batch, tf.float32)

# First convolution: 32 filters of size 5x5 (height x width), stride 2x2, ReLU activation.
conv2d_layer_one = tf.contrib.layers.convolution2d(
    float_image_batch,
    num_outputs=32,
    kernel_size=(5, 5),
    stride=(2, 2),
    activation_fn=tf.nn.relu,
    weights_initializer=tf.random_normal_initializer(),
    trainable=True)

# 2x2 max pooling with stride 2.
pool_layer_one = tf.nn.max_pool(
    conv2d_layer_one,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding='SAME')

# Note: pooling leaves the first and last dimensions unchanged; only the two
# middle dimensions shrink.
conv2d_layer_one.get_shape(), pool_layer_one.get_shape()

(TensorShape([Dimension(3), Dimension(125), Dimension(76), Dimension(32)]),
 TensorShape([Dimension(3), Dimension(63), Dimension(38), Dimension(32)]))

In [4]:
# Second convolution: 64 filters of size 5x5, stride 1x1, ReLU activation.
conv2d_layer_two = tf.contrib.layers.convolution2d(
    pool_layer_one,
    num_outputs=64,
    kernel_size=(5, 5),
    stride=(1, 1),
    activation_fn=tf.nn.relu,
    weights_initializer=tf.random_normal_initializer(),
    trainable=True)

# 2x2 max pooling with stride 2.
pool_layer_two = tf.nn.max_pool(
    conv2d_layer_two, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

conv2d_layer_two.get_shape(), pool_layer_two.get_shape()

(TensorShape([Dimension(3), Dimension(63), Dimension(38), Dimension(64)]),
 TensorShape([Dimension(3), Dimension(32), Dimension(19), Dimension(64)]))

In [5]:
# Reshape the last layer so it can feed a fully connected layer.
# First dimension: one row per image in the batch.
# Second dimension (-1): all remaining dimensions flattened into one.
flattened_layer_two = tf.reshape(pool_layer_two, [batch_size, -1])

flattened_layer_two.get_shape()

TensorShape([Dimension(3), Dimension(38912)])

In [8]:
# Fully connected hidden layer with 512 outputs and ReLU activation.
hidden_layer_three_dense = tf.contrib.layers.fully_connected(
    flattened_layer_two,
    512,
    weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
    activation_fn=tf.nn.relu)

# Drop out some of the neurons, reducing their importance in the model.
# NOTE(review): 0.1 is passed as keep_prob; confirm this value is intended.
hidden_layer_three = tf.nn.dropout(hidden_layer_three_dense, keep_prob=0.1)

# The output holds the connections between the previous layers and the 120
# different dog breeds available to train on.
# (Only 3 of them are used in this notebook.)
final_fully_connected = tf.contrib.layers.fully_connected(
    hidden_layer_three,
    120,  # number of dog breeds in the ImageNet Dogs dataset
    weights_initializer=tf.truncated_normal_initializer(stddev=0.1))

In [9]:
# tf.map_fn is used to map each breed name to an integer.
import glob

# Find every directory name in the imagenet-dogs directory.
# Sorted so that a breed always gets the same integer index: glob returns
# entries in no particular order, which would otherwise let the label-to-index
# mapping change from one run to the next.
labels = sorted(map(lambda c: c.split("/")[-1], glob.glob("./images/*")))

# Match every label from label_batch and return the index where it exists in the list of classes.
train_labels = tf.map_fn(lambda l: tf.where(tf.equal(labels, l))[0,0:1][0], label_batch, dtype=tf.int64)