### Note

This notebook is the first attempt at CNN using tensorflow for experiment only. 

Major work and result is preduced using keras in the other notebook: **cogs181-final-project(keras).ipynb**


# Setup, Download and Preprocess

In [1]:
# To determine which version you're using:
!pip show tensorflow-gpu

Name: tensorflow-gpu
Version: 1.4.0.dev20171012
Summary: TensorFlow helps the tensors flow
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: opensource@google.com
License: Apache 2.0
Location: /opt/conda/lib/python2.7/site-packages
Requires: enum34, backports.weakref, wheel, mock, tensorflow-tensorboard, numpy, protobuf, six


In [None]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']
  
get_available_gpus()

In [2]:
!cd /tmp; wget --quiet http://pages.ucsd.edu/~ztu/courses/tiny-imagenet-200.zip
!cd /tmp; unzip -qq tiny-imagenet-200.zip; rm tiny-imagenet-200.zip

### Imports

In [3]:
import tensorflow as tf
import glob
import matplotlib.pyplot as plt
import random
import scipy.ndimage
import pandas as pd
import numpy as np
from collections import defaultdict
from PIL import Image
from tensorflow.contrib.learn.python.learn.datasets.mnist import DataSet

%matplotlib inline
plt.rcParams['figure.figsize'] = (10, 6)

def distort(filename):
    """Apply image distortions"""
    with tf.Graph().as_default():
        file = tf.read_file(filename)
        img = tf.image.decode_jpeg(file, 3)
        img = tf.image.adjust_saturation(img, 0.5)
        img = tf.image.adjust_hue(img, -0.05)
        with tf.Session() as sess:
            dist_img = sess.run(img)
    
    return dist_img

### Data Exploratory

In [4]:
import glob

path = "/tmp/tiny-imagenet-200/"
train_dirs = glob.glob(path + "train/*")
val_dirs   = glob.glob(path + "val/*")
test_dirs  = glob.glob(path + "test/*")

#### Loading All Labels

In [5]:
# This is global labels, there are 18K of them, but only 200 is used for training and validation
labels = []
for line in open(path + 'words.txt'):
    [classname, description ] = line.strip().split('\t')
    labels.append( (classname,description) )
label_dicts = dict(labels)

# There are only 200 used labels in both training and validation
used_labels =  [ d[-9:] for d in train_dirs ]
len(used_labels)

# Hot-Encode Labels
from sklearn.preprocessing import LabelBinarizer

bin_encoder = LabelBinarizer()
bin_encoder.fit_transform(used_labels).size

40000

#### Loading Train Data

In [6]:
train_data_paths = []
train_labels = []
for class_path in train_dirs:
    class_name = class_path[-9:]
    images = glob.glob(class_path + '/images/*')
    for image_path in images:
        train_labels.append( class_name )
        train_data_paths.append( image_path )
        
inter = list(zip(train_data_paths, train_labels))
random.shuffle(inter)
train_data_paths, train_labels = zip(*inter)

train_labels = bin_encoder.transform(train_labels) # hot-encode labels from string to number
len(train_labels)

100000

In [7]:
def get_image(img_path):
    img = scipy.ndimage.imread(img_path)
    if len(img.shape) != 3: # reshape back to RBG format
        img = np.repeat(img[:,:,np.newaxis], 3, axis=2)
    return img

def get_images(paths):
    X = []
    for p in paths:
        X.append(get_image(p))
    return X
        
def prepare_batch(X,y,batch_size=50):
    N = len(y) / batch_size
    i = 0
    yield X[:batch_size], y[:batch_size]
    while i < N:
        yield X[batch_size*i:batch_size*(i+1)], y[batch_size*i:batch_size*(i+1)]
        i = i + 1

In [12]:
# inter = list(zip(train_images, train_labels))
# random.shuffle(inter)
# images, labels = zip(*inter)
# train_data = DataSet( np.array(train_images), np.array(train_labels), reshape=False)
# del train_images, train_labels

### Loading Validation Data

In [8]:
val_labels = []
val_fns = []
val_images = []
for line in open(path + 'val/val_annotations.txt'):
  [fn, classname, _ , _, _, _ ] = line.strip().split('\t')
  
  img = scipy.ndimage.imread(path +'val/images/' + fn )
  if len(img.shape) != 3: # reshape back to RBG format
      img = np.repeat(img[:,:,np.newaxis], 3, axis=2)
      
  val_labels.append(classname)
  val_images.append(img)

# hot-encode labels from string to number
# val_labels = encoder.transform(val_labels)
val_labels = bin_encoder.transform(val_labels)

val_data = DataSet( np.array(val_images), np.array(val_labels), reshape=False)
del val_labels, val_images
val_data.num_examples

10000

In [9]:
# mini-validation set
_x_val, _y_val = val_data.next_batch(2000)
_x_val.shape, _y_val.shape

((2000, 64, 64, 3), (2000, 200))

# Data Analysis

In [10]:

# Network Parameters
n_classes = 200

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pooling2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

X = tf.placeholder("float", [None, 64, 64, 3], name='x')
Y = tf.placeholder("float", [None, n_classes], name='y_true')

In [11]:
class ConvNet():
    
    def __init__(self, conv_fn, pooling_fn, activation_fn):
        self._conv_fn = conv_fn
        self._pooling_fn = pooling_fn
        self._activation_fn = activation_fn
    
    def conv_layer(self, x, n_filters, kernel_size):
        shape = [kernel_size] * 2
        shape = shape + [x.shape[-1].value, n_filters]
        
        w = weight_variable(shape)
        b = bias_variable([n_filters])
        return self._activation_fn(self._conv_fn(x, w) + b)
    
    def pool_layer(self, x, n_strides, kernel_size):
        return self._pooling_fn(x)
    
    def fc_layer(self, x, n_neurons):
        mult = lambda x, y: x * y
        
        shape = x.get_shape().as_list()[1:]
        flat_shape = reduce(mult, shape, 1)
        
        w = weight_variable([flat_shape, n_neurons])
        b = bias_variable([n_neurons])
        x_flat = tf.reshape(x, [-1, flat_shape])
        fc = self._activation_fn(tf.matmul(x_flat, w) + b)
        return fc
    
    def out_layer(self, x, n_classes):
        w = weight_variable([x.shape[-1].value, n_classes])
        b = bias_variable([n_classes])
        return tf.matmul(x, w) + b

https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/learn/python/learn/datasets/mnist.py

In [12]:
learning_rate = 1e-4

# Convolutions use a stride of one
# Plain old max pooling over 2x2 blocks
# Activation function is the ReLu function
conv_net = ConvNet(conv2d, max_pooling2d, tf.nn.relu)

# Reshape to match picture format [Height x Width x Channel]
x_image = tf.reshape(X, [-1, 64, 64, 3], name='X')

# Convolution Layer with 32 filters and a kernel size of 5
conv1 = conv_net.conv_layer(x_image, 32, 5)

# Convolution Layer with 64 filters and a kernel size of 5
conv2 = conv_net.conv_layer(conv1, 64, 5)

# Fully connected layer with 1024 neurons
fc = conv_net.fc_layer(conv2, 1024)

# Applying a dropout before the readout layer to reduce
# overfitting
keep_prob = tf.placeholder(tf.float32)
fc = tf.nn.dropout(fc, keep_prob)

# Readout layer
y_conv = conv_net.out_layer(fc, n_classes)

In [13]:
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=Y))
optimizer     = tf.train.AdamOptimizer(learning_rate)
train_step    = optimizer.minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(Y, 1))
accuracy           = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


train_loss,train_acc  = [],[]
val_acc   , test_acc  = [],[]

In [None]:
# Training Parameters
epoch = 15             # two pass on all training_data
batch_size = 25        # in each iteration, train 50, total iteration = 100K / batch_size (in the case of 25 batch size, 4K iteration)

# Use interactive session instead
# with tf.Session() as sess:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
i = 0
for e in range(epoch):
    for img_paths, batch_y in prepare_batch(train_data_paths, train_labels, batch_size=batch_size):
        batch_x = get_images(img_paths)
        _, loss = sess.run([train_step, cross_entropy], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.5})
        train_accuracy = accuracy.eval(session=sess, feed_dict={X: batch_x, Y: batch_y, keep_prob: 1.0})
        train_acc.append(train_accuracy)
        train_loss.append(loss)
        i += 1
        if i % 2000 == 0:
            print('step %d, training accuracy %g' % (i, train_accuracy))

step 2000, training accuracy 0
step 4000, training accuracy 0
step 6000, training accuracy 0
step 8000, training accuracy 0
step 10000, training accuracy 0
step 12000, training accuracy 0
step 14000, training accuracy 0
step 16000, training accuracy 0
step 18000, training accuracy 0
step 20000, training accuracy 0
step 22000, training accuracy 0
step 24000, training accuracy 0
step 26000, training accuracy 0
step 28000, training accuracy 0


In [26]:
print('val accuracy %g' % accuracy.eval(session=sess, feed_dict={ X: val_data.images[:1000], Y: val_data.labels[:1000], keep_prob: 1.0}))
for i in range(9):
    print('val accuracy %g' % accuracy.eval(session=sess, feed_dict={ X: val_data.images[ i*1000 : (i+1)*1000 ], Y: val_data.labels[ i*1000:(i+1)*1000], keep_prob: 1.0}))

val accuracy 0.025
val accuracy 0.025
val accuracy 0.032
val accuracy 0.031
val accuracy 0.022
val accuracy 0.033
val accuracy 0.021
val accuracy 0.025
val accuracy 0.018
val accuracy 0.027


In [15]:
sess.close()