We extend our <a href="https://github.com/ilep/Personal-Training/blob/master/Tensorflow/0%20-%20Datacamp%20tutorial.ipynb">previous notebook</a> about image recognition using a basic fully connected NN to test more elaborated and accurate networks.

In [None]:
import os
import warnings
warnings.filterwarnings('ignore')
import skimage
from skimage.data import imread as skimg_imread
from skimage import transform 
from skimage.color import rgb2gray
import numpy

In [None]:
import tensorflow as tf

In [None]:
def load_data(data_directory):
    directories = [d for d in os.listdir(data_directory) 
                   if os.path.isdir(os.path.join(data_directory, d))]
    labels = []
    images = []
    for d in directories:
        label_directory = os.path.join(data_directory, d)
        file_names = [os.path.join(label_directory, f) 
                      for f in os.listdir(label_directory) 
                      if f.endswith(".ppm")]
        for f in file_names:
            images.append(skimg_imread(f))
            labels.append(int(d))
    return images, labels

data_folder = r'/Users/lantian/Desktop/Ivan/Marketing perso/Personal-Training/Tensorflow/data/Datacamp tutorial'

train_data_directory = os.path.join(data_folder, "Training")
test_data_directory = os.path.join(data_folder, "Testing")
images, labels = load_data(train_data_directory)
images28 = [transform.resize(image, (28, 28)) for image in images]
images28 = numpy.array(images28)
images28_gray = rgb2gray(images28)

#### Fully connected layer with a hidden layer

In [None]:
# Initialize placeholders 
x = tf.placeholder(dtype = tf.float32, shape = [None, 28, 28])
y = tf.placeholder(dtype = tf.int32, shape = [None])

In [None]:
images_flat = tf.contrib.layers.flatten(x)
n_hidden_rule_thumb = int((2/3)*28*28 + 62)

Rule of thumb for number of neuron in hidden layer <a href="https://www.ijcsmc.com/docs/papers/November2014/V3I11201499a19.pdf">here</a><hr>

<i>
<li>The number of hidden neurons should be in the range between the size of the input layer and the size of the output layer</li>

<li>The number of hidden neurons should be 2/3 of the input layer size, plus the size of theoutput layer</li>

<li>The number of hidden neurons should be less than twice the input layer size</li>
</i>

In [None]:
n_hidden_rule_thumb

In [None]:
hidden = tf.contrib.layers.fully_connected(images_flat, n_hidden_rule_thumb, tf.nn.relu)
logits = tf.contrib.layers.fully_connected(hidden, 62, tf.nn.relu)

In [None]:
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
loss = tf.reduce_mean(cross_entropy)
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

prediction = tf.argmax(logits, 1)
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))

#### TRAIN

In [None]:
import pandas

In [None]:
s_loss = pandas.Series(index=range(201))

tf.set_random_seed(1234)
sess = tf.Session()

sess.run(tf.global_variables_initializer())

for i in range(201):
    _, loss_value, accuracy_val = sess.run([train_op, loss, accuracy], feed_dict={x: images28_gray, y: labels})
    s_loss.loc[i] = loss_value
    if i % 10 == 0:
        # print('EPOCH', i)
        # print("Accuracy: ", accuracy_val)
        print("Loss: ", loss_value)
        pass

# sess.close()

#### TEST

In [None]:
# Import `skimage`
from skimage import transform

# Load the test data
test_images, test_labels = load_data(test_data_directory)

# Transform the images to 28 by 28 pixels
test_images28 = [transform.resize(image, (28, 28)) for image in test_images]

# Convert to grayscale
from skimage.color import rgb2gray
test_images28_gray = rgb2gray(numpy.array(test_images28))

# Run predictions against the full test set.
predicted = sess.run(prediction, feed_dict={x: test_images28_gray})

# Calculate correct matches 
match_count = sum([int(y == y_) for y, y_ in zip(test_labels, predicted)])

# Calculate the accuracy
accuracy = match_count / len(test_labels)

# Print the accuracy
print("Accuracy: {:.3f}".format(accuracy))

Only a disappointing improvement from 54.4% <a href="https://github.com/ilep/Personal-Training/blob/master/Tensorflow/0%20-%20Datacamp%20tutorial.ipynb">previously...</a>

https://towardsdatascience.com/a-guide-to-an-efficient-way-to-build-neural-network-architectures-part-ii-hyper-parameter-42efca01e5d7

#### CNN 

Based on this <a href="https://www.datacamp.com/community/tutorials/cnn-tensorflow-python">tuto</a>, we build a CNN. Others tutos on CNN: 
<li><a href="https://www.kaggle.com/pouryaayria/convolutional-neural-networks-tutorial-tensorflow">this one from kaggle</a></li>
<li><a href="https://ujjwalkarn.me/2016/08/11/intuitive-explanation-convnets/">An intuitive explanation of CNN</a></li>

In [4]:
images28_gray.shape

In [5]:
images28_gray = images28_gray.reshape(-1, 28, 28, 1)

In [6]:
images28_gray.shape

(4575, 28, 28, 1)

In this case we apply <a href="https://www.tensorflow.org/api_docs/python/tf/contrib/layers/one_hot_encoding">one hot encoding</a> on our labels vectors.  

In [7]:
from tensorflow.contrib.layers import one_hot_encoding

In [8]:
labels_oh = one_hot_encoding(labels, num_classes=len(numpy.unique(labels)))

In [9]:
type(labels_oh)

tensorflow.python.framework.ops.Tensor

As I get a Tensor (eval in session to extract the numpy array) and want to stick with tuto, I am instead using sklearn for encoding:

In [10]:
# import preprocessing from sklearn
from sklearn import preprocessing

In [11]:
enc = preprocessing.OneHotEncoder()
labels_oh = enc.fit_transform(numpy.reshape(labels,(len(labels),1)))

In [12]:
labels_oh[0,:]

<1x62 sparse matrix of type '<class 'numpy.float64'>'
	with 1 stored elements in Compressed Sparse Row format>

In [13]:
labels_oh = labels_oh.toarray()

In [14]:
# labels_oh[[0,137, 4300],:]

Now that our data is reshaped, we head over the following CNN network: 
<img src="http://res.cloudinary.com/dyd911kmh/image/upload/f_auto,q_auto:best/v1512486717/fashion-mnist-architecture_htbpsz.png"/>
<i>(output in our case is 62 units)</i>

In [15]:
training_iters = 200 
learning_rate = 0.001 
batch_size = 128

<i>The batch size means that your training images will be divided in a fixed batch size and at every batch it will take a fixed number of images and train them. It's recommended to use a batch size in the power of 2, since the number of physical processor is often a power of 2, using a number of virtual processor different from a power of 2 leads to poor performance. Also, taking a very large batch size can lead to memory errors so you have to make sure that the machine you run your code on has sufficient RAM to handle specified batch size.</i>

In [16]:
# (img shape: 28*28)
n_input = 28
n_classes = 62

In [17]:
# both placeholders are of type float
x = tf.placeholder("float", [None, 28,28,1]) 
y = tf.placeholder("float", [None, n_classes])

"None" means that the first dimensions (number of rows <=> number of images) will be computed when batch data will be fed by the placeholders.

In [18]:
def conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

x and W are necessary 4-D Tensors, according to <a href="https://www.tensorflow.org/api_docs/python/tf/nn/conv2d">doc</a>

<a href="https://stackoverflow.com/questions/37674306/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-t">Difference between SAME and VALID</a> padding in conv2d

In [19]:
def maxpool2d(x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],padding='SAME')

In [20]:
weights = {
    'wc1': tf.get_variable('W0', shape=(3,3,1,32), initializer=tf.contrib.layers.xavier_initializer()), 
    'wc2': tf.get_variable('W1', shape=(3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()), 
    'wc3': tf.get_variable('W2', shape=(3,3,64,128), initializer=tf.contrib.layers.xavier_initializer()), 
    'wd1': tf.get_variable('W3', shape=(4*4*128,128), initializer=tf.contrib.layers.xavier_initializer()), 
    'out': tf.get_variable('W6', shape=(128,n_classes), initializer=tf.contrib.layers.xavier_initializer()), 
}
biases = {
    'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc2': tf.get_variable('B1', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bc3': tf.get_variable('B2', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'bd1': tf.get_variable('B3', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'out': tf.get_variable('B4', shape=(10), initializer=tf.contrib.layers.xavier_initializer()),
}

In [21]:
conv1 = conv2d(x, weights['wc1'], biases['bc1'])

In [22]:
conv1 = maxpool2d(conv1, k=2)

In [26]:
conv1

<tf.Tensor 'MaxPool:0' shape=(?, 14, 14, 32) dtype=float32>

In [23]:
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])

In [24]:
conv2 = maxpool2d(conv2, k=2)

In [25]:
conv2

<tf.Tensor 'MaxPool_1:0' shape=(?, 7, 7, 64) dtype=float32>

In [27]:
conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])

In [28]:
conv3 = maxpool2d(conv3, k=2)

In [29]:
conv3

<tf.Tensor 'MaxPool_2:0' shape=(?, 4, 4, 128) dtype=float32>

In [30]:
weights['wd1']

<tf.Variable 'W3:0' shape=(2048, 128) dtype=float32_ref>

In [31]:
fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])

In [32]:
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])

In [33]:
fc1 = tf.nn.relu(fc1)

In [35]:
fc1

<tf.Tensor 'Relu_3:0' shape=(?, 128) dtype=float32>

In [34]:
# Output, class prediction
# finally we multiply the fully connected layer with the weights and add a bias term. 
# out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])

ValueError: Dimensions must be equal, but are 62 and 10 for 'Add_1' (op: 'Add') with input shapes: [?,62], [10].

In [None]:
def conv_net(x, weights, biases):  

    # here we call the conv2d function we had defined above and pass the input image x, weights wc1 and bias bc1.
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 14*14 matrix.
    conv1 = maxpool2d(conv1, k=2)

    # Convolution Layer
    # here we call the conv2d function we had defined above and pass the input image x, weights wc2 and bias bc2.
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 7*7 matrix.
    conv2 = maxpool2d(conv2, k=2)

    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
    # Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 4*4.
    conv3 = maxpool2d(conv3, k=2)


    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Output, class prediction
    # finally we multiply the fully connected layer with the weights and add a bias term. 
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out

In [None]:
pred = conv_net(x, weights, biases)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [None]:
#Here you check whether the index of the maximum value of the predicted image is equal to the actual labelled image. and both will be a column vector.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))

#calculate accuracy across all the given images and average them out. 
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# Initializing the variables
init = tf.global_variables_initializer()

Attention mes images ne sont pas shufflées. 

In [None]:
with tf.Session() as sess:
    sess.run(init) 
    train_loss = []
    test_loss = []
    train_accuracy = []
    test_accuracy = []
    summary_writer = tf.summary.FileWriter('./Output', sess.graph)
    for i in range(training_iters):
        for batch in range(len(train_X)//batch_size):
            batch_x = train_X[batch*batch_size:min((batch+1)*batch_size,len(train_X))]
            batch_y = train_y[batch*batch_size:min((batch+1)*batch_size,len(train_y))]    
            # Run optimization op (backprop).
                # Calculate batch loss and accuracy
            opt = sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})
        
        print("Iter " + str(i) + ", Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc))
        
        print("Optimization Finished!")

        # Calculate accuracy for all 10000 mnist test images
        test_acc,valid_loss = sess.run([accuracy,cost], feed_dict={x: test_X,y : test_y})
        train_loss.append(loss)
        test_loss.append(valid_loss)
        train_accuracy.append(acc)
        test_accuracy.append(test_acc)
        print("Testing Accuracy:","{:.5f}".format(test_acc))
    summary_writer.close()