## Classification with simple convnet
-------------

In [None]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [None]:
pickle_file = 'esc10.pickle'

# NOTE: unpickling can execute arbitrary code, so only load a file you trust.
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

# Pull the two arrays out of the loaded mapping, then drop the container so
# the garbage collector can reclaim it.
dataset, labels = save['dataset'], save['labels']
del save
print('Dataset', dataset.shape, labels.shape)

Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [None]:
# Extent of one example, read from axes 1 and 2 of the loaded array.
image_width, image_height = dataset.shape[1:3]
num_labels = 10    # width of the classifier's output layer
num_channels = 1   # grayscale

import numpy as np

def reformat(dataset, labels):
  """Convert raw arrays into the layout the TensorFlow graph is fed with.

  Args:
    dataset: array reshaped to (-1, image_width, image_height, num_channels).
    labels: 1-D array of class ids.

  Returns:
    (images, one_hot): both float32. `one_hot` has one column per distinct
    value found in `labels`, ordered as `np.unique` returns them.
  """
  cube_shape = (-1, image_width, image_height, num_channels)
  images = dataset.reshape(cube_shape).astype(np.float32)
  # Comparing a column of labels against the row of distinct class ids
  # broadcasts to a boolean matrix with a single True per row.
  classes = np.unique(labels)
  one_hot = labels[:, None] == classes
  return images, one_hot.astype(np.float32)

dataset, labels = reformat(dataset, labels)
print('Dataset', dataset.shape, labels.shape)

In [None]:
def accuracy(predictions, labels):
  """Return the percentage of rows whose arg-max class matches the label.

  Args:
    predictions: 2-D array of per-class scores, one row per example.
    labels: 2-D array of the same layout; the arg-max of each row is taken
      as the true class.

  Returns:
    Accuracy in percent (0-100).
  """
  predicted_classes = np.argmax(predictions, 1)
  true_classes = np.argmax(labels, 1)
  num_correct = np.sum(predicted_classes == true_classes)
  return 100.0 * num_correct / predictions.shape[0]

I know that k-fold cross-validation is not the best fit for deep learning, but the original paper on the ESC dataset uses it, so I do too.

In [None]:
#from sklearn.cross_validation import train_test_split
#train_dataset, test_dataset, train_labels, test_labels = train_test_split(dataset, labels, test_size=0.1)

In [None]:
from sklearn.model_selection import KFold
KFolds_num = 10
# Fixed seed so the shuffled fold assignment is identical on every run;
# without it each Restart & Run All evaluates on different splits and the
# per-fold accuracies cannot be reproduced.
KFolds_seed = 0
kf = KFold(n_splits=KFolds_num, shuffle=True, random_state=KFolds_seed)
kf.get_n_splits(dataset, labels)

In [None]:
batch_size = 16
d_rate = 0.5        # dropout rate used on the fully connected layers while training
l2_penalty = 0.01   # scale of the L2 weight penalty (built below, not added to the loss)
# Nominal number of examples per test fold. Floor division keeps this an int
# on Python 3, where `/` would yield a float.
test_sz = dataset.shape[0] // KFolds_num

graph = tf.Graph()


with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_width, image_height, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    # The batch dimension is left open so a test fold of any size can be fed,
    # including folds that are not exactly `test_sz` examples long.
    tf_test_dataset = tf.placeholder(tf.float32, shape=(None, image_width, image_height, num_channels))

    # Variables: three 3x3 convolution layers followed by three dense layers.
    layer1_weights = tf.Variable(tf.truncated_normal([3, 3, 1, 16], stddev=0.1))
    layer1_biases  = tf.Variable(tf.zeros([16]))

    layer2_weights = tf.Variable(tf.truncated_normal([3, 3, 16, 32], stddev=0.1))
    layer2_biases  = tf.Variable(tf.zeros([32]))

    layer3_weights = tf.Variable(tf.truncated_normal([3, 3, 32, 64], stddev=0.1))
    layer3_biases  = tf.Variable(tf.zeros([64]))

    # 1024 must equal the flattened size of the third conv stage's output
    # -- TODO confirm against the dataset's image dimensions.
    layer4_weights = tf.Variable(tf.truncated_normal([1024, 128], stddev=0.1))
    layer4_biases  = tf.Variable(tf.zeros([128]))

    layer5_weights = tf.Variable(tf.truncated_normal([128, 128], stddev=0.1))
    layer5_biases  = tf.Variable(tf.zeros([128]))

    layer6_weights = tf.Variable(tf.truncated_normal([128, num_labels], stddev=0.1))
    layer6_biases  = tf.Variable(tf.zeros([num_labels]))

    # Model
    def model(data, is_training=False):
        """Build the forward pass and return unnormalised class logits.

        Args:
          data: 4-D float tensor laid out as
            (batch, image_width, image_height, num_channels).
          is_training: Python bool. When True, dropout with rate `d_rate` is
            applied after each hidden dense layer; when False the dense
            activations pass through unchanged.

        Returns:
          2-D tensor of logits with `num_labels` columns.
        """
        def conv_block(inputs, weights, biases):
            # Stride-2 convolution + ReLU, stride-2 max-pool, then LRN.
            conv = tf.nn.relu(tf.nn.conv2d(inputs, weights, [1, 2, 2, 1], padding='SAME') + biases)
            pooled = tf.nn.max_pool(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
            return tf.nn.local_response_normalization(pooled)

        hidden = conv_block(data, layer1_weights, layer1_biases)
        hidden = conv_block(hidden, layer2_weights, layer2_biases)
        hidden = conv_block(hidden, layer3_weights, layer3_biases)

        hidden = tf.nn.relu(tf.matmul(tf.contrib.layers.flatten(hidden), layer4_weights) + layer4_biases)
        # `training` must be passed explicitly: tf.layers.dropout defaults to
        # training=False, in which case it returns its input untouched and the
        # rate is ignored.
        hidden = tf.layers.dropout(hidden, rate=d_rate, training=is_training)
        hidden = tf.nn.relu(tf.matmul(hidden, layer5_weights) + layer5_biases)
        hidden = tf.layers.dropout(hidden, rate=d_rate, training=is_training)
        return tf.matmul(hidden, layer6_weights) + layer6_biases

    # Regularization
    l2_loss = l2_penalty * (tf.nn.l2_loss(layer1_weights) +
                            tf.nn.l2_loss(layer2_weights) +
                            tf.nn.l2_loss(layer3_weights) +
                            tf.nn.l2_loss(layer4_weights) +
                            tf.nn.l2_loss(layer5_weights) +
                            tf.nn.l2_loss(layer6_weights))

    # Training computation.
    logits = model(tf_train_dataset, is_training=True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    # NOTE: the L2 penalty above is not part of the optimised loss; uncomment
    # the next line to enable it.
#     loss = tf.add(loss, l2_loss)

    # Optimizer: Adam with a staircase natural-exponential learning-rate decay.
    global_step = tf.Variable(0, trainable=False)
    initial_learning_rate = 1e-3
    decay_steps = 100
    learning_rate = tf.train.natural_exp_decay(initial_learning_rate, global_step, decay_steps, 0.25, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Predictions for the training batch and the test fold.
    train_prediction = tf.nn.softmax(logits)
    test_prediction = tf.nn.softmax(model(tf_test_dataset, is_training=False))

In [None]:
num_steps = 501
acc = []  # best test accuracy reached within each fold

for train, test in kf.split(dataset, labels):
    train_dataset, test_dataset = dataset[train], dataset[test]
    train_labels, test_labels = labels[train], labels[test]
    # Batch offsets wrap at this bound so every slice holds batch_size rows.
    last_offset = train_labels.shape[0] - batch_size
    test_list = []
    # A fresh session per fold: variables are re-initialised, so no weights
    # carry over from the previous fold.
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print('Initialized')
        print('step \t error \t tr_acc  ts_acc')
        for step in range(num_steps):
            offset = (step * batch_size) % last_offset
            batch_end = offset + batch_size
            batch_data = train_dataset[offset:batch_end]
            batch_labels = train_labels[offset:batch_end]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            # The test fold is scored on even-numbered steps only.
            if step % 2 != 0:
                continue
            test_scores = test_prediction.eval(feed_dict={tf_test_dataset: test_dataset})
            test_list.append(accuracy(test_scores, test_labels))
            print('%d \t %.3f \t %.1f \t %.1f'
                  % (step, l, accuracy(predictions, batch_labels), test_list[-1]))

    acc.append(max(test_list))
    print('Best test accuracy: %f%%' % acc[-1])
      

In [None]:
# Per-fold best test accuracies, followed by their mean across folds.
mean_fold_accuracy = np.mean(acc)
print(acc, mean_fold_accuracy)