[View in Colaboratory](https://colab.research.google.com/github/joaoflf/ml-playground/blob/master/mobilenets_dogs.ipynb)

## Implementation of the Mobilenets paper

This is an implementation of the mobilenets architecture in tensorflow, using the Stanford Dogs Dataset to test its effectiveness and accuracy

----

Package imports and Tensorboard setup

In [0]:
#Kill Colab VM (if needed)
#!kill -9 -1

import os
if not os.path.exists('./colab_utils'):
  !git clone https://github.com/mixuala/colab_utils.git
    
import colab_utils.tboard
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.io import loadmat
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
ROOT = %pwd
root_log_dir = os.path.join(ROOT, 'tf_logs')
colab_utils.tboard.launch_tensorboard( bin_dir=ROOT, log_dir=root_log_dir )

Download, extract and load dataset

In [0]:

if not os.path.exists('./data'):
  !mkdir data
  !wget -O ./data/data.tar http://vision.stanford.edu/aditya86/ImageNetDogs/lists.tar
  !tar xvf ./data/data.tar -C ./data/
  !wget -O ./data/images.tar http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar
  !tar xvf ./data/images.tar -C ./data/
  

In [0]:
train_file = loadmat('./data/train_list.mat')
train_list = train_file['file_list'].flatten()
train_list = ['./data/Images/' + val for sublist in train_list for val in sublist]
train_list = train_list[:len(train_list)//10]
train_labels = train_file['labels'].astype(int).flatten()
train_labels = train_labels[:len(train_list)] 

In [0]:
def create_variable( shape, name,):
  initializer = tf.contrib.layers.xavier_initializer()
  var = tf.Variable(initializer(shape=shape), name= name)
  return var

def depthwise_conv(input, name, channels, stride):

  W = create_variable([3, 3, channels, 1], name='dw_weight')
  tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, W)
  b = create_variable([channels], name='dw_bias')
  
  conv = tf.nn.bias_add(tf.nn.depthwise_conv2d(input, W, strides=stride, padding='SAME'), b, name='dw_conv')
  conv = tf.nn.relu6(tf.layers.batch_normalization(conv))
  return conv

def pointwise_conv(input, name, channels_in, channels_out):

  W = create_variable([1, 1, channels_in, channels_out], name='pw_weight')
  tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, W)
  b = create_variable([channels_out], name='pw_bias')
  
  conv = tf.nn.bias_add(tf.nn.conv2d(input, W, strides=[1,1,1,1], padding='SAME'), b, name='pw_conv')
  conv = tf.nn.relu6(tf.layers.batch_normalization(conv))
  return conv

def dw_separable_conv(input, name, channels_in, channels_out, stride):
  dw_conv = depthwise_conv(input, name, channels_in, stride)
  pw_conv = pointwise_conv(dw_conv, name, channels_in, channels_out)
  return pw_conv

In [0]:
conv_layers = [
    {
        'name': 'sep_dw_1',
        'channels_in': 32,
        'channels_out': 64,
        'stride': 1
    },
    {
        'name': 'sep_dw_2',
        'channels_in': 64,
        'channels_out': 128,
        'stride': 2
    },
    {
        'name': 'sep_dw_3',
        'channels_in': 128,
        'channels_out': 128,
        'stride': 1
    },
    {
        'name': 'sep_dw_4',
        'channels_in': 128,
        'channels_out': 256,
        'stride': 2
    },
    {
        'name': 'sep_dw_5',
        'channels_in': 256,
        'channels_out': 256,
        'stride': 1
    },
    {
        'name': 'sep_dw_6',
        'channels_in': 256,
        'channels_out': 512,
        'stride': 2
    },
    {
        'name': 'sep_dw_7',
        'channels_in': 512,
        'channels_out': 512,
        'stride': 1
    },
    {
        'name': 'sep_dw_8',
        'channels_in': 512,
        'channels_out': 512,
        'stride': 1
    },
    {
        'name': 'sep_dw_9',
        'channels_in': 512,
        'channels_out': 512,
        'stride': 1
    },
    {
        'name': 'sep_dw_10',
        'channels_in': 512,
        'channels_out': 512,
        'stride': 1
    },
    {
        'name': 'sep_dw_11',
        'channels_in': 512,
        'channels_out': 512,
        'stride': 1
    },
    {
        'name': 'sep_dw_12',
        'channels_in': 512,
        'channels_out': 1024,
        'stride': 2
    },
    {
        'name': 'sep_dw_13',
        'channels_in': 1024,
        'channels_out': 1024,
        'stride': 1,
    }
]

In [0]:
tf.reset_default_graph()
learning_rate=0.01
momentum = 0.9
num_epochs = 40
batch_size = 100

# Parse fn to read, decode and resize image, and return both image and label to dataset
def _parse_function(filename, label):
  image_string = tf.read_file(filename)
  image_decoded = tf.image.decode_jpeg(image_string, channels=3)
  image_resized = tf.image.resize_images(image_decoded, [224, 224])
  return image_resized, label

graph = tf.Graph()
with graph.as_default():
  filenames = tf.placeholder(tf.string, shape=[None])
  labels = tf.placeholder(tf.int64, shape=[None])

  # Build dataset, feed list of filenames, labels, shuffle, batch, repeat and iterate.
  dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
  dataset = dataset.map(_parse_function)
  dataset = dataset.shuffle(buffer_size=10000)
  dataset = dataset.batch(batch_size)
  dataset = dataset.repeat(num_epochs)
  iterator = dataset.make_initializable_iterator()

  #get outputs of each iteration to feed into model
  next_image, next_label = iterator.get_next()
  layers = []

  #initial conv layer
  init_conv_w = create_variable([3, 3, 3, 32],'initial_conv_w')
  init_conv_b = create_variable([32],'initial_conv_b')
  conv_1= tf.nn.relu(tf.layers.batch_normalization(tf.nn.bias_add(tf.nn.conv2d(next_image, init_conv_w, [1, 2, 2, 1], padding='SAME'),init_conv_b, name='initial_conv')))

  #depthwise separable conv layers
  layers.append(conv_1)

  for layer in conv_layers:
    with tf.variable_scope(layer['name']):
      dw_sep_conv = dw_separable_conv(layers[len(layers)-1], layer['name'], layer['channels_in'], layer['channels_out'], [1,layer['stride'],layer['stride'],1])
      layers.append(dw_sep_conv)

  #Avg pool, FC and Softmax layers
  avg_pool = tf.nn.avg_pool(layers[len(layers)-1], ksize=[1, 7, 7, 1], strides=[1, 1, 1, 1], padding='VALID', name='avg_pool')
  avg_pool_flat = tf.reshape(avg_pool, [-1, 1024])

  fc1_weights = create_variable([1024,120], 'fc1_w')
  fc1_bias = create_variable([120],'fc1_b')
  fc1 = tf.nn.bias_add(tf.matmul(avg_pool_flat, fc1_weights), fc1_bias, name='fc1')

  softmax = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=fc1, labels=next_label, name='softmax')

  #loss and accuracy 
  loss = tf.reduce_mean(softmax, name='loss') 
  loss_summary = tf.summary.scalar('Loss', loss)

  optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, momentum=momentum).minimize(loss)

  pred_class = tf.argmax(fc1,1)
  actual_class = next_label

  correct_pred = tf.equal(pred_class, actual_class)
  accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

  init = tf.global_variables_initializer()


In [15]:

class Log_Hook(tf.train.SessionRunHook):
  def __init__(self, loss, accuracy):
    self.loss = loss
    self.accuracy =accuracy

  def begin(self):
    self.step = 0
    pass

  def before_run(self, run_context):
    self.step += 1
    return tf.train.SessionRunArgs([self.loss, self.accuracy])  

  def after_run(self, run_context, run_values):
    if (self.step % 100 == 0):
      loss_value, acc_value = run_values.results
      print("Iteration: " + str(self.step) + ", Minibatch Loss= " + "{:.3f}".format(loss_value) + ", Training Accuracy= " + "{:.3f}".format(acc_value))
      

with graph.as_default():
  log_hook = Log_Hook(loss, accuracy)

  now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
  logdir = "{}/run-{}/".format(root_log_dir, now)

  summary_hook = tf.train.SummarySaverHook(save_steps=1, output_dir=logdir, summary_op=tf.summary.merge_all())
  checkpoint_hook = tf.train.CheckpointSaverHook('./checkpoints', save_steps=100)


INFO:tensorflow:Create CheckpointSaverHook.


In [0]:
with graph.as_default():
  with tf.train.MonitoredTrainingSession(hooks=[log_hook]) as sess:
    sess.run(iterator.initializer, feed_dict={filenames: train_list, labels:train_labels})
    while not sess.should_stop():
      sess.run(optimizer)

  print('Done!')


INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Iteration: 100, Minibatch Loss= 2.572, Training Accuracy= 0.070
