[View in Colaboratory](https://colab.research.google.com/github/joaoflf/ml-playground/blob/master/mobilenetv2_dogs.ipynb)

## Train Stanford Dogs Dataset on MobilenetV2

Play with TensorFlow transfer learning capabilities, its Dataset API and train the model using MonitoredTrainingSession


---

Download data and Mobilenets model

In [0]:
#Kill Colab VM (if needed)
#!kill -9 -1

import os
if not os.path.exists('./colab_utils'):
  !git clone https://github.com/mixuala/colab_utils.git

if not os.path.exists('./models'):
  !git clone https://github.com/tensorflow/models/
  # setup path
  import sys
  sys.path.append('/content/models/research/slim')
  
if not os.path.exists('./data'):
  !mkdir data
  !wget -O ./data/data.tar http://vision.stanford.edu/aditya86/ImageNetDogs/lists.tar
  !tar xvf ./data/data.tar -C ./data/
  !wget -O ./data/images.tar http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar
  !tar xvf ./data/images.tar -C ./data/
if not os.path.exists('./mobilenets'):
  !mkdir mobilenets
  !wget -O ./mobilenets/mobilenet_v2.tgz https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz
  !tar zxvf ./mobilenets/mobilenet_v2.tgz -C ./mobilenets

Import packages and setup TensorBoard

In [8]:
import colab_utils.tboard
import tensorflow as tf
from nets.mobilenet import mobilenet_v2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.io import loadmat
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import time
from pprint import pprint
import scipy.misc
from glob import glob

ROOT = %pwd
root_log_dir = os.path.join(ROOT, 'tf_logs')
colab_utils.tboard.launch_tensorboard( bin_dir=ROOT, log_dir=root_log_dir )

ngrok installed
status: tensorboard=True, ngrok=False
tensorboard url= http://8594a1db.ngrok.io


'http://8594a1db.ngrok.io'

Load image filenames and labels

In [0]:
train_file = loadmat('./data/train_list.mat')
train_list = train_file['file_list'].flatten()
train_list = ['./data/Images/' + val for sublist in train_list for val in sublist]
train_list = train_list[:len(train_list)//10]
train_labels = train_file['labels'].astype(int).flatten()
train_labels = train_labels[:len(train_list)]

Build the graph, using the dataset API to feed the images

In [0]:
tf.reset_default_graph()

learning_rate=0.01
momentum = 0.9
num_epochs = 40
batch_size = 100

# Parse fn to read, decode and resize image, and return both image and label to dataset
def _parse_function(filename, label):
  image_string = tf.read_file(filename)
  image_decoded = tf.image.decode_jpeg(image_string, channels=3)
  image_resized = tf.image.resize_images(image_decoded, [224, 224])
  return image_resized, label

graph = tf.Graph()
with graph.as_default():
  filenames = tf.placeholder(tf.string, shape=[None])
  labels = tf.placeholder(tf.int64, shape=[None])

  # Build dataset, feed list of filenames, labels, shuffle, batch, repeat and iterate.
  dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
  dataset = dataset.map(_parse_function)
  dataset = dataset.shuffle(buffer_size=10000)
  dataset = dataset.batch(batch_size)
  dataset = dataset.repeat(num_epochs)
  iterator = dataset.make_initializable_iterator()

  #get outputs of each iteration to feed into model
  next_image, next_label = iterator.get_next()

  #import mobilenet graph, feeding in the image batch as input
  with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=True)):
    logits, endpoints = mobilenet_v2.mobilenet(next_image, num_classes=120)

  #feed in logits from mobilenet and the labels batch to run softmax and calculate loss
  softmax = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=next_label, name='softmax')
  loss = tf.reduce_mean(softmax, name='loss')
  loss_summary = tf.summary.scalar('Loss', loss)

  #optimizing operation
  global_step = tf.train.get_or_create_global_step()
  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss,global_step=global_step)

  #accuracy operation
  correct_pred = tf.equal(tf.argmax(logits,1), next_label)
  accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
  accuracy_summary = tf.summary.scalar('Accuracy', accuracy)

  saver = tf.train.Saver()
  init = tf.global_variables_initializer()

Hooks to run in MonitoredTrainingSession. They include logging, writting summary to TensorBoard and saving checkpoints

In [70]:

class Log_Hook(tf.train.SessionRunHook):
  def __init__(self, loss, accuracy):
    self.loss = loss
    self.accuracy =accuracy

  def begin(self):
    self.step = 0
    pass

  def before_run(self, run_context):
    self.step += 1
    return tf.train.SessionRunArgs([self.loss, self.accuracy])  

  def after_run(self, run_context, run_values):
    if (self.step % 100 == 0):
      loss_value, acc_value = run_values.results
      print("Iteration: " + str(self.step) + ", Minibatch Loss= " + "{:.3f}".format(loss_value) + ", Training Accuracy= " + "{:.3f}".format(acc_value))
      
with graph.as_default():
  log_hook = Log_Hook(loss, accuracy)

  now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
  logdir = "{}/run-{}/".format(root_log_dir, now)

  summary_hook = tf.train.SummarySaverHook(save_steps=1, output_dir=logdir, summary_op=tf.summary.merge_all())
  checkpoint_hook = tf.train.CheckpointSaverHook('./checkpoints', save_steps=100)


INFO:tensorflow:Create CheckpointSaverHook.


Run training session on 40 epochs and batches of 100

In [71]:
with graph.as_default():
  with tf.train.MonitoredTrainingSession(hooks=[log_hook, summary_hook, checkpoint_hook]) as sess:
    sess.run(iterator.initializer, feed_dict={filenames: train_list, labels:train_labels})
    while not sess.should_stop():
      sess.run(optimizer)

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into ./checkpoints/model.ckpt.
Iteration: 100, Minibatch Loss= 1.732, Training Accuracy= 0.430
INFO:tensorflow:Saving checkpoints for 100 into ./checkpoints/model.ckpt.
Iteration: 200, Minibatch Loss= 1.129, Training Accuracy= 0.650
INFO:tensorflow:Saving checkpoints for 200 into ./checkpoints/model.ckpt.
Iteration: 300, Minibatch Loss= 0.681, Training Accuracy= 0.770
INFO:tensorflow:Saving checkpoints for 300 into ./checkpoints/model.ckpt.
Iteration: 400, Minibatch Loss= 0.245, Training Accuracy= 0.910
INFO:tensorflow:Saving checkpoints for 400 into ./checkpoints/model.ckpt.
INFO:tensorflow:Saving checkpoints for 479 into ./checkpoints/model.ckpt.


Test with an image with a Beagle pulled from the web. And... success!!

In [72]:
if not os.path.exists('./test'):
  !mkdir test
  !wget -O ./test/test_image.jpg https://www.what-dog.net/Images/faces2/scroll001.jpg
    
test_list = np.array(['./test/test_image.jpg'])

with graph.as_default():
  with tf.Session() as sess:
    saver.restore(sess, './checkpoints/model.ckpt-479')
    sess.run(iterator.initializer, feed_dict={filenames: test_list, labels:[1]})
    logits_output = sess.run(logits)
    idx = int(np.argmax(logits_output,1))
    print(idx)

INFO:tensorflow:Restoring parameters from ./checkpoints/model.ckpt-479
8
