# Dog Breed Identification
Cameron Cruz

In [1]:
import logging
import os
import json
import tensorflow as tf
import pandas as pd
import numpy as np
import cv2 as cv
from tqdm import trange
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

  return f(*args, **kwds)
  return f(*args, **kwds)


### Utility functions

In [2]:
def set_logger(log_path):
    """Configure the root logger to write INFO messages to the console and to `log_path`.

    Having every terminal message also saved in a permanent file (for example
    `model_dir/train.log`) makes a training run easy to inspect afterwards.

    Handlers are only attached when the root logger has none yet, so calling this
    function again (e.g. when re-running a notebook cell) does not duplicate output.

    Example:
    ```
    set_logger("model/train.log")
    logging.info("Starting training...")
    ```

    Args:
        log_path: (string) path of the log file; its parent directory is created if missing
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    if not logger.handlers:
        # FileHandler opens the file right away and fails if the directory is missing,
        # so make sure the parent directory exists first.
        log_dir = os.path.dirname(log_path)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        # Logging to a file (timestamped, with level)
        file_handler = logging.FileHandler(log_path)
        file_handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
        logger.addHandler(file_handler)

        # Logging to console (bare message only)
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(stream_handler)


def save_dict_to_json(d, json_path):
    """Save a dict of float-castable values to a JSON file.

    Args:
        d: (dict) of float-castable values (np.float32, int, float, etc.)
        json_path: (string) path to the JSON file to (over)write

    Raises:
        TypeError, ValueError: if a value cannot be converted with `float()`.
            In that case the file at `json_path` is left untouched.
    """
    # json does not accept numpy scalar types, so cast every value to a plain float.
    # Do this BEFORE opening the file: opening with 'w' truncates it, and a failed
    # conversion must not wipe out previously saved metrics.
    serializable = {k: float(v) for k, v in d.items()}
    with open(json_path, 'w') as f:
        json.dump(serializable, f, indent=4)

### Load training set image ids and labels

In [3]:
# labels.csv provides the `id` and `breed` columns that the cells below read
data = pd.read_csv('data/labels.csv')
# Non-null count per column (quick sanity check that no id or breed is missing)
data.count()

id       10222
breed    10222
dtype: int64

### Fit One-hot Labels

In [4]:
# classes.txt is read line by line; each stripped line is one class name
with open('classes.txt', 'r') as f:
    classes = list(map(str.strip, f))

# Fit the one-hot encoder on the class list; the displayed matrix is the
# encoding of the classes themselves.
lb = LabelBinarizer()
lb.fit_transform(classes)

array([[1, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 0, 1]])

### Create train-eval split (95-5)

In [5]:
# Pull the image ids and breed names out of the labels frame as plain lists
file_ids, labels = (data[column].tolist() for column in ('id', 'breed'))

# Encode each breed name with the binarizer fitted on the class list
one_hot_labels = lb.transform(labels)

# Hold out 5% of the samples for evaluation; the fixed seed keeps the split reproducible
split = train_test_split(file_ids, one_hot_labels, test_size=0.05, random_state=42)
X_train, X_val, y_train, y_val = split

### Set params and logger

In [6]:
image_size = 64             # images are resized to image_size x image_size in parse_fn
num_classes = len(classes)  # one output logit per class
learning_rate = 1e-4        # Adam learning rate used in model_fn
batch_size = 32
num_epochs = 25
train_size = len(X_train)
eval_size = len(X_val)
model_dir = 'model'         # log file, summaries, checkpoints and metric files go here

# The log file handler cannot create missing directories, so make sure the
# model directory exists before configuring the logger (needed on a fresh checkout).
os.makedirs(model_dir, exist_ok=True)
set_logger(os.path.join(model_dir, 'train.log'))

### Define parse_fn to get image from a file_id

In [7]:
def parse_fn(file_id, label):
    """Read the training JPEG for `file_id` and return it resized, with its label.

    Args:
        file_id: string tensor; the image is read from data/train/<file_id>.jpg
        label: passed through unchanged

    Returns:
        (image, label) where image is a float32 tensor resized to
        image_size x image_size with 3 channels
    """
    train_dir = 'data' + os.sep + 'train'
    jpeg_bytes = tf.read_file(train_dir + os.sep + file_id + '.jpg')
    decoded = tf.image.decode_jpeg(jpeg_bytes, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize_images(as_float,
                                     [image_size, image_size],
                                     method=tf.image.ResizeMethod.AREA)
    return resized, label

### Define function for real-time data augmentation during training

In [8]:
def train_fn(img, label):
    """Apply random augmentation to one training image.

    Args:
        img: float image tensor (the clipping below keeps values in [0, 1])
        label: passed through unchanged

    Returns:
        (augmented image, label)
    """
    # Random horizontal flip
    flipped = tf.image.random_flip_left_right(img)
    # Random brightness shift of at most 32/255
    brightened = tf.image.random_brightness(flipped, max_delta=32.0 / 255.0)
    # Random saturation scaling between 0.5x and 1.5x
    saturated = tf.image.random_saturation(brightened, lower=0.5, upper=1.5)
    # The adjustments above can push values outside [0, 1]; clip them back
    clipped = tf.clip_by_value(saturated, 0.0, 1.0)
    return clipped, label

### Define input_fn to create data pipeline

In [9]:
def input_fn(mode, X, Y):
    """Build the tf.data pipeline for one dataset split.

    In "train" mode the samples are shuffled and augmented with `train_fn`;
    in any other mode they are only parsed, in their original order.

    Args:
        mode: (string) "train" or anything else (evaluation)
        X: sequence of file ids
        Y: sequence of labels, same length as X

    Returns:
        dict with the next-batch tensors under 'images' and 'labels' and the
        iterator initializer under 'iterator_init_op'
    """
    assert len(X) == len(Y)

    num_samples = len(X)
    dataset = tf.data.Dataset.from_tensor_slices((tf.constant(X), tf.constant(Y)))

    if mode == "train":
        # Shuffle buffer covers the whole split, then parse and augment in parallel
        dataset = dataset.shuffle(num_samples)
        dataset = dataset.map(parse_fn, num_parallel_calls=4)
        dataset = dataset.map(train_fn, num_parallel_calls=4)
    else:
        dataset = dataset.map(parse_fn)

    # Always keep one batch ready ahead of the consumer
    dataset = dataset.batch(batch_size).prefetch(1)

    iterator = dataset.make_initializable_iterator()
    batch_images, batch_labels = iterator.get_next()

    return {
        'images': batch_images,
        'labels': batch_labels,
        'iterator_init_op': iterator.initializer,
    }

### Define network layers

In [10]:
def build_model(is_training, inputs):
    """Compute class logits for a batch of images.

    The network is four conv blocks (3x3 conv -> batch norm -> ReLU -> 2x2 max pool,
    with 16, 32, 64 and 128 filters) followed by a batch-normalized dense layer and
    a final dense layer with `num_classes` outputs.

    Args:
        is_training: (bool) whether batch normalization runs in training mode
        inputs: (dict) must contain the image batch under 'images'

    Returns:
        logits tensor with `num_classes` values per image
    """
    images = inputs['images']

    out = images

    num_channels = 16
    bn_momentum = 0.90
    channels = [num_channels, num_channels * 2, num_channels * 4, num_channels * 8]
    for i, c in enumerate(channels):
        with tf.variable_scope('block_{}'.format(i+1)):
            out = tf.layers.conv2d(out, c, 3, padding='same')
            out = tf.layers.batch_normalization(out, momentum=bn_momentum, training=is_training)
            out = tf.nn.relu(out)
            out = tf.layers.max_pooling2d(out, 2, 2)

    # Derive the flattened size from the static shape instead of hard-coding 4x4,
    # so the network is not tied to image_size == 64.
    _, height, width, depth = out.get_shape().as_list()
    assert depth == num_channels * 8
    assert height is not None and width is not None, \
        "spatial dimensions must be statically known to flatten the feature map"

    out = tf.reshape(out, [-1, height * width * depth])
    with tf.variable_scope('fc_1'):
        out = tf.layers.dense(out, num_channels * 8)
        out = tf.layers.batch_normalization(out, momentum=bn_momentum, training=is_training)
        #out = tf.layers.dropout(out, rate=0.2, training=is_training)
        out = tf.nn.relu(out)
    with tf.variable_scope('fc_2'):
        logits = tf.layers.dense(out, num_classes)

    return logits

### Define graph ops

In [11]:
def model_fn(mode, inputs, reuse=False):
    """Build the graph ops for training or evaluation.

    Args:
        mode: (string) 'train' enables batch-norm training mode and adds a
            'train_op'; any other value builds an evaluation graph
        inputs: (dict) must contain the 'images' batch and the one-hot 'labels' batch
        reuse: forwarded to the 'model' variable scope (controls weight sharing)

    Returns:
        dict: the `inputs` dict itself (not a copy), extended with
        'variable_init_op', 'predictions', 'loss', 'accuracy', 'metrics_init_op',
        'metrics', 'update_metrics', 'summary_op' and, in training mode, 'train_op'
    """
    is_training = (mode == 'train')
    labels = inputs['labels']
    # Labels are one-hot; class indices are what gets compared with predictions
    label_ids = tf.argmax(labels, 1)

    # -----------------------------------------------------------
    # MODEL: define the layers of the model
    with tf.variable_scope('model', reuse=reuse):
        # Compute the output distribution of the model and the predictions
        logits = build_model(is_training, inputs)
        predictions = tf.argmax(logits, 1)

    # Define loss and accuracy (both computed on the current batch only)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(label_ids, predictions), tf.float32))

    # Define training step that minimizes the loss with the Adam optimizer
    if is_training:
        optimizer = tf.train.AdamOptimizer(learning_rate)
        global_step = tf.train.get_or_create_global_step()
        # Add a dependency to update the moving mean and variance for batch normalization
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.minimize(loss, global_step=global_step)

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Metrics for evaluation using tf.metrics (average over whole dataset)
    with tf.variable_scope("metrics"):
        metrics = {
            'accuracy': tf.metrics.accuracy(labels=label_ids, predictions=predictions),
            'loss': tf.metrics.mean(loss)
        }

    # Group the update ops for the tf.metrics (each metric is a (value, update_op) pair)
    update_metrics_op = tf.group(*[op for _, op in metrics.values()])

    # Get the op to reset the local variables used in tf.metrics
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metrics")
    metrics_init_op = tf.variables_initializer(metric_variables)

    # Summaries for training
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    tf.summary.image('train_image', inputs['images'])

    # TODO: if mode == 'eval': ?
    # Per-example mask of misclassified images. Class indices are compared here;
    # comparing the one-hot `labels` with `predictions` would mix tensors of
    # different rank.
    mask = tf.not_equal(label_ids, predictions)

    # Disabled: a separate image summary per predicted class, to see how images
    # were misclassified.
    # for label in range(0, num_classes):
    #     mask_label = tf.logical_and(mask, tf.equal(predictions, label))
    #     incorrect_image_label = tf.boolean_mask(inputs['images'], mask_label)
    #     tf.summary.image('incorrectly_labeled_{}'.format(label), incorrect_image_label)

    # -----------------------------------------------------------
    # MODEL SPECIFICATION
    # Create the model specification and return it
    # It contains nodes or operations in the graph that will be used for training and evaluation
    # NOTE: this aliases (and therefore extends) the caller's `inputs` dict.
    model_spec = inputs
    model_spec['variable_init_op'] = tf.global_variables_initializer()
    model_spec["predictions"] = predictions
    model_spec['loss'] = loss
    model_spec['accuracy'] = accuracy
    model_spec['metrics_init_op'] = metrics_init_op
    model_spec['metrics'] = metrics
    model_spec['update_metrics'] = update_metrics_op
    model_spec['summary_op'] = tf.summary.merge_all()

    if is_training:
        model_spec['train_op'] = train_op

    return model_spec

### Define training

In [12]:
def train_sess(sess, model_spec, num_steps, writer, save_summary_steps=1):
    """Train the model for `num_steps` batches and log the averaged train metrics.

    Args:
        sess: (tf.Session) session used to run the ops
        model_spec: (dict) graph ops/tensors; must contain 'loss', 'train_op',
            'update_metrics', 'metrics', 'summary_op', 'iterator_init_op' and
            'metrics_init_op'
        num_steps: (int) number of batches to train on
        writer: (tf.summary.FileWriter) receives the TensorBoard summaries
        save_summary_steps: (int) write summaries every this many steps. The
            default of 1 writes them at every step; a falsy value (0 or None)
            disables summary writing.
    """
    # Get relevant graph operations or nodes needed for training
    loss = model_spec['loss']
    train_op = model_spec['train_op']
    update_metrics = model_spec['update_metrics']
    metrics = model_spec['metrics']
    summary_op = model_spec['summary_op']
    global_step = tf.train.get_global_step()

    # Load the training dataset into the pipeline and initialize the metrics local variables
    sess.run(model_spec['iterator_init_op'])
    sess.run(model_spec['metrics_init_op'])

    # Use tqdm for progress bar
    t = trange(num_steps)
    for i in t:
        # Evaluate summaries for tensorboard only every `save_summary_steps` steps
        if save_summary_steps and i % save_summary_steps == 0:
            # Perform a mini-batch update and fetch the summaries in the same run
            _, _, loss_val, summ, global_step_val = sess.run([train_op, update_metrics, loss,
                                                              summary_op, global_step])
            # Write summaries for tensorboard
            writer.add_summary(summ, global_step_val)
        else:
            _, _, loss_val = sess.run([train_op, update_metrics, loss])
        # Log the loss in the tqdm progress bar
        t.set_postfix(loss='{:05.3f}'.format(loss_val))

    # Each metric is a (value_tensor, update_op) pair; read the accumulated values
    metrics_values = {k: v[0] for k, v in metrics.items()}
    metrics_val = sess.run(metrics_values)
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in metrics_val.items())
    logging.info("- Train metrics: " + metrics_string)

### Define evaluation

In [13]:
def evaluate_sess(sess, model_spec, num_steps, writer=None):
    """Run `num_steps` evaluation batches and return the accumulated metrics.

    Args:
        sess: (tf.Session) session used to run the ops
        model_spec: (dict) graph ops/tensors; must contain 'update_metrics',
            'metrics', 'iterator_init_op' and 'metrics_init_op'
        num_steps: (int) number of batches to evaluate
        writer: (tf.summary.FileWriter) optional; when given, every metric is
            written as a scalar summary at the current global step

    Returns:
        dict mapping each metric name to its value over the evaluated batches
    """
    metric_ops = model_spec['metrics']
    update_op = model_spec['update_metrics']
    step_tensor = tf.train.get_global_step()

    # Rewind the evaluation data and reset the metric accumulators
    sess.run(model_spec['iterator_init_op'])
    sess.run(model_spec['metrics_init_op'])

    # Accumulate the metrics batch by batch
    for _batch in range(num_steps):
        sess.run(update_op)

    # Each metric is a (value_tensor, update_op) pair; fetch the value tensors
    value_tensors = {name: pair[0] for name, pair in metric_ops.items()}
    metrics_val = sess.run(value_tensors)
    formatted = ["{}: {:05.3f}".format(name, value) for name, value in metrics_val.items()]
    logging.info("- Eval metrics: " + " ; ".join(formatted))

    if writer is None:
        return metrics_val

    # Add the summaries manually to the writer at the current global step
    global_step_val = sess.run(step_tensor)
    for tag, val in metrics_val.items():
        scalar = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=val)])
        writer.add_summary(scalar, global_step_val)

    return metrics_val

### Create dataset iterators

In [14]:
# Training pipeline: shuffled and augmented (see input_fn's "train" branch)
train_inputs = input_fn('train', X_train, y_train)
# Evaluation pipeline: parsed only, no shuffling or augmentation
eval_inputs = input_fn('eval', X_val, y_val)

### Define model specs

In [15]:
logging.info("Creating the model...")
# Both graphs are built under the same 'model' variable scope with AUTO_REUSE,
# so the evaluation graph reads the weights created for the training graph.
train_model_spec = model_fn('train', train_inputs, reuse=tf.AUTO_REUSE)
eval_model_spec = model_fn('eval', eval_inputs, reuse=tf.AUTO_REUSE)

Creating the model...


### Train and evaluate

In [16]:
logging.info("Starting training for {} epoch(s)".format(num_epochs))
last_saver = tf.train.Saver() # will keep last 5 epochs
best_saver = tf.train.Saver(max_to_keep=1)  # only keep 1 best checkpoint (best on eval)
begin_at_epoch = 0

# Saving a checkpoint fails when its parent directory is missing, so create the
# checkpoint directories up front (needed when running from a clean checkout).
for weights_dir in ('last_weights', 'best_weights'):
    os.makedirs(os.path.join(model_dir, weights_dir), exist_ok=True)

with tf.Session() as sess:
    # Initialize model variables
    sess.run(train_model_spec['variable_init_op'])

    # Reload weights from directory if specified (disabled):
    # if restore_from is not None:
    #     logging.info("Restoring parameters from {}".format(restore_from))
    #     if os.path.isdir(restore_from):
    #         restore_from = tf.train.latest_checkpoint(restore_from)
    #         begin_at_epoch = int(restore_from.split('-')[-1])
    #     last_saver.restore(sess, restore_from)

    # For tensorboard (takes care of writing summaries to files)
    train_writer = tf.summary.FileWriter(os.path.join(model_dir, 'train_summaries'), sess.graph)
    eval_writer = tf.summary.FileWriter(os.path.join(model_dir, 'eval_summaries'), sess.graph)

    try:
        best_eval_acc = 0.0
        for epoch in range(begin_at_epoch, begin_at_epoch + num_epochs):
            # Run one epoch
            logging.info("Epoch {}/{}".format(epoch + 1, begin_at_epoch + num_epochs))
            # Compute number of batches in one epoch (one full pass over the training set)
            num_steps = (train_size + batch_size - 1) // batch_size
            train_sess(sess, train_model_spec, num_steps, train_writer)

            # Save weights
            last_save_path = os.path.join(model_dir, 'last_weights', 'after-epoch')
            last_saver.save(sess, last_save_path, global_step=epoch + 1)

            # Evaluate for one epoch on validation set
            num_steps = (eval_size + batch_size - 1) // batch_size
            metrics = evaluate_sess(sess, eval_model_spec, num_steps, eval_writer)

            # If best_eval, best_save_path
            eval_acc = metrics['accuracy']
            if eval_acc >= best_eval_acc:
                # Store new best accuracy
                best_eval_acc = eval_acc
                # Save weights
                best_save_path = os.path.join(model_dir, 'best_weights', 'after-epoch')
                best_save_path = best_saver.save(sess, best_save_path, global_step=epoch + 1)
                logging.info("- Found new best accuracy, saving in {}".format(best_save_path))
                # Save best eval metrics in a json file in the model directory
                best_json_path = os.path.join(model_dir, "metrics_eval_best_weights.json")
                save_dict_to_json(metrics, best_json_path)

            # Save latest eval metrics in a json file in the model directory
            last_json_path = os.path.join(model_dir, "metrics_eval_last_weights.json")
            save_dict_to_json(metrics, last_json_path)
    finally:
        # Flush and close the event files even if training is interrupted,
        # so buffered summaries are not lost.
        train_writer.close()
        eval_writer.close()
logging.info("Finished training.")

Starting training for 25 epoch(s)
Epoch 1/25
100%|██████████| 304/304 [00:18<00:00, 16.42it/s, loss=4.659]
- Train metrics: accuracy: 0.017 ; loss: 4.880
- Eval metrics: accuracy: 0.029 ; loss: 4.714
- Found new best accuracy, saving in model/best_weights/after-epoch-1
Epoch 2/25
100%|██████████| 304/304 [00:17<00:00, 17.56it/s, loss=4.412]
- Train metrics: accuracy: 0.040 ; loss: 4.587
- Eval metrics: accuracy: 0.033 ; loss: 4.576
- Found new best accuracy, saving in model/best_weights/after-epoch-2
Epoch 3/25
100%|██████████| 304/304 [00:17<00:00, 17.45it/s, loss=4.440]
- Train metrics: accuracy: 0.058 ; loss: 4.428
- Eval metrics: accuracy: 0.047 ; loss: 4.485
- Found new best accuracy, saving in model/best_weights/after-epoch-3
Epoch 4/25
100%|██████████| 304/304 [00:17<00:00, 17.56it/s, loss=4.169]
- Train metrics: accuracy: 0.072 ; loss: 4.311
- Eval metrics: accuracy: 0.064 ; loss: 4.416
- Found new best accuracy, saving in model/best_weights/after-epoch-4
Epoch 5/25
100%|██████