<a href="https://colab.research.google.com/github/johnymephisto/Machine-Learning-and-Deep-Learning-Experiments/blob/master/Transfer_Learning_with_tensorflow_hub.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#pip install -q tensorflow-hub

import os
from urllib import request
import zipfile
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.python import debug as tf_debug


%matplotlib inline


('1.12.0', True, True, '/device:GPU:0')

In [0]:
# Skip the download when the extracted dataset directory is already present under /tmp.
data_dir = '/tmp/datasets/dogscats'


if not os.path.isdir(data_dir):
    # Fetch the dogs-vs-cats zip archive into /tmp and unpack it.
    fallback_url = 'http://files.fast.ai/data/dogscats.zip'
    tf.keras.utils.get_file(
        fname=os.path.join('/tmp', os.path.basename(fallback_url)),
        origin=fallback_url,
        extract=True,
        cache_dir='/tmp')

In [0]:
def _img_string_to_tensor(image_string, image_size=(299, 299)):
    """Decode an encoded JPEG string into a resized float32 image tensor.

    Args:
        image_string: scalar string tensor holding the JPEG file contents.
        image_size: target size handed to `tf.image.resize_images`.

    Returns:
        The decoded 3-channel image, converted to float32 and resized
        to `image_size`.
    """
    rgb_uint8 = tf.image.decode_jpeg(image_string, channels=3)
    # convert_image_dtype rescales the uint8 range into [0, 1] floats.
    rgb_float = tf.image.convert_image_dtype(rgb_uint8, dtype=tf.float32)
    return tf.image.resize_images(rgb_float, size=image_size)

def make_dataset(file_pattern, image_size=(299, 299), shuffle=False, batch_size=64, num_epochs=None, buffer_size=4096):
    """Build a batched `tf.data.Dataset` of images and their class-name labels.

    Each file matching `file_pattern` is read, decoded and resized with
    `_img_string_to_tensor`; its label is the name of the directory that
    directly contains the file.

    Args:
        file_pattern: glob pattern passed to `tf.data.Dataset.list_files`.
        image_size: size every image is resized to.
        shuffle: when True, file paths are shuffled (and repeated) with
            `shuffle_and_repeat`; otherwise they are only repeated.
        batch_size: number of examples per batch.
        num_epochs: repeat count forwarded to `repeat` / `shuffle_and_repeat`.
        buffer_size: shuffle buffer size, counted in file paths. Only used
            when `shuffle` is True.

    Returns:
        A dataset yielding `({'image': images}, labels)` batches, where
        `labels` are string tensors.
    """

    def _path_to_img(path):
        # The parent folder of the file is its class name.
        label = tf.string_split([path], delimiter='/').values[-2]

        # Read the image from disk and bring it to the expected size.
        image_string = tf.read_file(path)
        image_resized = _img_string_to_tensor(image_string, image_size)

        return { 'image': image_resized }, label

    dataset = tf.data.Dataset.list_files(file_pattern)

    if shuffle:
        dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size, num_epochs))
    else:
        dataset = dataset.repeat(num_epochs)

    dataset = dataset.map(_path_to_img, num_parallel_calls=os.cpu_count())
    # prefetch() counts elements of the *batched* dataset. Reusing the shuffle
    # buffer size here would ask for thousands of decoded image batches to be
    # held in memory at once; a single batch is enough to overlap input
    # preprocessing with the training step.
    dataset = dataset.batch(batch_size).prefetch(1)

    return dataset

In [0]:
def model_fn(features, labels, mode, params):
    """Estimator model function: a TF-Hub module followed by one dense layer.

    Args:
        features: dict whose 'image' entry is fed to the hub module.
        labels: labels forwarded unchanged to the classification head.
        mode: a `tf.estimator.ModeKeys` value.
        params: dict providing 'module_spec', 'module_name', 'label_vocab'
            and 'learning_rate'; the optional 'train_module' flag
            (default False) makes the hub module trainable in TRAIN mode.

    Returns:
        The estimator spec produced by the selected head.
    """
    # The module is only marked trainable while training and when opted in.
    # NOTE(review): no `tags` are passed to hub.Module; confirm whether this
    # module needs tags={'train'} for fine-tuning.
    fine_tune_module = (
        mode == tf.estimator.ModeKeys.TRAIN and params.get('train_module', False)
    )

    feature_extractor = hub.Module(
        params['module_spec'],
        trainable=fine_tune_module,
        name=params['module_name'],
    )
    image_features = feature_extractor(features['image'])

    vocab = params['label_vocab']
    class_count = len(vocab)
    two_class = class_count == 2

    # Two classes use a single logit; otherwise one logit per class.
    logits = tf.keras.layers.Dense(1 if two_class else class_count)(image_features)

    if two_class:
        head = tf.contrib.estimator.binary_classification_head(label_vocabulary=vocab)
    else:
        head = tf.contrib.estimator.multi_class_head(n_classes=class_count, label_vocabulary=vocab)

    return head.create_estimator_spec(
        features, mode, logits, labels,
        optimizer=tf.train.AdamOptimizer(learning_rate=params['learning_rate']),
    )

In [0]:
def train(model_directory, data_directory):
    """Train and evaluate the hub-based image classifier.

    Args:
        model_directory: directory given to the Estimator as `model_dir`.
        data_directory: dataset root containing 'train' and 'valid' folders,
            each with one sub-folder per class holding `.jpg` files.
    """
    # Class names are the sub-folders of `<data_directory>/valid`. Read them
    # from the function argument (not a notebook global), keep directories
    # only so stray files cannot become classes, and sort so the
    # label -> index mapping does not depend on directory listing order.
    valid_dir = os.path.join(data_directory, 'valid')
    label_vocab = sorted(
        name for name in os.listdir(valid_dir)
        if os.path.isdir(os.path.join(valid_dir, name))
    )

    params = {
        'module_spec': 'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1',
        'module_name': 'inception_v3',
        'learning_rate': 1e-3,
        'train_module': True,  # model_fn marks the hub module itself as trainable
        'label_vocab': label_vocab
    }

    run_config = tf.estimator.RunConfig()

    classifier = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=model_directory,
        config=run_config,
        params=params
    )

    # Query the expected input size from the module spec instead of
    # instantiating a throwaway hub.Module in the default graph.
    module_spec = hub.load_module_spec(params['module_spec'])
    input_img_size = hub.get_expected_image_size(module_spec)

    train_files = os.path.join(data_directory, 'train', '**/*.jpg')
    train_input_fn = lambda: make_dataset(train_files, image_size=input_img_size, batch_size=8, shuffle=True)
    train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=20)

    eval_files = os.path.join(data_directory, 'valid', '**/*.jpg')
    eval_input_fn = lambda: make_dataset(eval_files, image_size=input_img_size, batch_size=1)
    eval_spec = tf.estimator.EvalSpec(eval_input_fn)

    tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)

# Show INFO-level TensorFlow log messages in the cell output.
tf.logging.set_verbosity(tf.logging.INFO)
# Run training and evaluation with '/tmp/dogscats/run1' as the model directory
# and the dataset located at `data_dir`.
train('/tmp/dogscats/run1', data_dir)

INFO:tensorflow:Using config: {'_model_dir': '/tmp/dogscats/run3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fdce0843780>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The e