# 3_tf_hub_transfer_learning

training-data-analyst/courses/machine_learning/deepdive2/image_classification/labs/3_tf_hub_transfer_learning.ipynb

Keras has some convenient methods to read in image data. For instance, [tf.keras.preprocessing.image.ImageDataGenerator](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator) is great for small local datasets. A tutorial on how to use it can be found [here](https://www.tensorflow.org/tutorials/load_data/images). But what if we have so many images that they don't fit on a local machine? We can use [tf.data.Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) to build a generator based on files in a Google Cloud Storage bucket.

### get image files from googleapi

In [None]:
# Fetch the flower photos archive and unpack it; `data_dir` receives the
# value returned by `get_file`.
data_dir = tf.keras.utils.get_file(
    fname='flower_photos',
    origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    untar=True,
)

In [None]:
# https://docs.python.org/3/library/pathlib.html#basic-use
# Wrap the download location in a Path object so it can be globbed below.
data_dir = pathlib.Path(data_dir)

# https://docs.python.org/3/library/pathlib.html#pathlib.Path.glob
# Match every .jpg file exactly one directory level below data_dir.
# The result is not assigned, so this line only displays the returned
# iterator when run as the last expression of a notebook cell.
data_dir.glob('*/*.jpg')

### decode image and csv using tensorflow

In [None]:
def decode_img(img, reshape_dims):
    """Turn encoded JPEG bytes into a resized float32 image tensor.

    Args:
        img: The compressed JPEG string to decode.
        reshape_dims: Target size forwarded to `tf.image.resize`.

    Returns:
        The decoded image, converted to float32 and resized to
        `reshape_dims`.
    """
    # Decode into a 3D uint8 tensor with IMG_CHANNELS channels.
    pixels_uint8 = tf.image.decode_jpeg(img, channels=IMG_CHANNELS)
    # `convert_image_dtype` rescales integer pixels to floats in [0, 1].
    pixels_float = tf.image.convert_image_dtype(pixels_uint8, tf.float32)
    resized = tf.image.resize(pixels_float, reshape_dims)
    return resized

def decode_csv(csv_row):
    """Parse one `path,label` CSV row into file contents and a label mask.

    Args:
        csv_row: A CSV line with two string columns (file path, label).

    Returns:
        A `(image_bytes, label)` pair: the raw contents of the file at the
        parsed path, and the element-wise result of comparing CLASS_NAMES
        with the parsed label string.
    """
    # Both columns are strings; these values double as the column defaults.
    column_defaults = ["path", "flower"]
    image_path, flower_name = tf.io.decode_csv(csv_row, column_defaults)
    # True at the position(s) where CLASS_NAMES equals the row's label.
    label_mask = tf.math.equal(CLASS_NAMES, flower_name)
    raw_contents = tf.io.read_file(filename=image_path)
    return raw_contents, label_mask

### image augmentation

In [None]:
# Maximum brightness shift: 63/255 is about 24.7% of the [0, 1] pixel range.
MAX_DELTA = 63.0 / 255.0
# Bounds of the random contrast factor.
CONTRAST_LOWER = 0.2
CONTRAST_UPPER = 1.8

def read_and_preprocess(image_bytes, label, random_augment=False):
    """Decode an image and optionally apply random augmentation.

    Args:
        image_bytes: Encoded JPEG contents, as accepted by `decode_img`.
        label: Label for the image; returned unchanged.
        random_augment: When True, the image is decoded 10 pixels larger in
            each dimension, randomly cropped back to
            (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), randomly flipped
            left/right, and has its brightness and contrast randomly
            perturbed. When False, the image is only decoded and resized.

    Returns:
        A `(img, label)` pair where `img` is sized IMG_HEIGHT x IMG_WIDTH.
    """
    if random_augment:
        # Decode with a 10-pixel margin so the random crop has room to shift.
        img = decode_img(image_bytes, [IMG_HEIGHT + 10, IMG_WIDTH + 10])
        # Use IMG_CHANNELS (as decode_img does) rather than a hard-coded 3.
        img = tf.image.random_crop(
            img, (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, MAX_DELTA)
        img = tf.image.random_contrast(img, CONTRAST_LOWER, CONTRAST_UPPER)
    else:
        # tf.image.resize takes [new_height, new_width]; keep the same
        # height-first order as the augmentation branch above.
        img = decode_img(image_bytes, [IMG_HEIGHT, IMG_WIDTH])
    return img, label

### create dataset from csv file

In [None]:
def load_dataset(csv_of_filenames, batch_size, training=True):
    """Build a batched `tf.data` pipeline from a CSV of image paths/labels.

    Args:
        csv_of_filenames: Path(s) of the CSV file(s) to read line by line;
            each line is parsed by `decode_csv`.
        batch_size: Number of examples per batch.
        training: When True, images are randomly augmented, shuffled with a
            SHUFFLE_BUFFER-sized buffer, and repeated indefinitely. When
            False, images are only decoded/resized and each is used once.

    Returns:
        A batched, prefetching `tf.data.Dataset` of `(image, label)` pairs.
    """
    # Cache right after decode_csv so the file contents are read once, while
    # the (random) preprocessing below is still re-applied on every pass.
    dataset = tf.data.TextLineDataset(filenames=csv_of_filenames) \
        .map(decode_csv).cache()

    if training:
        # Call read_and_preprocess directly with augmentation switched on,
        # so this function does not depend on a separate wrapper helper.
        dataset = dataset \
            .map(lambda image_bytes, label: read_and_preprocess(
                image_bytes, label, random_augment=True)) \
            .shuffle(SHUFFLE_BUFFER) \
            .repeat(count=None)  # Indefinitely.
    else:
        dataset = dataset \
            .map(read_and_preprocess) \
            .repeat(count=1)  # Each photo used once.

    # Prefetch prepares the next set of batches while current batch is in use.
    return dataset.batch(batch_size=batch_size).prefetch(buffer_size=AUTOTUNE)

In [None]:
# test code
# Smoke test: pull one example through the training pipeline and show it.

train_path = "gs://cloud-ml-data/img/flower_photos/train_set.csv"
train_data = load_dataset(train_path, 1)  # batch_size of 1; training=True by default
itr = iter(train_data)

# Take the first batch, then plot its first image and print its label.
image_batch, label_batch = next(itr)
img = image_batch[0]
plt.imshow(img)
print(label_batch[0])

In [None]:
# Build the TF Hub handle for the chosen feature-vector module by inserting
# the module name into the URL template.
module_selection = "mobilenet_v2_100_224"
module_handle = "https://tfhub.dev/google/imagenet/{}/feature_vector/4" \
    .format(module_selection)

import tensorflow_hub as hub
# Wrap the hub module as a Keras layer; trainable=False is passed so the
# layer is created as non-trainable.
keras_layer = hub.KerasLayer(module_handle, trainable=False)



# 4_tpu_training

training-data-analyst/courses/machine_learning/deepdive2/image_classification/labs/4_tpu_training.ipynb

### define a TPU strategy

In [None]:
# define a TPU strategy
# Resolve the TPU from args.tpu_address (presumably the parsed
# --tpu_address command-line flag; confirm against the trainer's arg parser).
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(args.tpu_address)
# Connect to the cluster and initialize the TPU system before the strategy
# object is created from the same resolver.
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)

# create model and compile in strategy.scope
with strategy.scope():
    # NOTE(review): looks like a placeholder; `model` is not defined in this
    # snippet and the result of build_model() is not kept. Confirm against
    # the real trainer code.
    model.build_model()

# NOTE(review): placeholder call; fit() is shown here without arguments.
model.fit()

### create a TPU and run

In [None]:
# Run in Google Cloud Shell.
# Creates a TPU execution group named "my-tpu" in zone us-central1-a with
# TensorFlow 2.3.2, an n1-standard-1 machine type and a v3-8 accelerator.
gcloud compute tpus execution-groups create \
 --name=my-tpu \
 --zone=us-central1-a \
 --tf-version=2.3.2 \
 --machine-type=n1-standard-1 \
 --accelerator-type=v3-8
 
# Once the command completes, you are automatically logged in to the TPU shell.

In [None]:
# Run in the TPU shell.
# Launches the tpu_models.trainer.task module, passing the TPU name as
# --tpu_address and writing to a job directory suffixed with a UTC timestamp.
# NOTE(review): BUCKET is not set in this snippet; presumably exported
# beforehand. Confirm before running.
export TPU_NAME=my-tpu
python3 -m tpu_models.trainer.task \
    --tpu_address=$TPU_NAME \
    --hub_path=gs://$BUCKET/tpu_models \
    --job-dir=gs://$BUCKET/flowers_tpu_$(date -u +%y%m%d_%H%M%S)

# 2_mnist_models

training-data-analyst/courses/machine_learning/deepdive2/image_classification/labs/2_mnist_models.ipynb

### Scales images from a 0-255 int range to a 0-1 float range

In [None]:
def scale(image, label):
    """Convert an image to float32 in the 0-1 range and append a channel axis.

    Args:
        image: Image with pixel values in the 0-255 range.
        label: Label for the image; returned unchanged.

    Returns:
        A `(image, label)` pair where `image` is float32, divided by 255,
        with one extra trailing dimension.
    """
    as_float = tf.cast(image, tf.float32)
    normalized = as_float / 255
    # Add a trailing axis of size 1 (the last dimension becomes the channel).
    with_channel = tf.expand_dims(normalized, -1)
    return with_channel, label

### Loads MNIST dataset into a tf.data.Dataset

In [None]:
def load_dataset(
        data, training=True, buffer_size=5000, batch_size=100, nclasses=10):
    """Loads MNIST dataset into a tf.data.Dataset.

    Args:
        data: A `((x_train, y_train), (x_test, y_test))` tuple.
        training: When True, use the training split, shuffle the examples
            and repeat indefinitely; otherwise use the test split once.
        buffer_size: Size of the shuffle buffer (training only).
        batch_size: Number of examples per batch.
        nclasses: Number of classes used for one-hot encoding the labels.

    Returns:
        A batched `tf.data.Dataset` of `(scaled_image, one_hot_label)` pairs.
    """
    (x_train, y_train), (x_test, y_test) = data
    x, y = (x_train, y_train) if training else (x_test, y_test)
    # One-hot encode the classes
    y = tf.keras.utils.to_categorical(y, nclasses)
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).map(scale)
    if training:
        # Shuffle individual examples BEFORE batching; shuffling after
        # `batch` would only permute whole batches whose contents never
        # change from epoch to epoch.
        dataset = dataset.shuffle(buffer_size)
    dataset = dataset.batch(batch_size)
    if training:
        # Repeat last so every pass over the data is reshuffled.
        dataset = dataset.repeat()
    return dataset

# 1_mnist_linear

training-data-analyst/courses/machine_learning/deepdive2/image_classification/labs/1_mnist_linear.ipynb

In [None]:
# Load the MNIST data through Keras.
# The train/test values are numpy ndarray objects.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Get the number of classes: count the distinct values in the training
# labels. NCLASSES is the result of tf.size, not a plain Python int.
NCLASSES = tf.size(tf.unique(y_train).y)


In [None]:
# Plot one test image from the numpy ndarray.
# IMGNO selects the image; it is reshaped to HEIGHT x WIDTH before plotting.
# (IMGNO, HEIGHT and WIDTH are defined outside this snippet.)
import matplotlib.pyplot as plt
plt.imshow(x_test[IMGNO].reshape(HEIGHT, WIDTH))

### One-hot encode the classes

In [None]:
import tensorflow as tf
# Replace the labels `y` with their one-hot encoding over NCLASSES classes.
# NOTE(review): `y` must already hold the labels; it is not assigned in this
# snippet.
y = tf.keras.utils.to_categorical(y, NCLASSES)