### Loading images
Reference: the TensorFlow "Load images" tutorial, https://www.tensorflow.org/tutorials/load_data/images

In [4]:
from __future__ import absolute_import, division, print_function, unicode_literals
import datetime
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import TensorBoard, LearningRateScheduler
import matplotlib.pyplot as plt
import resnet

AUTOTUNE = tf.data.experimental.AUTOTUNE

import IPython.display as display
from PIL import Image
import numpy as np
import os
import pathlib

In [None]:
# To make some of the conda packages work (Tensorboard and pydot)
# Prepends the conda environment's bin directory to PATH for this kernel.
# NOTE(review): the environment path below is hard-coded to one machine;
# adjust it when running elsewhere.
# NOTE(review): PATH is presumably captured in a Python variable so that
# `$PATH` in the %env line expands to the current value — confirm.
PATH = os.getenv('PATH')
%env PATH=/home/henrik/anaconda3/envs/TF2/bin:$PATH

In [5]:
# --- Hardware / batching ---
NUM_GPUS = 1
BS_PER_GPU = 128
# Global batch size is derived from the per-GPU size so the two can never
# drift apart when NUM_GPUS or BS_PER_GPU is changed.
BATCH_SIZE = BS_PER_GPU * NUM_GPUS
NUM_EPOCHS = 10

# --- Input / output dimensions ---
IMG_HEIGHT = 64
IMG_WIDTH = 64
NUM_CHANNELS = 3
NUM_CLASSES = 8
#NUM_TRAIN_SAMPLES = 40000

# --- Learning rate ---
BASE_LEARNING_RATE = 0.1
# Each entry is (multiplier, start_epoch): from `start_epoch` onwards the
# initial learning rate is multiplied by `multiplier` (see `schedule`).
LR_SCHEDULE = [(0.1, 30), (0.01, 45)]

In [6]:
def preprocess(x, y):
    """Standardize image `x` with `tf.image.per_image_standardization`.

    The label `y` is passed through untouched so the function can be used
    directly in `Dataset.map` on (image, label) pairs.
    """
    standardized = tf.image.per_image_standardization(x)
    return standardized, y


def augmentation(x, y):
    """Randomly shift and mirror image `x`; the label `y` is returned as-is.

    The image is first cropped/padded to 8 pixels larger than the target size
    in each dimension, then a random IMG_HEIGHT x IMG_WIDTH window is cut out
    and randomly flipped left/right.
    """
    enlarged_height = IMG_HEIGHT + 8
    enlarged_width = IMG_WIDTH + 8
    enlarged = tf.image.resize_with_crop_or_pad(x, enlarged_height, enlarged_width)
    window = tf.image.random_crop(enlarged, [IMG_HEIGHT, IMG_WIDTH, NUM_CHANNELS])
    return tf.image.random_flip_left_right(window), y


def schedule(epoch):
    """Return the learning rate for `epoch` and log it as a summary scalar.

    The initial rate is BASE_LEARNING_RATE scaled by BS_PER_GPU / 128. Entries
    of LR_SCHEDULE are walked in order; every entry whose start epoch has been
    reached replaces the rate with `initial * multiplier`, and the walk stops
    at the first entry that has not been reached yet.
    """
    base_rate = BASE_LEARNING_RATE * BS_PER_GPU / 128
    current_rate = base_rate
    for factor, first_epoch in LR_SCHEDULE:
        if epoch < first_epoch:
            break
        current_rate = base_rate * factor
    tf.summary.scalar('learning rate', data=current_rate, step=epoch)
    return current_rate

In [7]:
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
data_dir = pathlib.Path('/mnt/sdb/augere_export_class/')

# Seed for the one-off shuffle of the file list (see below).
FILE_SHUFFLE_SEED = 22

DATASET_SIZE = len(list(data_dir.glob('*/*.png')))
# One class per sub-directory; skip plain files such as metadata.json.
class_names = np.array([item.name for item in data_dir.glob('*')
                        if item.is_dir() and item.name != 'metadata.json'])
print ("Class names: ",class_names)

# Create a dataset of the file paths.
# Use the same '*/*.png' pattern that DATASET_SIZE was counted with, so the
# split sizes derived from DATASET_SIZE match the dataset contents.
# `list_files` shuffles by default and draws a new order on every iteration;
# splitting such a dataset with take()/skip() lets samples move between the
# train/test/validation splits from one pass to the next. List the files in a
# fixed order instead and shuffle exactly once with a fixed seed.
# If cache files from an earlier run exist (./cache/*.tfcache), delete them so
# the splits are rebuilt from this ordering.
list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*.png'), shuffle=False)
list_ds = list_ds.shuffle(
    buffer_size=max(DATASET_SIZE, 1),
    seed=FILE_SHUFFLE_SEED,
    reshuffle_each_iteration=False)

Class names:  ['Anatomic landmarks' 'Unknown' 'Protruding lesions' 'Flat lesions'
 'Lumen' 'Mucosa' 'Normal' 'Excavated lesions']


In [18]:
# Count the .png files in every class directory and print one line per class.
samples_per_class = []

for class_name in class_names:
    class_samples = len(list(data_dir.glob(class_name+'/*.png')))
    samples_per_class.append(class_samples)
    # Format the current class name (not the whole `class_names` array, which
    # cannot be formatted with a width specifier).
    print('{0:18}: {1:3d}'.format(class_name, class_samples))

print ('\nTotal number of images:', DATASET_SIZE)

# If one class contains more than half of the entire sample size
# (guarded so an empty class list does not make np.max raise)
if samples_per_class and np.max(samples_per_class) > DATASET_SIZE//2:
    print ("But the dataset is mainly shit")

Anatomic landmarks: 6868
Unknown           : 274
Protruding lesions: 583
Flat lesions      : 908
Lumen             : 1446
Mucosa            : 251
Normal            : 33129
Excavated lesions : 1252

Total number of images: 44711
But the dataset is mainly shit


In [9]:
def get_label_test(file_path):
    """Return the indices of all class names containing 'Normal' as uint8.

    `file_path` is ignored. Not used, mainly for log.
    """
    normal_indices = []
    for position, name in enumerate(class_names):
        if 'Normal' in name:
            normal_indices.append(position)
    return np.uint8(normal_indices)
    
def get_label_int(file_path):
    """Map a file path to its class index as a one-element uint8 tensor.

    The class is taken from the second-to-last path component (the directory
    holding the file) and looked up in `class_names`.
    """
    path_components = tf.strings.split(file_path, os.path.sep)
    class_dir = path_components[-2]
    # Positions in class_names equal to the directory name; take the smallest.
    matching_positions = tf.where(tf.equal(class_dir, class_names))
    class_index = tf.reduce_min(matching_positions)
    as_uint8 = tf.dtypes.cast(class_index, tf.uint8)
    # Flatten so the label is a 1-D tensor rather than a scalar.
    return tf.reshape(as_uint8, [-1])

def get_label_bool(file_path):
    """Compare the file's class directory against every entry of `class_names`.

    The class directory is the second-to-last component of `file_path`; the
    result of the element-wise equality comparison is returned.
    """
    class_dir = tf.strings.split(file_path, os.path.sep)[-2]
    return class_dir == class_names

def decode_img(img):
    """Decode an encoded image string into a float32 IMG_HEIGHT x IMG_WIDTH image.

    Args:
        img: the raw, still-compressed file contents.

    Returns:
        A float32 tensor with 3 channels, resized to [IMG_HEIGHT, IMG_WIDTH].
    """
    # convert the compressed string to a 3D uint8 tensor
    # NOTE(review): the files counted elsewhere in this notebook are .png; per
    # the TF docs decode_jpeg also accepts PNG input — confirm for the TF
    # version in use.
    img = tf.image.decode_jpeg(img, channels=3)
    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    img = tf.image.convert_image_dtype(img, tf.float32)
    # resize the image to the desired size; `size` is [new_height, new_width].
    return tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])

def process_path(file_path):
    """Turn a file path into an (image, label) pair.

    The file is read as a raw string and decoded by `decode_img`; the label is
    the class index produced by `get_label_int`.
    """
    raw_contents = tf.io.read_file(file_path)
    return decode_img(raw_contents), get_label_int(file_path)

# Map every file path to an (image, label) pair via `process_path`;
# the degree of parallelism is left to tf.data (AUTOTUNE).
labeled_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)

Showing an example image/label pair

In [None]:
# Take one (image, label) pair from the dataset, draw the image without axes
# and print the name of its class.
for image, label in labeled_ds.take(1):
    pixel_values = image.numpy()
    label_index = label.numpy()
    plt.figure()
    fig = plt.imshow(pixel_values)
    plt.axis('off')
    print("Class:", class_names[label_index][0])
    
# print(next(iter(labeled_ds)))

## Training
Reference tutorial: https://lambdalabs.com/blog/tensorflow-2-0-tutorial-01-image-classification-basics/

Reference code: https://github.com/lambdal/TensorFlow2-tutorial/tree/master/01-basic-image-classification

Splitting into training, test and validation data

In [11]:
# Nominal split: 70 % train, 15 % validation, 15 % test.
train_size = int(0.7 * DATASET_SIZE)
val_size = int(0.15 * DATASET_SIZE)
test_size = int(0.15 * DATASET_SIZE)

# The first `train_size` samples are used for training.
train_ds = labeled_ds.take(train_size)
# Everything after the training samples is shared by test and validation.
holdout_ds = labeled_ds.skip(train_size)
# Test gets the first `test_size` held-out samples ...
test_ds = holdout_ds.take(test_size)
# ... and validation gets everything after them. Skipping by `test_size`
# (the amount actually taken) keeps the two splits disjoint and gap-free even
# if the test and validation fractions are changed to differ. Because of the
# integer truncation above, validation may hold slightly more than `val_size`.
val_ds = holdout_ds.skip(test_size)

In [12]:
def get_size(ds):
    """Return the cardinality tf.data reports for `ds`, converted with `.numpy()`."""
    cardinality = tf.data.experimental.cardinality(ds)
    return cardinality.numpy()

# Print the sample count of the full dataset and of each split, one per line.
size_report = (
    ("Full dataset sample size:", labeled_ds),
    ("Train dataset sample size:", train_ds),
    ("Test dataset sample size:", test_ds),
    ("Validation dataset sample size:", val_ds),
)
for caption, split in size_report:
    print ("{:32} {:>5}".format(caption, get_size(split)))

Full dataset sample size:        44711
Train dataset sample size:       31297
Test dataset sample size:         6706
Validation dataset sample size:   6708


In [13]:
def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):
    """Cache, shuffle, batch and prefetch a dataset.

    Args:
        ds: the dataset to prepare.
        cache: falsy to disable caching, a string to cache to that file,
            or any other truthy value to cache in memory.
        shuffle_buffer_size: buffer size passed to `ds.shuffle`.

    Returns:
        The dataset batched into BATCH_SIZE-sized batches with prefetching.
    """
    # Use `.cache(filename)` to cache preprocessing work for datasets that
    # don't fit in memory; otherwise load once and keep in memory.
    if cache:
        if isinstance(cache, str):
            # Make sure the directory for the cache file exists before the
            # dataset is first iterated.
            cache_dir = os.path.dirname(cache)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)
            ds = ds.cache(cache)
        else:
            ds = ds.cache()

    ds = ds.shuffle(buffer_size=shuffle_buffer_size)

    # Repeat forever
    #ds = ds.repeat()

    ds = ds.batch(BATCH_SIZE)

    # `prefetch` lets the dataset fetch batches in the background while the model
    # is training.
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds

# Create training dataset
train_ds = prepare_for_training(train_ds, cache="./cache/train_ds.tfcache")
# Create test dataset
test_ds = prepare_for_training(test_ds, cache="./cache/test_ds.tfcache")
# Create validation dataset
val_ds = prepare_for_training(val_ds, cache="./cache/val_ds.tfcache")

In [42]:
tf.random.set_seed(22)
# NOTE: `augmentation` and `preprocess` are defined earlier in this notebook
# but are not applied to the datasets used below.

input_shape = (IMG_HEIGHT, IMG_WIDTH, NUM_CHANNELS)

# Pick the distribution strategy once. With a single GPU the default strategy
# is used; otherwise the model is mirrored across the available GPUs.
if NUM_GPUS == 1:
    strategy = tf.distribute.get_strategy()
else:
    strategy = tf.distribute.MirroredStrategy()

# Build the input layer, the optimizer and the model inside the strategy scope
# so that everything is created under the same strategy, and so the
# build/compile code exists only once for both cases.
with strategy.scope():
    img_input = tf.keras.layers.Input(shape=input_shape)
    # Initial rate comes from the config cell instead of a second literal.
    opt = keras.optimizers.SGD(learning_rate=BASE_LEARNING_RATE, momentum=0.9)
    model = resnet.resnet56(img_input=img_input, classes=NUM_CLASSES)
    model.compile(
        optimizer=opt,
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'])

# Every run logs into its own time-stamped directory below logs/fit/.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp

# Default summary writer; `schedule` writes the learning rate through it.
file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()

tensorboard_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    update_freq='batch')

# Calls `schedule(epoch)` to obtain the learning rate for each epoch.
lr_schedule_callback = LearningRateScheduler(schedule)


# Train the model. Per-epoch validation uses the validation split so that the
# test split stays untouched until the final evaluation.
history = model.fit(
    train_ds,
    epochs=NUM_EPOCHS,
    validation_data=val_ds,
    validation_freq=1,
    #steps_per_epoch=245, if ds.repeat() should be ceil(num_samples/batch_size)
    #validation_steps=245, should be ceil(num_val_samples/batch_size)
    callbacks=[tensorboard_callback, lr_schedule_callback])

# Print record of loss and metric values during training
# print('\nhistory dict:', history.history)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Validate the results

`Tensorboard`

In [53]:
# Clear any logs from previous runs (move to .old instead?)
# !rm -rf ./logs/

env: PATH=/home/henrik/anaconda3/envs/TF2/bin:/home/henrik/anaconda3/bin:/home/henrik/anaconda3/condabin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin


In [59]:
# Helper module for managing TensorBoard from inside a notebook.
from tensorboard import notebook

# Load the TensorBoard notebook extension
# (if it is already loaded, IPython suggests `%reload_ext tensorboard` instead)
%load_ext tensorboard

# Report the TensorBoard instances that are currently known.
notebook.list()

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
No known TensorBoard instances running.


In [None]:
# Start tensorboard
# Reads from the top-level `logs` directory, which contains the per-run
# directories written under logs/fit/ during training.
%tensorboard --logdir logs

# !kill 20058

`Keras`

In [None]:
# Evaluate the trained model on the test split, save it to disk, load it back
# and evaluate the reloaded copy on the same split.
saved_model_path = 'model.h5'

model.evaluate(test_ds)
model.save(saved_model_path)

new_model = keras.models.load_model(saved_model_path)
new_model.evaluate(test_ds)

In [None]:
# Evaluate the trained model on the validation split.
model.evaluate(val_ds)

## Predict new samples

In [None]:
# Take the first batch of the validation split, show its first image with the
# true label, then run the model on the whole batch and print the predicted
# label of that first image.
first_batch = next(iter(val_ds))
image = first_batch[0].numpy()
label = first_batch[1]

true_index = label.numpy()[0][0]
print ("True label:", class_names[true_index])

plt.figure()
plt.imshow(image[0])

res = model.predict(image)
predicted_index = np.argmax(res[0])
print ("Predicted label:", class_names[predicted_index])

In [None]:
# Print true vs. predicted class for every sample of the batch predicted above.
# Flatten the labels to plain integers first: indexing `class_names` with a
# one-element label yields an array, which cannot be formatted with `{:20}`.
true_indices = label.numpy().reshape(-1)
for idx, (true_index, scores) in enumerate(zip(true_indices, res)):
    print ('{:3} True:{:20} Pred:{}'.format(idx, class_names[true_index], class_names[np.argmax(scores)]))

In [None]:
# Predicted class index for every image in the batch.
# `predict_classes` is only provided by Sequential models; taking the argmax
# over the predicted scores gives the class indices for any Keras model.
np.argmax(model.predict(image), axis=-1)