In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
#tf.enable_eager_execution() 
import IPython.display as display
from PIL import Image
import numpy as np
import pathlib
import matplotlib.pyplot as plt
import os
import seaborn as sns
from skimage import transform
import tensorflow_addons as tfa
import scipy.ndimage as ndimage
keras = tf.keras
import pandas as pd
import glob

In [None]:
# Sentinel handed to tf.data below (as `num_parallel_calls` and as the
# prefetch `buffer_size`) so the runtime chooses those values itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [None]:
# Show which TensorFlow version this run is using.
tf.__version__

In [None]:
def getFileList(data_dir):
    """List the files of a ``<root>/<class>/<file>`` dataset and its class names.

    Args:
        data_dir: Dataset root (str or path-like). Every sub-directory of the
            root is treated as one class.

    Returns:
        A 3-tuple ``(files_ds, num_files, class_names)``:

        * ``files_ds`` -- shuffled ``tf.data.Dataset`` of every path matching
          ``<data_dir>/*/*``.
        * ``num_files`` -- number of ``*.jpg`` plus ``*.png`` files found one
          level below ``data_dir``.
        * ``class_names`` -- 1-D NumPy unicode array with the sub-directory
          names in sorted order.
    """
    data_dir = pathlib.Path(data_dir)
    # Only directories are classes; stray top-level files must not become
    # labels. Sorting makes the class -> index mapping reproducible between
    # runs, and `dtype=str` sizes the unicode dtype to the longest name
    # (a fixed '<U10' would silently truncate longer class names, which then
    # never match the directory component compared against them).
    class_names = np.array(
        sorted(item.name for item in data_dir.glob('*')
               if item.is_dir() and item.name != "LICENSE.txt"),
        dtype=str)
    num_files = sum(len(list(data_dir.glob(pattern)))
                    for pattern in ('*/*.jpg', '*/*.png'))
    # NOTE(review): the dataset lists '*/*' (every file), while `num_files`
    # counts only .jpg/.png -- the two disagree if other files are present.
    files_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'), shuffle=True)
    return files_ds, num_files, class_names

In [None]:
# File listings, image counts and class names for the three splits.
# Labels are later encoded against CLASS_NAMES (the training split's classes).
list_ds, image_count, CLASS_NAMES = getFileList("./training/")
list_ds_test, test_image_count, TEST_CLASS_NAMES = getFileList("./testing/") # alternative test set: "./ads_testing/"
list_ds_eval, eval_image_count, EVAL_CLASS_NAMES = getFileList("./eval/")

In [None]:
# Number of .jpg/.png files counted in the eval split.
eval_image_count

In [None]:
# Class names discovered in the training directory.
CLASS_NAMES

In [None]:
# Batch size used when batching every dataset.
BATCH_SIZE = 32
# Spatial size every image is resized to (also the model's input size).
IMG_HEIGHT = 224
IMG_WIDTH = 224
# Number of batches needed to see each split once (last batch may be partial,
# hence the ceil).
STEPS_PER_EPOCH    = int(np.ceil(image_count/BATCH_SIZE))
STEPS_PER_TEST     = int(np.ceil(test_image_count/BATCH_SIZE))
STEPS_PER_EVAL     = int(np.ceil(eval_image_count/BATCH_SIZE))

In [None]:
def get_label(file_path):
  """Encode the class of ``file_path`` as a boolean vector over CLASS_NAMES.

  The path is split on the OS path separator; the second-to-last component
  (the directory that holds the file) is compared element-wise against
  CLASS_NAMES, giving True at the position(s) whose name matches.
  """
  class_dir = tf.strings.split(file_path, os.path.sep)[-2]
  return class_dir == CLASS_NAMES

In [None]:
def random_rotate_image(image):
  """Rotate ``image`` by a random angle drawn uniformly from [-30, 30).

  ``reshape=False`` keeps the output the same shape as the input.
  """
  angle = np.random.uniform(-30, 30)
  return ndimage.rotate(image, angle, reshape=False)

In [None]:
def tf_random_rotate_image(image, label):
  """tf.data-compatible wrapper around ``random_rotate_image``.

  The rotation runs as Python code through ``tf.py_function`` and its result
  is declared as float32. The input's static shape is re-attached to the
  result afterwards. ``label`` is passed through unchanged.
  """
  static_shape = image.shape
  rotated = tf.py_function(random_rotate_image, [image], [tf.float32])[0]
  rotated.set_shape(static_shape)
  return rotated, label

In [None]:
def decode_img(img):
  """Decode an encoded image string into a resized float32 RGB tensor.

  Args:
    img: Scalar string tensor with the raw (compressed) file contents.

  Returns:
    float32 tensor of shape ``[IMG_HEIGHT, IMG_WIDTH, 3]``.
  """
  # Convert the compressed string to a 3-D uint8 tensor with 3 channels.
  img = tf.image.decode_jpeg(img, channels=3)
  # `convert_image_dtype` rescales the uint8 values to floats in [0, 1].
  img = tf.image.convert_image_dtype(img, tf.float32)
  # `tf.image.resize` expects the size as (new_height, new_width).
  return tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])

In [None]:
def process_path(file_path):
  """Turn a file path into an ``(image, label)`` training example.

  Args:
    file_path: Scalar string tensor of the form ``<root>/<class>/<file>``.

  Returns:
    ``(img, label)`` where ``img`` is the decoded, resized image produced by
    ``decode_img`` and ``label`` is the vector produced by ``get_label``.
  """
  # No debug print here: inside `Dataset.map` this function is traced, so a
  # print would fire once with a symbolic tensor rather than per file.
  label = get_label(file_path)
  # Load the raw data from the file as a string, then decode it.
  img = decode_img(tf.io.read_file(file_path))
  return img, label

In [None]:
# Map every file path to an (image, label) pair.
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
labeled_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
# Augmentation: random rotation is applied to the training split only.
labeled_ds = labeled_ds.map(tf_random_rotate_image)
labeled_ds_test     = list_ds_test.map(process_path, num_parallel_calls=AUTOTUNE)
labeled_ds_eval     = list_ds_eval.map(process_path, num_parallel_calls=AUTOTUNE)

In [None]:
def prepare_for_training(ds, cache=False, shuffle_buffer_size=1000, shuffle=True,
                         repeat=True, batch_size=None):
    """Cache, shuffle, repeat, batch and prefetch a dataset.

    Args:
        ds: Dataset of individual examples.
        cache: Falsy for no caching, a ``str`` to cache to that file (for
            datasets that do not fit in memory), any other truthy value to
            cache in memory.
        shuffle_buffer_size: Buffer size passed to ``shuffle``.
        shuffle: Whether to shuffle the examples.
        repeat: Whether to repeat the dataset forever (the default, matching
            the historical behavior). Pass ``False`` for a single pass.
        batch_size: Examples per batch; ``None`` means the notebook-wide
            ``BATCH_SIZE``.

    Returns:
        The transformed dataset, yielding batches.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE

    if cache:
        ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()

    if shuffle:
        # NOTE(review): with `reshuffle_each_iteration=False` the shuffled
        # order is drawn once and reused -- confirm that is intended.
        ds = ds.shuffle(buffer_size=shuffle_buffer_size, reshuffle_each_iteration=False)

    if repeat:
        ds = ds.repeat()

    ds = ds.batch(batch_size)

    # `prefetch` lets the dataset fetch batches in the background while the
    # model is training.
    return ds.prefetch(buffer_size=AUTOTUNE)

In [None]:
def checkFiles(folder):
    """Find invalid images: try to decode every ``*jpg`` file in ``folder``.

    Every file is checked; a file that fails to decode no longer aborts the
    scan. Each failing file name is printed as it is found.

    Args:
        folder: Directory whose direct ``*jpg`` entries are checked.

    Returns:
        List of the file names that could not be decoded (empty if all are
        valid).
    """
    invalid = []
    for filename in glob.glob(folder+"/*jpg"):
        try:
            tf.image.decode_jpeg(tf.io.read_file(filename), channels=3)
        except tf.errors.InvalidArgumentError:
            # Raised by the decoder when the bytes are not a readable image.
            print(filename)
            invalid.append(filename)
    return invalid

In [None]:
# Batched input pipelines. Only the training split is shuffled; all three
# repeat indefinitely, so consumers must bound them with explicit step counts.
train_ds    = prepare_for_training(labeled_ds)
test_ds     = prepare_for_training(labeled_ds_test, shuffle=False)
eval_ds     = prepare_for_training(labeled_ds_eval, shuffle=False)

# MobileNet V2

In [None]:
# (height, width, channels) of the images fed to the network.
IMG_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)

# Create the base model from the pre-trained model MobileNet V2:
# ImageNet weights, without the original classification head
# (`include_top=False`).
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

In [None]:
# Keep the pre-trained backbone trainable, so its weights are fine-tuned
# together with the new classification head.
base_model.trainable = True

In [None]:
# Print the layer-by-layer architecture of the base model.
base_model.summary()

In [None]:
# Pooling layer placed between the base model and the classification head.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

In [None]:
# Classification head: one output (logit) per class. The width follows the
# classes discovered on disk instead of a hard-coded count, so it always
# matches the label vectors built from CLASS_NAMES.
prediction_layer = keras.layers.Dense(len(CLASS_NAMES))

In [None]:
# Full classifier: pre-trained base model -> global average pooling -> dense head.
model = tf.keras.Sequential([
  base_model,
  global_average_layer,
  prediction_layer
])


In [None]:
# Small learning rate, since the pre-trained backbone is being fine-tuned.
base_learning_rate = 0.0001
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
# `from_logits=True` because the Dense head has no activation.
# NOTE(review): labels are one class per image, yet the loss is
# BinaryCrossentropy (each output treated independently) and 'accuracy' is
# resolved accordingly -- confirm this is intended rather than
# CategoricalCrossentropy.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
# Print the architecture of the assembled classifier.
model.summary()

In [None]:
# Number of training epochs.
initial_epochs = 5

In [None]:
# Train on the training split and validate on the eval split after each epoch.
# Both datasets repeat forever, so the step counts define where an epoch
# (and a validation pass) ends.
history = model.fit(train_ds,
                    epochs=initial_epochs,
                    validation_data=eval_ds,
                    steps_per_epoch=STEPS_PER_EPOCH,
                    validation_steps=STEPS_PER_EVAL)

### Save Model

In [None]:
import time

# Every run writes its artifacts into a directory named after the current
# Unix timestamp (decimal point removed). `path` keeps its trailing "/"
# because later cells build file names by plain string concatenation.
path = str(time.time()).replace('.', '') + "/"
# `exist_ok=True` replaces the separate "does it exist?" check.
os.makedirs(path, exist_ok=True)
model.save(path + 'model.h5')

In [None]:
def _save_heatmap(matrix, out_stem):
    """Show ``matrix`` as an annotated heatmap and write ``<out_stem>.png`` / ``.csv``."""
    sns.heatmap(matrix, xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES, annot=True)
    plt.yticks(rotation=0)
    plt.savefig(out_stem + ".png")
    plt.show()
    pd.DataFrame(matrix).to_csv(out_stem + ".csv")


def savecm(ds, filename, iterations):
    """Evaluate ``model`` on ``ds`` and save recall/precision confusion matrices.

    Runs the global ``model`` on ``iterations`` batches of ``ds``, builds the
    confusion matrix (rows = true class, columns = predicted class) and writes,
    under the global output prefix ``path``:

    * ``<filename>recall.png`` / ``.csv`` -- matrix with each row normalized
      by the number of samples of that true class;
    * ``<filename>precission.png`` / ``.csv`` -- matrix with each column
      normalized by the number of predictions of that class.

    A class with no samples (or no predictions) yields NaN in its row
    (or column), since the ratio is undefined.

    Args:
        ds: Batched dataset yielding ``(image_batch, label_batch)`` with
            one-hot label rows.
        filename: Prefix for the output file names.
        iterations: Number of batches to evaluate.
    """
    num_classes = len(CLASS_NAMES)
    labels = []
    predictions = []
    # Walk the dataset once. Calling `next(iter(ds))` inside the loop would
    # build a fresh iterator each time and keep returning the first batch.
    for image_batch, label_batch in ds.take(iterations):
        # Only the images go to the model; the predicted class is the argmax
        # of the logits.
        logits = model.predict(image_batch, verbose=0)
        predictions.extend(np.argmax(logits, axis=1).tolist())
        labels.extend(np.argmax(label_batch.numpy(), axis=1).tolist())

    # `num_classes` fixes the matrix size even if some class never occurs.
    confusion = tf.math.confusion_matrix(labels, predictions, num_classes=num_classes)
    print(confusion)
    counts = confusion.numpy().astype(float)

    # Undefined ratios (empty row/column) stay NaN, without runtime warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        recall = counts / counts.sum(axis=1, keepdims=True)
        precision = counts / counts.sum(axis=0, keepdims=True)

    print("Recall")
    _save_heatmap(recall, path + filename + "recall")
    print("Precission")
    _save_heatmap(precision, path + filename + "precission")

In [None]:
# Confusion matrices for the test and eval splits, covering one pass' worth
# of batches each.
savecm(test_ds, 'cm_test', STEPS_PER_TEST)
savecm(eval_ds, 'cm_eval', STEPS_PER_EVAL)

In [None]:
# Persist the per-epoch training/validation metrics next to the saved model.
pd.DataFrame(history.history).to_csv(path+'history.csv')

In [None]:
# One figure per metric: training curve and validation curve over the epochs.
curve_specs = (
    ('accuracy', 'val_accuracy', 'model accuracy', 'Accuracy', 'lower right'),
    ('loss', 'val_loss', 'model loss', 'Loss', 'upper right'),
)
for train_key, val_key, title, ylabel, legend_loc in curve_specs:
    for key in (train_key, val_key):
        plt.plot(history.history[key])
    plt.title(title)
    plt.ylabel(ylabel, fontsize=16)
    plt.xlabel('Epoch', fontsize=16)
    plt.legend(['train', 'validation'], loc=legend_loc, fontsize=16)
    plt.show()