### Data Visualization

In [2]:
from pathlib import Path
import numpy as np
import tensorflow as tf
import sys
sys.path.append('../../../')
from utils import (plot_images_sample,
                   plot_image_pred,
                   unzip_data,
                   get_logger,
                   plot_training,
                   EnoughTrainingCallback)
# local directories holding the training and validation images
# NOTE(review): absolute paths tied to one machine — adjust before running elsewhere
LOCAL_TRAIN = '/home/david/Downloads/beers_train'
LOCAL_VAL = '/home/david/Downloads/beers_valid'
# model input shape; its first two entries are also used as the image target size
# presumably (height, width, channels) — TODO confirm
INPUT_SIZE = (150, 150, 3)
# number of images per generated batch
BATCH_SIZE = 20
# module-level logger used by main()
logger = get_logger(__name__)
def get_data(directory, input_shape, batch_size, train=True):
    """
    build a keras directory iterator yielding batches of rescaled images
    with categorical labels

    :param directory: folder passed to ``flow_from_directory``
    :param input_shape: shape whose first two entries are used as the image target size
    :param batch_size: number of images per batch
    :param train: when True, random augmentation (rotation, shifts, shear,
        zoom, horizontal flip) is applied on top of the 1/255 rescaling
    :return: the iterator returned by ``flow_from_directory``
    """
    # rescaling is shared by both modes; augmentation is training-only
    generator_options = {'rescale': 1 / 255}
    if train:
        generator_options.update(rotation_range=40,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 shear_range=0.2,
                                 zoom_range=0.2,
                                 horizontal_flip=True,
                                 fill_mode='nearest')
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(**generator_options)
    target_size = (input_shape[0], input_shape[1])
    return datagen.flow_from_directory(directory,
                                       batch_size=batch_size,
                                       class_mode='categorical',
                                       target_size=target_size)
def get_model(input_shape, pre_trained_layer, num_classes=3, learning_rate=0.0001):
    """
    build and compile a transfer-learning classifier on top of InceptionV3

    The ImageNet-pretrained InceptionV3 (without its top) is frozen, cut at
    the layer named ``pre_trained_layer`` and extended with
    Flatten -> Dense(1024, relu) -> Dropout(0.2) -> Dense(num_classes, softmax).
    The model summary is printed as a side effect.

    :param input_shape: input shape passed to InceptionV3
    :param pre_trained_layer: name of the InceptionV3 layer whose output feeds the new head
    :param num_classes: number of units of the softmax output layer
    :param learning_rate: learning rate of the RMSprop optimizer
    :return: the compiled ``tf.keras.Model``
    """
    pre_trained = tf.keras.applications.InceptionV3(input_shape=input_shape,
                                                    include_top=False,
                                                    weights='imagenet')
    # freeze the pretrained weights so only the new head is trained
    for layer in pre_trained.layers:
        layer.trainable = False
    last_layer = pre_trained.get_layer(pre_trained_layer)
    last_output = last_layer.output
    x = tf.keras.layers.Flatten()(last_output)
    x = tf.keras.layers.Dense(units=1024,
                              activation=tf.keras.activations.relu)(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(units=num_classes,
                              activation=tf.keras.activations.softmax)(x)
    model = tf.keras.Model(pre_trained.input, x)
    model.summary()
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias
    model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=learning_rate),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')])
    return model
def main():
    """
    train the classifier on the local image folders, plot the training
    curves and log the accuracy obtained on one validation batch
    """
    logger.info('starting training...')
    # data loading
    images_train = get_data(LOCAL_TRAIN, INPUT_SIZE, BATCH_SIZE, train=True)
    images_test = get_data(LOCAL_VAL, INPUT_SIZE, BATCH_SIZE, train=False)
    # model training
    enough_training_callback = EnoughTrainingCallback(metric='accuracy', threshold=0.97)
    model = get_model(INPUT_SIZE, pre_trained_layer='mixed7')
    history = model.fit(images_train,
                        validation_data=images_test,
                        epochs=4,
                        steps_per_epoch=len(images_train),
                        validation_steps=len(images_test),
                        callbacks=[enough_training_callback])
    plot_training(history, metrics=['loss', 'accuracy'])
    # model testing
    # use the iterator protocol rather than the iterator's own `.next()` method
    images, labels = next(images_test)
    preds = model.predict(images)
    # the generator is built with class_mode='categorical', so both the labels
    # and the softmax predictions are reduced to class indices with argmax
    predicted_classes = np.argmax(preds, axis=-1)
    true_classes = np.argmax(labels, axis=-1)
    batch_accuracy = float(np.mean(predicted_classes == true_classes))
    logger.info('sample batch accuracy: %.3f', batch_accuracy)
    logger.info('done!')
# run the training pipeline only when this code is the entry point
if __name__ == '__main__':
    main()

ModuleNotFoundError: No module named 'utils'

In [3]:
import logging
import zipfile
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import tensorflow as tf
def get_logger(name):
    """
    return the DEBUG-level logger called ``name`` with a console handler

    The stream handler is attached only when the logger has no handler yet,
    so calling this function several times with the same name does not
    duplicate every log line.

    :param name: logger name, typically ``__name__``
    :return: the configured ``logging.Logger``
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    # logging.getLogger returns the same object for the same name, so a
    # handler added by an earlier call is still attached
    if not logger.handlers:
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(formatter)
        logger.addHandler(ch)
    return logger
def plot_time_series(time: np.array, values: np.array, start=0, end=None, figsize=(12, 5)):
    """
    plot the ``[start:end]`` window of a series against time on one gridded axis
    """
    window = slice(start, end)
    _, axis = plt.subplots(figsize=figsize)
    axis.plot(time[window], values[window])
    axis.grid()
    plt.setp(axis, xlabel='time', ylabel='value', title='time series plot')
    plt.show()
def plot_confusion_matrix(y_pred, y_true, class_labels=None):
    """
    plot confusion matrix for multi class

    The matrix is computed with numpy: rows are the actual classes, columns
    the predicted classes, and the class order is the sorted union of the
    values found in ``y_true`` and ``y_pred``.

    :param y_pred: predicted class of every sample
    :param y_true: actual class of every sample
    :param class_labels: optional tick labels, one per class
    :raises ValueError: if the inputs are empty or differ in length
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError("y_pred and y_true must have the same number of samples")
    if y_true.size == 0:
        raise ValueError("y_pred and y_true must not be empty")
    classes = np.unique(np.concatenate([y_true, y_pred]))
    matrix = np.zeros((len(classes), len(classes)), dtype=int)
    # add.at accumulates repeated (actual, predicted) pairs, unlike fancy-index +=
    np.add.at(matrix,
              (np.searchsorted(classes, y_true), np.searchsorted(classes, y_pred)),
              1)
    figsize = (1.5 * matrix.shape[0], 1.5 * matrix.shape[1])
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(matrix,
                annot=True,
                cbar=False,
                fmt='d',
                ax=ax)
    # explicit checks: truth-testing a numpy array of labels would raise
    if class_labels is not None and len(class_labels) > 0:
        ax.set_xticklabels(class_labels)
        ax.set_yticklabels(class_labels)
    ax.set_title("confussion matrix")
    ax.set_xlabel("predicted class")
    ax.set_ylabel("actual class")
    plt.tight_layout()
    plt.show()
def plot_training(history, metrics: list = ('loss',), figsize: tuple = (12, 5), skip=0, val=True):
    """
    plot the selected training metrics per epoch, one subplot per metric,
    optionally overlaying the matching ``val_<metric>`` curve

    the first ``skip`` epochs are left out of the plot
    """
    records = history.history
    epochs = range(1 + skip, len(records[metrics[0]]) + 1)
    _, axes = plt.subplots(1, len(metrics), figsize=figsize)
    # a single subplot comes back as a bare Axes; wrap it so it can be iterated
    axes = np.atleast_1d(axes)
    for axis, metric in zip(axes, metrics):
        axis.plot(epochs, records[metric][skip:], color='k', linestyle='solid', label=metric, linewidth=2)
        if val:
            validation_values = records[f"val_{metric}"][skip:]
            axis.plot(epochs, validation_values, color='r', linestyle='dotted',
                      label=f'validation {metric}')
        axis.set_ylabel(metric)
        axis.set_xlabel('epochs')
        axis.grid()
        axis.legend()
    plt.show()
def plot_lr(history, figsize: tuple = (12, 8)):
    """
    plot the recorded loss against the recorded learning rate,
    with a logarithmic x axis
    """
    _, axis = plt.subplots(figsize=figsize)
    records = history.history
    learning_rates = records['lr']
    losses = records['loss']
    axis.semilogx(learning_rates, losses)
    axis.grid()
    plt.setp(axis, xlabel='lr', ylabel='loss', title='loss vs lr')
    plt.show()
def plot_time_series_forecast(ts, forecast, values, figsize: tuple = (16, 8)):
    """
    plot forecast (red, dashed) and real values (blue) over the same time axis
    """
    _, axis = plt.subplots(figsize=figsize)
    curves = (
        (forecast, 'r', {'linestyle': '--', 'label': 'forecast'}),
        (values, 'b', {'label': 'real'}),
    )
    for series, fmt, options in curves:
        axis.plot(ts, series, fmt, **options)
    axis.grid()
    axis.legend()
    plt.setp(axis, xlabel='time', ylabel='value', title='time series plot')
    plt.show()
def plot_images_sample(images: np.array = None,
                       labels: np.array = None,
                       path: Path = None,
                       samples: int = 16,
                       figsize: tuple = (8, 8)):
    """
    samples some images from dataset and
    plot a grid of images with its
    corresponding labels

    Either ``images`` and ``labels`` or ``path`` must be given. With ``path``,
    every file below it is a candidate image and its parent folder name is
    used as label. The grid is square with side ``int(sqrt(samples))``, and
    images are drawn at random with replacement.
    """
    assert (images is not None and labels is not None) or path, \
        "provide either images and labels, or path"
    if path:
        images = [img for img in path.glob('**/*') if img.is_file()]
        labels = [img.parent.name for img in images]
    h = int(np.sqrt(samples))
    w = h
    # squeeze=False keeps a 2-D axes array even for a 1x1 grid (samples < 4),
    # so the [i, j] indexing below always works
    f, axarr = plt.subplots(h, w, figsize=figsize, squeeze=False)
    for i in range(h):
        for j in range(w):
            index = np.random.choice(len(images))
            axarr[i, j].imshow(plt.imread(images[index]) if path else images[index])
            axarr[i, j].set_title(f"class: {labels[index]}")
            axarr[i, j].axis('off')
    plt.tight_layout()
    plt.show()
import numpy as np
def plot_image_pred(images: np.ndarray,
                    labels: np.ndarray,
                    preds: np.ndarray,
                    indexes: list,
                    class_names: list = None,
                    figsize: tuple = (8, 4)):
    """
    for every selected sample, show the image next to a horizontal bar chart
    of its predicted class probabilities

    The title is blue when the prediction matches the label and red otherwise.
    In the bar chart the predicted class is red and the true class is blue
    (blue wins when both coincide).

    :param images: images, indexable by sample
    :param labels: true class per sample, either a class index or a vector
        with more than one entry, which is reduced with argmax
        (assumed to be one-hot — TODO confirm against callers)
    :param preds: predicted class probabilities per sample
    :param indexes: sample positions to plot
    :param class_names: optional class names, indexable by class index
    :param figsize: size of each figure
    """
    # explicit checks: truth-testing a numpy array of names would raise
    has_names = class_names is not None and len(class_names) > 0
    for index in indexes:
        predicted_probas = preds[index]
        label = np.asarray(labels[index])
        if label.size > 1:
            true_index = int(np.argmax(label))
        else:
            true_index = int(label.reshape(-1)[0])
        predicted_index = int(np.argmax(predicted_probas))
        predicted_class = class_names[predicted_index] if has_names else predicted_index
        fig, axes = plt.subplots(ncols=2, nrows=1, figsize=figsize)
        axes[0].imshow(images[index])
        axes[0].axis('off')
        axes[0].set_title(f"predicted class: {predicted_class}",
                          color='blue' if true_index == predicted_index else 'red')
        bar_names = class_names if has_names else [str(i) for i in range(len(predicted_probas))]
        # keep the bar container instead of relying on the order of get_children()
        bars = axes[1].barh(bar_names,
                            predicted_probas,
                            color='black')
        bars[predicted_index].set_color('red')
        bars[true_index].set_color('blue')
        axes[1].set_xlim(0, 1)
        axes[1].set_title("class probabilities")
        axes[1].set_xlabel("probability")
        axes[1].set_ylabel("class name")
        plt.tight_layout()
        plt.show()
def unzip_data(zip_dir, unzip_dir):
    """
    extract every member of the zip archive ``zip_dir`` into ``unzip_dir``

    :param zip_dir: path of the zip file to read
    :param unzip_dir: destination directory of the extracted files
    """
    # the context manager closes the archive even when extraction fails
    with zipfile.ZipFile(zip_dir, 'r') as zip_ref:
        zip_ref.extractall(unzip_dir)
class EnoughTrainingCallback(tf.keras.callbacks.Callback):
    """
    keras callback that stops training once a metric exceeds a threshold

    At the end of every epoch the value of ``metric`` is read from the epoch
    logs; when it is strictly greater than ``threshold`` the model's
    ``stop_training`` flag is set.
    """

    def __init__(self, metric, threshold):
        """
        :param metric: key of the monitored value in the epoch logs
        :param threshold: value the metric has to exceed to stop training
        """
        super().__init__()
        self.metric = metric
        self.threshold = threshold
        self.logger = get_logger(__name__)

    def on_epoch_end(self, epoch, logs=None):
        """check the monitored metric and request a stop when it is over the threshold"""
        # logs may be None and the metric may be missing; comparing None with
        # a number would raise, so skip the check instead
        value = (logs or {}).get(self.metric)
        if value is None:
            self.logger.warning(f'metric {self.metric} not found in epoch logs, skipping check')
            return
        if value > self.threshold:
            self.logger.info(f'reached over {self.threshold} {self.metric}, stopping training...')
            self.model.stop_training = True