In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from matplotlib import pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, Dropout, Lambda
from tensorflow.keras.activations import relu, softmax
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.utils import img_to_array, load_img

### hyperparameters, optimizer and callbacks

In [2]:
# --- task / schedule settings ---
classes = 6          # size of the softmax output layer
epoch = 75           # number of epochs passed to model.fit
weight_decay = 5e-4  # L2 factor applied to the convolution kernels

# --- optimisation ---
optimizer = SGD(learning_rate=0.01, momentum=0.9)

# --- callbacks ---
# multiply the learning rate by 0.1 after 20 epochs without val_loss improvement
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=20, factor=0.1)
# save_best_only=True: the checkpoint file is only rewritten on improvement
model_checkpoint = ModelCheckpoint(filepath='vgg11_lrn_checkpoint.hdf5', save_best_only=True)

In [3]:
def vgg_net11(input_shape=(224, 224, 3), classes=None):
    """Build, compile and summarise a VGG-11 style convolutional classifier.

    Five convolutional stages (1, 1, 2, 2 and 2 stacked 3x3 convolutions with
    64, 128, 256, 512 and 512 filters respectively) are each closed by a 2x2
    max-pool. The classifier head is two 512-unit ReLU dense layers, each
    followed by 50% dropout, and a softmax output layer.

    Relies on the module-level ``weight_decay`` (L2 factor for the convolution
    kernels) and ``optimizer`` objects.

    Args:
        input_shape: shape of a single input sample, forwarded to ``Input``.
        classes: number of units in the softmax output layer.

    Returns:
        The compiled Keras ``Model``. Its summary is printed as a side effect.
    """
    # (filters, number of stacked convolutions) for each stage, in order
    stage_layout = ((64, 1), (128, 1), (256, 2), (512, 2), (512, 2))

    inputs = Input(shape=input_shape, name='input_')
    features = inputs

    # convolutional feature extractor: conv stack followed by a down-sampling pool
    for n_filters, n_convs in stage_layout:
        for _ in range(n_convs):
            features = Conv2D(filters=n_filters,
                              kernel_size=(3, 3),
                              strides=(1, 1),
                              padding='same',
                              activation=relu,
                              kernel_regularizer=l2(weight_decay))(features)
        features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid')(features)

    # classifier head
    features = Flatten()(features)
    for _ in range(2):
        features = Dense(units=512, activation=relu)(features)
        features = Dropout(rate=0.5)(features)
    outputs = Dense(units=classes, activation=softmax)(features)

    network = Model(inputs, outputs)
    network.compile(optimizer=optimizer, loss=categorical_crossentropy, metrics=['accuracy'])
    network.summary()

    return network


### utility functions

In [4]:
def save_train_history(history_dict, file_path):
    """Pickle the per-epoch training history as a pandas DataFrame.

    Args:
        history_dict: object exposing a ``history`` attribute that maps metric
            names to per-epoch value lists (e.g. the return value of
            ``model.fit``).
        file_path: destination handed to ``DataFrame.to_pickle``. When it is a
            string or path-like object, missing parent directories are created
            first so the history is not lost because a folder does not exist.

    A ``FileNotFoundError`` is still reported on stdout instead of being
    raised (best-effort save).
    """
    temp = pd.DataFrame(history_dict.history)

    try:
        # only real filesystem paths have a parent directory to create;
        # other targets are passed straight through to to_pickle
        if isinstance(file_path, (str, os.PathLike)):
            parent_dir = os.path.dirname(os.fspath(file_path))
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

        temp.to_pickle(file_path)

    except FileNotFoundError:
        print('file not found.')


def create_metric_graph(history_dict, metric='accuracy', validation=True, file_path=None):
    """Plot one training metric per epoch, optionally with its validation curve.

    Args:
        history_dict: object whose ``history`` attribute maps metric names to
            per-epoch value lists.
        metric: key of the training series; the validation series is read
            from ``'val_' + metric``.
        validation: when true, overlay the validation series as a dashed line.
        file_path: if given, the figure is saved there before being shown.
    """
    records = history_dict.history
    train_values = records[metric]
    epoch_axis = list(range(len(train_values)))

    fig, ax = plt.subplots(figsize=[12, 6], dpi=300)

    sns.lineplot(x=epoch_axis, y=train_values, marker='o', label='training', ax=ax)

    if validation:
        sns.lineplot(x=epoch_axis,
                     y=records[f'val_{metric}'],
                     marker='o',
                     linestyle='--',
                     label='validation',
                     ax=ax)

    ax.set_xlabel('epoch')
    ax.set_ylabel(f'{metric}')

    if file_path is not None:
        fig.savefig(file_path)

    plt.show()


def create_loss_graph(history_dict, validation=True, file_path=None):
    """Plot the training loss per epoch, optionally with the validation loss.

    Thin wrapper around ``create_metric_graph`` with ``metric='loss'``, so the
    loss and metric plots share a single implementation.

    Args:
        history_dict: object whose ``history`` attribute holds ``'loss'`` (and
            ``'val_loss'`` when ``validation`` is true).
        validation: when true, overlay the validation loss as a dashed line.
        file_path: if given, the figure is saved there before being shown.
    """
    create_metric_graph(history_dict,
                        metric='loss',
                        validation=validation,
                        file_path=file_path)


def create_learning_rate(history_dict, file_path=None):
    """Plot the learning-rate schedule and how it relates to the losses.

    The left panel shows the learning rate per epoch; the right panel scatters
    the learning rate against the training and validation loss.

    Args:
        history_dict: object whose ``history`` attribute holds ``'loss'``,
            ``'val_loss'`` and the logged learning rate.
        file_path: if given, the figure is saved there before being shown.

    Raises:
        KeyError: if no learning-rate series (or loss series) was recorded.
    """
    records = history_dict.history

    # The learning rate is read from 'lr'; newer Keras releases log it as
    # 'learning_rate' instead, so fall back to that key when 'lr' is absent.
    lr_key = 'lr' if 'lr' in records else 'learning_rate'
    lr_values = records[lr_key]

    epochs = list(range(len(lr_values)))
    figure, axes = plt.subplots(nrows=1, ncols=2, figsize=[12, 6], dpi=300)

    sns.lineplot(x=epochs,
                 y=lr_values,
                 ax=axes[0])

    sns.scatterplot(x=records['loss'],
                    y=lr_values,
                    label='training loss',
                    ax=axes[1])

    sns.scatterplot(x=records['val_loss'],
                    y=lr_values,
                    label='validation loss',
                    ax=axes[1])

    axes[0].set_title('learning rate change through training')
    axes[1].set_title('learning rate changes with loss')

    axes[0].set_xlabel('epoch')
    axes[0].set_ylabel('learning rate')
    axes[1].set_xlabel('loss')
    axes[1].set_ylabel('learning rate')

    if file_path is not None:
        figure.savefig(file_path)

    plt.show()


In [12]:
# build the network for 150x150x3 inputs with the configured number of classes
model = vgg_net11(classes=classes, input_shape=(150, 150, 3))

In [6]:
# write the architecture diagram (with shapes, dtypes and layer names) to a PNG
plot_model(model, to_file='vgg11_lrn.png', show_shapes=True, show_dtype=True, show_layer_names=True)

In [7]:
# source images (one sub-folder per class) and destination for the augmented copies
train_dir = '../input/intel-image-classification/seg_train/seg_train'
augmented_dir = 'data/augmented'

more_img_generator = ImageDataGenerator(rotation_range=90,
                                        width_shift_range=0.4,
                                        height_shift_range=0.4,
                                        brightness_range=(0.1, 1.0),
                                        shear_range=0.4,
                                        channel_shift_range=150,
                                        horizontal_flip=True,
                                        vertical_flip=True,
                                        fill_mode='nearest')

# makedirs builds the intermediate 'data' folder when needed; exist_ok keeps
# the cell from failing when 'data' (or the whole tree) is already present
os.makedirs(augmented_dir, exist_ok=True)

# NOTE(review): re-running this cell appears to add further augmented files to
# the class folders rather than replace them -- confirm before re-executing.
for sub in os.listdir(train_dir):
    src_path = os.path.join(train_dir, sub)
    dst_path = os.path.join(augmented_dir, sub)
    os.makedirs(dst_path, exist_ok=True)

    # load every image of this class, resized to 150x150, into one array
    files_in_sub = np.asarray([
        img_to_array(load_img(os.path.join(src_path, img_file), target_size=(150, 150)))
        for img_file in os.listdir(src_path)
    ])

    img_gen = more_img_generator.flow(files_in_sub,
                                      save_to_dir=dst_path,
                                      save_format='jpg',
                                      save_prefix='aug-')

    # four full passes over the class; drawing a batch writes its augmented
    # images to dst_path, the returned arrays themselves are not needed
    for i in range(4):
        for j in range(len(img_gen)):
            next(img_gen)

In [10]:
# a single rescaling generator serves all splits; 30% of the augmented images
# are reserved for the 'validation' subset
generator = ImageDataGenerator(validation_split=0.3,
                               rescale=1 / 255.)

# settings shared by the training and validation iterators
augmented_flow_args = dict(directory='data/augmented/',
                           target_size=(150, 150),
                           batch_size=128)

train_gen = generator.flow_from_directory(subset='training', **augmented_flow_args)
valid_gen = generator.flow_from_directory(subset='validation', **augmented_flow_args)

# the test iterator reads the untouched test folder with the default batch size
test_gen = generator.flow_from_directory(directory='../input/intel-image-classification/seg_test/seg_test',
                                         target_size=(150, 150))

In [None]:
# train with learning-rate reduction and checkpointing; `history` keeps the per-epoch logs
history = model.fit(train_gen,
                    validation_data=valid_gen,
                    epochs=epoch,
                    callbacks=[reduce_lr, model_checkpoint])

In [None]:
# persist the per-epoch logs so the plots can be rebuilt without retraining
save_train_history(history_dict=history, file_path='vgg11_lrn.pkl')

In [None]:
# training vs. validation accuracy per epoch
create_metric_graph(history_dict=history, file_path='accuracy_vgg11_lrn.png')

In [None]:
# training vs. validation loss per epoch
create_loss_graph(history_dict=history, file_path='loss_vgg11_lrn.png')

In [None]:
# learning-rate schedule and its relation to the losses
create_learning_rate(history_dict=history, file_path='learning_rate_vgg11_lrn.png')

In [None]:
# score the trained model on the test iterator; the result is the cell output
model.evaluate(x=test_gen)