In [None]:
"""
Author: Amruth Karun M V
Date: 12-Oct-2021
"""

import os
import pandas as pd
import numpy as np
import zipfile
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense, Activation, Dropout, Flatten, 
    Conv2D, MaxPooling2D, BatchNormalization
)

from sklearn import metrics
import matplotlib.pyplot as plt
%matplotlib inline

# Directory of training images handed to load_data().
TRAIN_PATH = "../input/cifar10/cifar10/train"
# Directory of held-out images; train_model() uses it as validation data.
TEST_PATH = "../input/cifar10/cifar10/test"
# Upper bound on training epochs (early stopping may end training sooner).
EPOCHS = 100
# Batch size setting for training and prediction.
BATCH_SIZE = 256
# Learning rate given to the Adam optimizer in load_model().
LEARNING_RATE = 0.001


def load_data(input_path, shuffle=False, target_size=(224, 224), batch_size=32):
    """
    Loads input data from directory
    Arguments:
        input_path  -- input data path
        shuffle     -- whether data needs to be shuffled or not
        target_size -- (height, width) every image is resized to;
                       the default matches the model's 224x224 input
        batch_size  -- number of images per generated batch; the default
                       (32) is the Keras default the original call relied on
    Returns: Data generator
    """

    # No augmentation or rescaling is configured; images are only resized.
    image_generator = keras.preprocessing.image.ImageDataGenerator()
    return image_generator.flow_from_directory(
        directory=input_path,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=shuffle,
    )


def load_model():
    """
    Creates a keras VGG-16 model
    (13 convolution layers in 5 pooled blocks followed by
    3 dense layers) and compiles it with the Adam optimizer
    and categorical cross-entropy loss.
    Arguments: None
    Returns: Compiled VGG-16 Model with a 10-way softmax output
    """

    model = Sequential()

    # Block 1: 2 x conv(64); the first layer fixes the 224x224 RGB input
    model.add(Conv2D(input_shape=(224, 224, 3), filters=64, kernel_size=(3, 3),
                     padding="same", activation="relu"))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Block 2: 2 x conv(128)
    model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Block 3: 3 x conv(256)
    model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Block 4: 3 x conv(512)
    model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Block 5: 3 x conv(512)
    model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Classifier head
    model.add(Flatten())
    model.add(Dense(units=4096, activation="relu"))
    model.add(Dense(units=4096, activation="relu"))
    model.add(Dense(units=10, activation="softmax"))
    model.summary()

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    opt = Adam(learning_rate=LEARNING_RATE)
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt,
                  metrics=['accuracy'])

    return model


def plot_curves(history):
    """
    Plots loss and accuracy curves for
    training and validation datasets
    Arguments: 
        history -- training history (object whose `.history` dict holds
                   'loss', 'val_loss', 'accuracy' and 'val_accuracy')
    Returns: None
    """

    logs = history.history
    # Use the number of epochs actually recorded: early stopping can end
    # training before EPOCHS, and ticks beyond the data would stretch the axis.
    n_epochs = len(logs['loss'])
    # Epochs are numbered from 1 so curve positions and tick labels agree.
    epochs = np.arange(1, n_epochs + 1)
    ticks = np.arange(1, n_epochs + 1, 10)

    plt.plot(epochs, logs['loss'], color='b', label="Training loss")
    plt.plot(epochs, logs['val_loss'], color='r', label="validation loss")
    plt.xticks(ticks)
    plt.legend()
    plt.title('Training Loss VS Validation Loss')
    plt.show()

    plt.plot(epochs, logs['accuracy'], color='b', label="Training accuracy")
    plt.plot(epochs, logs['val_accuracy'], color='r', label="Validation accuracy")
    plt.xticks(ticks)
    plt.title('Training Accuracy VS Validation Accuracy')
    plt.legend()
    plt.show()
    

def get_confusion_matrix(model, input_path):
    """
    Prints the accuracy score and plots the
    confusion matrix for the input data
    Arguments:
        model       -- trained model
        input_path  -- input data path
    Returns: None
    """

    # load_data defaults to shuffle=False, so the prediction order matches
    # the order of `data_generator.classes`.
    data_generator = load_data(input_path)
    # The generator already yields batches, so no batch_size is passed;
    # Keras documents that batch_size must not be given for generator input.
    predictions = model.predict(data_generator)
    y_pred = np.argmax(predictions, axis=1)
    y_true = data_generator.classes
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

    print("Accuracy score = ", metrics.accuracy_score(y_true, y_pred))
    cm = metrics.confusion_matrix(y_true, y_pred)
    display = metrics.ConfusionMatrixDisplay(cm, display_labels=class_names)
    display.plot(cmap=plt.cm.Blues, xticks_rotation='vertical')
    plt.show()

    
def train_model():
    """
    Trains VGG-16 model and saves the 
    trained weights to an H5 file.
    Training stops early when the monitored validation
    metric has not improved for 20 epochs.
    Arguments: None
    Returns: Trained model
    """

    train_generator = load_data(TRAIN_PATH, True)
    val_generator = load_data(TEST_PATH, True)

    # Loads VGG-16 model
    model = load_model()
    earlystop = keras.callbacks.EarlyStopping(patience=20)
    callbacks = [earlystop]

    # The generators already produce batches, so `batch_size` is not passed
    # (Keras documents it must not be given for generator input).
    # `steps_per_epoch` / `validation_steps` are also left out: deriving them
    # from BATCH_SIZE undercounts whenever the generator's own batch size is
    # smaller, so each epoch would skip most of the data. Without them Keras
    # runs through every batch the generators provide.
    history = model.fit(
        train_generator,
        epochs=EPOCHS,
        validation_data=val_generator,
        callbacks=callbacks)

    plot_curves(history)
    model.save_weights("model_vgg16.h5")
    print("Model saved successfully!")

    return model

    


In [None]:
# Train the model, then report a confusion matrix for each data split
model = train_model()
reports = (
    ("Confusion matrix for train data: ", TRAIN_PATH),
    ("Confusion matrix for val data: ", TEST_PATH),
)
for heading, data_path in reports:
    print(heading)
    get_confusion_matrix(model, data_path)