In [1]:
import sys
import pandas as pd
import cv2
import os 
import glob
import gc
from joblib import Parallel, delayed

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpli
import warnings
warnings.filterwarnings("ignore", module="matplotlib")
pd.options.mode.chained_assignment = None

from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

from keras.applications import vgg16
from keras.preprocessing import image
from keras import optimizers, callbacks
from keras.models import Sequential, load_model, Model
from keras.layers import Flatten, Dense 
from keras.utils import to_categorical
from tensorflow.python.client import device_lib
import tensorflow as tf

from mlflow import log_metric, log_param, log_artifact

Using TensorFlow backend.


In [2]:
# Names of the class sub-folders. The position of a name in this list is the
# integer label the dataset loaders assign to the images of that folder, so
# the order must not change between training and inference.
lista = [
    'mercat_independencia',
    'societat_general',
    'desconegut',
    'farmacia_albinyana',
    'ajuntament',
    'mnactec',
    'escola_enginyeria',
    'masia_freixa',
    'castell_cartoixa',
    'dona_treballadora',
    'catedral',
    'teatre_principal',
    'estacio_nord',
]

In [3]:
def preprocess_image(img, mode='vgg16'):
    """Apply the per-channel offset used for VGG16 inputs.

    Parameters
    ----------
    img : numpy.ndarray
        Image array indexed as ``img[row, col, channel]`` with at least
        three channels.
    mode : str
        Only ``'vgg16'`` triggers any processing; for every other value the
        input object is returned untouched.

    Returns
    -------
    numpy.ndarray
        For ``mode == 'vgg16'``: a float16 copy of ``img`` with 103.939,
        116.779 and 123.68 subtracted from channels 0, 1 and 2 respectively.
        Otherwise ``img`` itself.
    """
    if mode != 'vgg16':
        return img

    # astype() returns a new array, so the caller's image is left unchanged.
    centered = img.astype(np.float16)
    # NOTE(review): these look like the ImageNet channel means in BGR order,
    # which fits images read with cv2.imread -- confirm if the loader changes.
    channel_offsets = (103.939, 116.779, 123.68)
    for channel, offset in enumerate(channel_offsets):
        centered[:, :, channel] -= offset
    return centered

In [4]:
def load_image(img_file, img_reshape_size):
    """Read an image from disk, resize it and run ``preprocess_image`` on it.

    Parameters
    ----------
    img_file : str
        Path of the image file to read.
    img_reshape_size : tuple
        Target size passed straight to ``cv2.resize``.

    Returns
    -------
    The value returned by ``preprocess_image`` for the resized image.

    Raises
    ------
    IOError
        If OpenCV cannot read or decode ``img_file``.
    """
    img = cv2.imread(img_file)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if img is None:
        raise IOError('Could not read image file: {}'.format(img_file))

    img = cv2.resize(img, img_reshape_size)
    return preprocess_image(img)

In [5]:
def load_train_dataset(dataset_dir, img_reshape_size, nprocs=10):
    """Load every ``*.jpg`` of the training set into memory.

    ``dataset_dir`` must contain one sub-folder per entry of ``lista``; the
    index of the folder name in ``lista`` becomes the label of its images.

    Parameters
    ----------
    dataset_dir : str
        Root folder holding one sub-folder per class.
    img_reshape_size : tuple
        Size forwarded to ``load_image`` for every file.
    nprocs : int
        Number of joblib workers used to load the images of one class.

    Returns
    -------
    X : numpy.ndarray
        float16 array stacking the loaded images, grouped by class in
        ``lista`` order.
    y : numpy.ndarray
        Integer class index for every entry of ``X``.
    """
    X = []
    y = []
    # Iterate over the class list itself so that the labels always follow
    # ``lista`` even when classes are added or removed.
    for label, class_name in enumerate(lista):
        pattern = os.path.join(dataset_dir, class_name, '*.jpg')
        # glob yields files in arbitrary order; sorting keeps runs reproducible.
        files = sorted(glob.glob(pattern))

        X.extend(Parallel(n_jobs=nprocs)(
            delayed(load_image)(im_file, img_reshape_size) for im_file in files))
        y.extend([label] * len(files))
        print('folder classifier/training/'+str(class_name), 'loaded')

    X = np.asarray(X, dtype=np.float16)
    y = np.asarray(y)
    return X, y

In [6]:
def load_val_dataset(dataset_dir, img_reshape_size, nprocs=10):
    """Load every ``*.jpg`` of the validation set into memory.

    ``dataset_dir`` must contain one sub-folder per entry of ``lista``; the
    index of the folder name in ``lista`` becomes the label of its images.

    Parameters
    ----------
    dataset_dir : str
        Root folder holding one sub-folder per class.
    img_reshape_size : tuple
        Size forwarded to ``load_image`` for every file.
    nprocs : int
        Number of joblib workers used to load the images of one class.

    Returns
    -------
    X : numpy.ndarray
        float16 array stacking the loaded images, grouped by class in
        ``lista`` order.
    y : numpy.ndarray
        Integer class index for every entry of ``X``.
    """
    X = []
    y = []
    # Iterate over the class list itself so that the labels always follow
    # ``lista`` even when classes are added or removed.
    for label, class_name in enumerate(lista):
        pattern = os.path.join(dataset_dir, class_name, '*.jpg')
        # glob yields files in arbitrary order; sorting keeps runs reproducible.
        files = sorted(glob.glob(pattern))

        X.extend(Parallel(n_jobs=nprocs)(
            delayed(load_image)(im_file, img_reshape_size) for im_file in files))
        y.extend([label] * len(files))
        print('folder classifier/validation/'+str(class_name), 'loaded')

    X = np.asarray(X, dtype=np.float16)
    y = np.asarray(y)
    return X, y

In [9]:
def create_VGG16_model(n_classes=13, n_layers_train=2, learning_rate=0.0001):
    """Build and compile a VGG16-based classifier for fine-tuning.

    The ImageNet-pretrained VGG16 (with its fully connected top) is loaded,
    its original prediction layer is bypassed by branching off the ``fc2``
    output, and a new softmax layer named ``mypredictions`` is attached.

    Parameters
    ----------
    n_classes : int
        Number of units of the new softmax output layer.
    n_layers_train : int
        How many layers, counted from the end of ``model.layers``, stay
        trainable; all earlier layers are frozen. ``0`` freezes everything.
    learning_rate : float
        Learning rate of the Adam optimizer.

    Returns
    -------
    The compiled Keras ``Model`` (categorical cross-entropy loss, accuracy
    metric).

    Raises
    ------
    ValueError
        If ``n_layers_train`` is negative.
    """
    if n_layers_train < 0:
        raise ValueError('n_layers_train must be >= 0, got {}'.format(n_layers_train))

    vgg16_base = vgg16.VGG16(weights='imagenet', include_top=True, input_shape=(224,224,3))

    # Replace the 1000-way ImageNet head with one sized for this problem.
    fc2 = vgg16_base.get_layer('fc2').output
    mypredictions = Dense(n_classes, activation='softmax', name='mypredictions')(fc2)
    model = Model(inputs=vgg16_base.input, outputs=mypredictions)

    # Summary before freezing (every layer still trainable).
    model.summary()

    # Freeze the layers except the last n_layers_train layers.
    # An explicit count is used because the slice ``[:-0]`` is empty, which
    # would leave the whole network trainable when n_layers_train == 0.
    n_frozen = max(len(model.layers) - n_layers_train, 0)
    for layer in model.layers[:n_frozen]:
        layer.trainable = False
    for layer in model.layers:
        print(layer, layer.trainable)

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(lr=learning_rate),
                  metrics=['accuracy'])

    # Summary after freezing/compiling, reflecting the trainable split.
    model.summary()
    return model

class LogMlFlowMetrics(callbacks.Callback):
    """Keras callback that forwards the per-epoch metrics to MLflow.

    At the end of every epoch the training/validation loss and accuracy found
    in ``logs`` are sent to MLflow as ``train_loss``, ``train_acc``,
    ``val_loss`` and ``val_acc``. Metrics absent from ``logs`` (for example
    the validation ones when no validation data is used) are skipped instead
    of being logged as ``None``.
    """

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        # The accuracy key is 'acc' on some Keras releases and 'accuracy' on
        # others, so both spellings are accepted.
        metrics = (
            ("train_loss", logs.get('loss')),
            ("train_acc", logs.get('acc', logs.get('accuracy'))),
            ("val_loss", logs.get('val_loss')),
            ("val_acc", logs.get('val_acc', logs.get('val_accuracy'))),
        )
        for name, value in metrics:
            if value is not None:
                log_metric(name, value)

if __name__ == '__main__':

    # ---- Run configuration ----
    batch_size = 100
    n_epoch = 10
    learning_rate = 0.0001
    n_layers_train = 3
    data_augmentation = True

    # Record the configuration of this run in MLflow.
    log_param("batch_size", batch_size)
    log_param("n_epoch", n_epoch)
    log_param("learning_rate", learning_rate)
    log_param("n_layers_train", n_layers_train)
    log_param("data_augmentation", data_augmentation)

    img_reshape_size = (224,224)
    n_classes = len(lista)

    dataset_dir_train = os.path.join('classifier','training')
    dataset_dir_val = os.path.join('classifier','validation')

    # ---- Data ----
    print('Loading dataset train...')
    X_train, y_train = load_train_dataset(dataset_dir_train, img_reshape_size)
    print('Loading dataset validation...')
    X_val, y_val = load_val_dataset(dataset_dir_val, img_reshape_size)

    # Fix the one-hot width explicitly: without num_classes it is inferred
    # from the largest label present, so a split missing the last class would
    # not match the model output.
    y_train = to_categorical(y_train, num_classes=n_classes)
    y_val = to_categorical(y_val, num_classes=n_classes)

    print('X_train shape:', X_train.shape)
    print('y_train shape:', y_train.shape)
    print('X_val shape:', X_val.shape)
    print('y_val shape:', y_val.shape)

    # Augmentation is applied to the training stream only.
    if data_augmentation:
        train_datagen = image.ImageDataGenerator(
                                                width_shift_range=0.1,
                                                height_shift_range=0.1,
                                                shear_range=0.1,
                                                rotation_range=8,
                                                fill_mode='nearest'
                                                )
    else:
        train_datagen = image.ImageDataGenerator()
    val_datagen = image.ImageDataGenerator()

    # The training arrays are ordered class by class, so samples must be
    # shuffled or each batch would hold almost a single class.
    train_generator = train_datagen.flow(x=X_train, y=y_train,
                                batch_size=batch_size,
                                shuffle=True,
                                seed=42)
    # Validation uses the held-out data and the non-augmenting generator.
    val_generator = val_datagen.flow(x=X_val, y=y_val,
                                batch_size=batch_size,
                                shuffle=False,
                                seed=42)

    # True division before rounding up so the last partial batch is counted
    # (float() keeps this correct under Python 2 integer division as well).
    steps_per_epoch = int(np.ceil(X_train.shape[0] / float(batch_size)))
    validation_steps = int(np.ceil(X_val.shape[0] / float(batch_size)))

    # ---- Model ----
    model = create_VGG16_model(n_classes=n_classes,
                               n_layers_train=n_layers_train,
                               learning_rate=learning_rate)

    # One name identifies both the TensorBoard log folder and the model file.
    run_name = 'VGG16_lr'+str(learning_rate)+'_train'+str(n_layers_train)+'_epochs'+str(n_epoch)+'_data_aug'+str(data_augmentation)
    tensorboard_log_dir = run_name

    tb_callback = callbacks.TensorBoard(log_dir=os.path.join('Graph', tensorboard_log_dir),
                                       histogram_freq=0,
                                       write_graph=True,
                                       write_images=False)

    earlystop = callbacks.EarlyStopping(monitor='val_loss', patience=2,
                                        verbose=1, mode='auto')

    mlflow_callback = LogMlFlowMetrics()

    # ---- Training ----
    model_history = model.fit_generator(train_generator,
                                        validation_data=val_generator,
                                        validation_steps=validation_steps,
                                        shuffle=True,
                                        epochs=n_epoch,
                                        steps_per_epoch=steps_per_epoch,
                                        callbacks=[tb_callback, earlystop, mlflow_callback],
                                        verbose=1,
                                        use_multiprocessing=True)

    # ---- Persist the trained model and attach it to the MLflow run ----
    if not os.path.isdir('Model'):
        os.mkdir('Model')
    filename = run_name+'.h5'

    model_file = os.path.join('Model', filename)
    model.save(model_file)
    log_artifact(model_file)
    print('File', filename, 'saved')

Loading dataset train...
('folder classifier/training/mercat_independencia', 'loaded')
('folder classifier/training/societat_general', 'loaded')
('folder classifier/training/desconegut', 'loaded')
('folder classifier/training/farmacia_albinyana', 'loaded')
('folder classifier/training/ajuntament', 'loaded')
('folder classifier/training/mnactec', 'loaded')
('folder classifier/training/escola_enginyeria', 'loaded')
('folder classifier/training/masia_freixa', 'loaded')
('folder classifier/training/castell_cartoixa', 'loaded')
('folder classifier/training/dona_treballadora', 'loaded')
('folder classifier/training/catedral', 'loaded')
('folder classifier/training/teatre_principal', 'loaded')
('folder classifier/training/estacio_nord', 'loaded')
Loading dataset validation...
('folder classifier/validation/mercat_independencia', 'loaded')
('folder classifier/validation/societat_general', 'loaded')
('folder classifier/validation/desconegut', 'loaded')
('folder classifier/validation/farmacia_al

In [8]:
# Number of complete batches that fit in the training set (floor division,
# so a trailing partial batch is not counted); 11 in the recorded run.
X_train.shape[0]//batch_size

11