### Import packages

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
%tensorflow_version 1.x
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import random
import pickle
from sklearn.model_selection import KFold
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from sklearn.metrics import accuracy_score, log_loss
from keras.applications import VGG16
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
from keras.optimizers import Adam, RMSprop, SGD
import matplotlib.pyplot as plt
from keras.applications import MobileNet
from sklearn.metrics import classification_report
from sklearn.utils import class_weight
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import LeakyReLU, ReLU
import json
import gc

np.random.seed(2020)
random_state = 52
CATEGORIES = ["c0","c1","c2","c3","c4","c5","c6","c7","c8","c9"]
PATH_COLAB = "/content/drive/My Drive/Deep Learning Project/progetto/"

with open(PATH_COLAB+'best_params_mobilenet.json') as json_file:
    best_params = json.load(json_file)

TensorFlow 1.x selected.


Using TensorFlow backend.


In [0]:
def restore_data(path):
    data = dict()
    if os.path.isfile(path):
        file = open(path, 'rb')
        data = pickle.load(file)
    return data

def preprocessing(X, Y, img_rows, img_cols, color_type=3):
    
    Y = np.array(Y, dtype=np.uint8)

    if color_type == 1:
        X = np.array(X, dtype=np.uint8).reshape(-1, img_rows, img_cols, 1)
    else:
        X = np.array(X, dtype=np.uint8).reshape(-1, img_rows, img_cols, 3)

    Y = to_categorical(Y, 10)
    X = X.astype('float32')
    X /= 255
    
    return X, Y

In [0]:
def best_fine_tuning_mobilenet(img_rows, img_cols, color_type, mobilenet_base):

    model = Sequential()
    model.add(mobilenet_base)
    model.add(Flatten())
    model.add(Dense(best_params["n_units_1"]))
    model.add(ReLU())
    model.add(Dropout(best_params["dropout_rate_1"]))
    model.add(Dense(best_params["n_units_2"]))
    model.add(ReLU())
    model.add(BatchNormalization())
    model.add(Dropout(best_params["dropout_rate_2"]))
    model.add(Dense(10, activation = 'softmax'))

    model.summary()

    model.compile(optimizer = SGD(lr=best_params["lr"], momentum=best_params["momentum"]), loss='categorical_crossentropy', metrics = ["accuracy"])

    return model
    

In [0]:
def predictions_(X, Y, dim_val, sum_score_acc, sum_score_log_loss, model, k, type_img):

    df = pd.DataFrame(columns=['k', 'accuracy', 'log_loss', 'type'])

    print("\n ------ ", type_img)

    predictions = model.predict(X, batch_size=32, verbose=1)

    predictions_arg = [y.argmax() for y in predictions]
    Y_arg = [y.argmax() for y in Y]

    predictions_arg = np.array(predictions_arg)  
    Y_arg = np.array(Y_arg)

    score_acc = accuracy_score(Y_arg, predictions_arg)
    score_log_loss = log_loss(Y, predictions)

    print('Score Accuracy: ', score_acc)
    print('Score Log Loss: ', score_log_loss)

    sum_score_acc += score_acc*dim_val
    sum_score_log_loss += score_log_loss*dim_val

    print(classification_report(Y_arg, predictions_arg, target_names=CATEGORIES))

    df = df.append({'k': k, 'accuracy':score_acc, 'log_loss':score_log_loss, 'type':str(type_img)}, ignore_index=True)

    return df, sum_score_acc, sum_score_log_loss



def weighted_avarage(df, sum_score_acc, sum_score_log_loss, tot_imgs, type_img):

    print("\n ------ ", type_img)

    score_acc = sum_score_acc/(tot_imgs)
    print("Weighted avarage accuracy: ", score_acc)

    score_log_loss = sum_score_log_loss/(tot_imgs)
    print("Weighted avarage log loss: ", score_log_loss)

    df['weighted_mean_loss'] = score_log_loss
    df['weighted_mean_acc'] = score_acc

    return df





def training_model(img_rows, img_cols, color_type=3, final_layer=8):

    df_val_all = pd.DataFrame(columns=['k', 'accuracy', 'log_loss', 'type'])
    df_train_all = pd.DataFrame(columns=['k', 'accuracy', 'log_loss', 'type'])

    batch_size = 32
    epochs = 20

    sum_score_acc_val = 0
    sum_score_acc_train = 0 

    sum_score_log_loss_val = 0
    sum_score_log_loss_train = 0


    name_model = 'best_config_mobilenet'

    for k in range(1,6):

        print("\n\nFold " + str(k) + ' out of 5')

        X_train, Y_train, X_val, Y_val = restore_data(PATH_COLAB + "data/k_" + str(k) + ".dat")
        X_train, Y_train = preprocessing(X_train, Y_train, img_rows, img_cols, color_type)
        X_val, Y_val = preprocessing(X_val, Y_val, img_rows, img_cols, color_type)

        datagen = ImageDataGenerator(
          height_shift_range=0.5, 
          width_shift_range = 0.5, 
          zoom_range = 0.5, 
          rotation_range=20 
        )

        data_generator = datagen.flow(X_train, Y_train, batch_size = 32, seed = random_state)

        mobilenet = MobileNet(weights="imagenet", include_top=False, input_shape=(img_rows, img_cols, color_type))
        model = best_fine_tuning_mobilenet(img_rows, img_cols, color_type, mobilenet)

        kfold_weights_path = os.path.join(PATH_COLAB + 'architecture/'+ name_model, 'weights_kfold_' + str(k) + '.h5')

        if not os.path.isfile(kfold_weights_path) or RESTORE_WEIGHTS == False:
            callbacks = [
                EarlyStopping(monitor='val_loss', patience=5, verbose=0),
                ModelCheckpoint(kfold_weights_path, monitor='val_loss', save_best_only=True, verbose=0)
            ]
            class_weights = class_weight.compute_sample_weight(class_weight='balanced', y=Y_train)
            
            history = model.fit_generator(data_generator, steps_per_epoch = len(X_train) // batch_size, 
                                epochs=epochs, shuffle=True, verbose=2, validation_data=(X_val, Y_val),
                                class_weight=class_weights, callbacks=callbacks)
          
        if os.path.isfile(kfold_weights_path):
            model.load_weights(kfold_weights_path)

        df_val, sum_score_acc_val, sum_score_log_loss_val = predictions_(X_val, Y_val, X_val.shape[0], sum_score_acc_val, 
                                                                         sum_score_log_loss_val, model, k, 'validation')
        df_val_all = df_val_all.append(df_val)

        df_train, sum_score_acc_train, sum_score_log_loss_train = predictions_(X_train, Y_train, X_val.shape[0], sum_score_acc_train,
                                                                               sum_score_log_loss_train,model, k, 'training')
        df_train_all = df_train_all.append(df_train)
        
    
    print("---- Final score ----\n")
    tot_images = X_train.shape[0] + X_val.shape[0]

    df_val_final = weighted_avarage(df_val_all, sum_score_acc_val, sum_score_log_loss_val, tot_images, 'validation')
    df_train_final = weighted_avarage(df_train_all, sum_score_acc_train, sum_score_log_loss_train, tot_images,'training')

    #df.to_csv(PATH_COLAB+'dataframes/'+name_model+'.csv')

    return df_val_final, df_train_final

In [0]:
RESTORE_WEIGHTS = True

In [7]:
df_val, df_train = training_model(128,128,3) 



Fold 1 out of 5
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenet_1.00_128 (Model)   (None, 4, 4, 1024)        3228864   
_________________________________________________________________
flatten_1 (Flatten)          (None, 16384)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 387)               6340995   
_________________________________________________________________
re_lu_1 (ReLU)               (None, 387)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 387)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 107)               41516     
_____________________

In [8]:
df_val

Unnamed: 0,k,accuracy,log_loss,type,weighted_mean_loss,weighted_mean_acc
0,1,0.890495,0.335144,validation,0.385727,0.876695
0,2,0.914954,0.269744,validation,0.385727,0.876695
0,3,0.885554,0.338757,validation,0.385727,0.876695
0,4,0.8471,0.478869,validation,0.385727,0.876695
0,5,0.83477,0.538071,validation,0.385727,0.876695


In [9]:
df_train

Unnamed: 0,k,accuracy,log_loss,type,weighted_mean_loss,weighted_mean_acc
0,1,0.991084,0.031438,training,0.05853,0.982589
0,2,0.988026,0.039592,training,0.05853,0.982589
0,3,0.961281,0.131281,training,0.05853,0.982589
0,4,0.976206,0.077526,training,0.05853,0.982589
0,5,0.992766,0.024984,training,0.05853,0.982589


In [13]:
df_all = pd.concat([df_train, df_val]).reset_index().drop(['index'], axis=1)
df_all

Unnamed: 0,k,accuracy,log_loss,type,weighted_mean_loss,weighted_mean_acc
0,1,0.991084,0.031438,training,0.05853,0.982589
1,2,0.988026,0.039592,training,0.05853,0.982589
2,3,0.961281,0.131281,training,0.05853,0.982589
3,4,0.976206,0.077526,training,0.05853,0.982589
4,5,0.992766,0.024984,training,0.05853,0.982589
5,1,0.890495,0.335144,validation,0.385727,0.876695
6,2,0.914954,0.269744,validation,0.385727,0.876695
7,3,0.885554,0.338757,validation,0.385727,0.876695
8,4,0.8471,0.478869,validation,0.385727,0.876695
9,5,0.83477,0.538071,validation,0.385727,0.876695


In [0]:
df_all.to_csv(PATH_COLAB+'dataframes/best_config_mobilenet.csv')