In [42]:
import os
import warnings
import json
import boto3
import pickle
import pandas as pd
import numpy as np
import dask.bag as db
import dask.dataframe as dd
import tensorflow as tf
import keras.applications.inception_resnet_v2 as inception_resnet
from keras import backend
from keras.models import Model, Input, Sequential, load_model
from keras.preprocessing.image import load_img, img_to_array
from keras.utils.np_utils import to_categorical
from keras.layers import Dense, Dropout, Input, Conv2D
from keras.callbacks import (ModelCheckpoint, LearningRateScheduler,
                             EarlyStopping, ReduceLROnPlateau)
from PIL import Image
from operator import itemgetter
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from dask.distributed import Client
from dask.diagnostics import ProgressBar
from tqdm.notebook import tqdm

In [2]:
print(f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {tf.keras.__version__}")
gpu = len(tf.config.list_physical_devices('GPU'))>0
print("GPU is", "available" if gpu else "NOT AVAILABLE")


Tensor Flow Version: 2.4.0
Keras Version: 2.4.0
GPU is available


In [3]:
physical_devices = tf.config.list_physical_devices('GPU') 
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [4]:
from IPython.display import display, clear_output
from IPython.display import Audio
from IPython.core.display import HTML
import numpy as np

def alert(duration=2):
    """ makes sound on client using javascript (works with remote server) """      
    framerate = 44100
    freq=300
    t = np.linspace(0,duration,framerate*duration)
    data = np.sin(2*np.pi*freq*t)
    display(Audio(data,rate=framerate, autoplay=True))

# Constant Variables

In [5]:
BUCKET = 'kapeles'
DATA_PATH ='BDCC_Dataset/downloads/manual'
TRAIN_PATH =  os.path.join(DATA_PATH, 'train')
TEST_PATH = os.path.join(DATA_PATH, 'test')
SAMPLE_PATH = os.path.join(DATA_PATH, 'sample')

VECTOR_PATH = 'BDCC_Dataset/inception_resnet_vectors'
TARGET_SIZE = (1024, 1024)

# Extract image vectors based on InceptionResNetV2

## Create Inception Resnset V2 base

In [6]:
# Inception Resnet V2 base without top layer and specified input shape
base = inception_resnet.InceptionResNetV2(weights='imagenet',
                                          include_top=False,
                                          input_shape=(TARGET_SIZE +
                                                       tuple([3])),
                                          pooling='avg')
# Freeze layers
for layer in base.layers:
    layer.trainable=False
base.summary()

Model: "inception_resnet_v2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1024, 1024,  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 511, 511, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 511, 511, 32) 96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 511, 511, 32) 0           batch_normalization[0][0]        
________________________________________________________________________________

## Predict each images using Inception Resnet V2 base

In [7]:
s3 = boto3.resource('s3', region_name='us-east-2')
bucket = s3.Bucket(BUCKET)

In [7]:
def get_image(key, bucket):
    """Return numpy array of image from the s3 bucket"""
    obj = bucket.Object(key)
    response = obj.get()
    file_stream = response['Body']
    im = Image.open(file_stream).resize(TARGET_SIZE)
    return np.array(im)
    

def write_json_file(data, filename):
    """Write json file in the VECTOR_PATH"""
    key = os.path.join(VECTOR_PATH, filename)
    (s3.Object(BUCKET, key)
     .put(Body=bytes(json.dumps(data).encode('UTF-8'))))
    

def read_json_file(filename):
    """Reads json file in the VECTOR_PATH"""
    obj = bucket.Object(os.path.join(VECTOR_PATH, filename))
    response = obj.get()
    file_content = response['Body'].read()
    return json.loads(file_content)

In [None]:
file_no = 100
train_images = list(bucket.objects.filter(Prefix=TRAIN_PATH))
count = len(train_images)
print('Extracting image vectors for {} images'.format(count))
images = []
for idx in tqdm(range(count)):
    obj = train_images[idx]
    try:
        # Get the numpy matrix of the image
        dct = dict()
        img = get_image(obj.key, bucket)
        
        # Preprocess input based on the training of convolutional base
        nimage = inception_resnet.preprocess_input(img)
        nimage = np.expand_dims(nimage, axis=0)
        
        # Extracted image vectors
        image_vector = base.predict(nimage)
        image_vector = np.reshape(image_vector, image_vector.shape[1:])
        
        dct["name"] = os.path.basename(obj.key)
        dct["value"] = image_vector.tolist()
        images.append(dct)
        
        # Check the image is in the hundredth or the last image
        if (not file_no // (idx + 1)) or count == (idx + 1):
            file_name = f'train_{file_no//100}.json'
            write_json_file(images, file_name)
            file_no += 100
            images = []
    except Exception as e:
        alert(5)
        print('Unable to read {}:{}'.format(idx, obj.key)) 

Extracting image vectors for 35127 images


HBox(children=(FloatProgress(value=0.0, max=35127.0), HTML(value='')))

In [None]:
file_no = 100
test_images = list(bucket.objects.filter(Prefix=TEST_PATH))
count = len(test_images)
print('Extracting image vectors for {} images'.format(count))
images = []
for idx in tqdm(range(count)):
    obj = test_images[idx]
    try:
        # Get the numpy matrix of the image
        dct = dict()
        img = get_image(obj.key, bucket)
        
        # Preprocess input based on the training of convolutional base
        nimage = inception_resnet.preprocess_input(img)
        nimage = np.expand_dims(nimage, axis=0)
        
        # Extracted image vectors
        image_vector = base.predict(nimage)
        image_vector = np.reshape(image_vector, image_vector.shape[1:])
        
        dct["name"] = os.path.basename(obj.key)
        dct["value"] = image_vector.tolist()
        images.append(dct)
        
        # Check the image is in the hundredth or the last image
        if (not file_no // (idx + 1)) or count == (idx + 1):
            file_name = f'test_{file_no//100}.json'
            write_json_file(images, file_name)
            file_no += 100
            images = []
    except Exception as e:
        alert(5)
        print('Unable to read {}:{}'.format(idx, obj.key)) 

In [56]:
file_no = 100
sample_images = list(bucket.objects.filter(Prefix=SAMPLE_PATH))
count = len(sample_images)
print('Extracting image vectors for {} images'.format(count))
images = []
for idx in tqdm(range(count)):
    obj = sample_images[idx]
    try:
        # Get the numpy matrix of the image
        dct = dict()
        img = get_image(obj.key, bucket)
        
        # Preprocess input based on the training of convolutional base
        nimage = inception_resnet.preprocess_input(img)
        nimage = np.expand_dims(nimage, axis=0)
        
        # Extracted image vectors
        image_vector = base.predict(nimage)
        image_vector = np.reshape(image_vector, image_vector.shape[1:])
        
        dct["name"] = os.path.basename(obj.key)
        dct["value"] = image_vector.tolist()
        images.append(dct)
        
        # Check the image is in the hundredth or the last image
        if (not file_no // (idx + 1)) or count == (idx + 1):
            file_name = f'sample_{file_no//100}.json'
            write_json_file(images, file_name)
            file_no += 100
            images = []
    except Exception as e:
        alert(5)
        print('Unable to read {}:{}'.format(idx, obj.key)) 

Extracting image vectors for 11 images


HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

Unable to read 10:BDCC_Dataset/downloads/manual/sampleSubmission.csv



# Setup

## Create Dask Cluster

In [8]:
# Create dask cluster
client = Client('172.31.31.30:8786')

# Register progress bar
pbar = ProgressBar()
pbar.register()

# Get AWS Config
with open('setting_aws.pkl', 'rb') as f:
    settings_aws = pickle.load(f)

# Show cluster
client

0,1
Client  Scheduler: tcp://172.31.31.30:8786  Dashboard: http://172.31.31.30:8787/status,Cluster  Workers: 2  Cores: 12  Memory: 50.47 GB


# Prepare dataset

In [217]:
test_label_path = ('s3://kapeles/BDCC_Dataset/downloads/'
                   'kagg-foru-mess-atta_9052_2877_reti_solupq'
                   '89ZoBOgYh9qJuSYbz7faT1us7bBw8bXbg_qHmrYMk.csv')
train_label_path = ('s3://kapeles/BDCC_Dataset/downloads/manual'
                    '/trainLabels.csv')
train_label = dd.read_csv(train_label_path, storage_options=settings_aws)
test_label = dd.read_csv(test_label_path,
                         usecols=['image', 'level'],
                         storage_options=settings_aws)

all_labels = train_label.append(test_label).compute()
all_labels.image = all_labels.image.apply(lambda x: str(x)+'.jpeg')
del train_label_path, test_label_path, train_label, test_label

## Split dataset 60 20 20

In [218]:
x_train, x_test, y_train, y_test = train_test_split(all_labels.image,
                                                    all_labels.level,
                                                    test_size=0.2,
                                                    random_state=2021)
train_labels = all_labels.loc[all_labels.image.isin(x_train)]
x_train, x_val, y_train, y_val = train_test_split(train_labels.image,
                                                  train_labels.level,
                                                  test_size=0.25,
                                                  random_state=2021)

del all_labels, train_labels

## Perform sampling to solve imbalance data

In [219]:
n_samples = int(y_train.value_counts().mean())
x_train.reset_index(drop=True, inplace=True)
y_train.reset_index(drop=True, inplace=True)
new_x_train, new_y_train = pd.Series(dtype='object'), pd.Series(dtype='int32')
for cls in y_train.unique():
    idx = y_train[y_train==cls].index
    x = x_train.loc[idx]
    y = y_train.loc[idx]
    
    new_x, new_y = resample(x, y, replace=True, n_samples=n_samples,
                            random_state=2021)
    new_x_train = new_x_train.append(new_x)
    new_y_train = new_y_train.append(new_y)

x_train, y_train = new_x_train, new_y_train
del new_x_train, new_y_train

In [220]:
y_train.value_counts()

4    10644
3    10644
2    10644
1    10644
0    10644
dtype: int64

## Create Generator

Create a data generator to get batches from train data.

In [158]:
def data_generator(captions, data, batch_size):
    """
    Create a generator that for 2 inputs (image vector, caption sequence)
    output is the next word in the sequence

    Parameters
    ----------
        captions : dict
            image filename as key and level as value
        data : dask bag
            image filaname as key and vectors from convolutional base as value
        img_map : list or iterator
            list or iterator to include, exclude data not in the img_map
        batch_size : int
            batch_size
    
    Returns
    -------
        result : 2 tuples
            first element is the 2 inputs image vector from the convolutional
            base and text to sequence of the caption
            second element is the output is the next word in the sequence
            ([image_vector, sequence], next_word)
    """
    x, y = [], []
    n = 0
    while 1:
        for image_name, vector in data.items():
            # Get image vector based on the convolutional base
            target = captions[image_name]
            # Set the value of target to binary
            target = to_categorical([target], num_classes=5)[0]
            x.append(vector)
            y.append(target)
            n += 1
            if n == batch_size:
                yield np.array(x), np.array(y)
                x, y = [], []
                n = 0

## Read data using Dask Bag

In [10]:
def join_dict(x, y):
    """Update dictionary x with values of dictionary y"""
    x.update(y)
    return x

In [221]:
# Read training data from S3
data = (db.read_text(os.path.join('s3://', BUCKET, VECTOR_PATH, '*.json'),
                     storage_options=settings_aws)
        .map(json.loads)
        .flatten()
        .persist())

In [None]:
train_data = (data.filter(lambda x: x['name'] in x_train.values)
              .map(lambda x: {x['name']: np.asarray(x['value'])})
              .fold(binop=lambda x, y: join_dict(x, y),
                    combine=lambda x, y: join_dict(x, y))
              .compute())

In [None]:
val_data = (data.filter(lambda x: x['name'] in x_val.values)
            .map(lambda x: {x['name']: np.asarray(x['value'])})
            .fold(binop=lambda x, y: join_dict(x, y),
                  combine=lambda x, y: join_dict(x, y))
            .compute())

In [None]:
# Make train labels to dictionary for easier implementation
train_labels = pd.Series(y_train.values,index=x_train.values).to_dict()
val_labels = pd.Series(y_val.values, index=x_val.values).to_dict()

# Training the model

## Playing with Batch size

In [183]:
def create_toplayers():
    """Create top layer using inceptionb resnet v2 base"""
    # Create input layer based on the output of the convolutional base
    lyr_input = Input(shape=(base.layers[-1].output.shape.as_list()[1], ))

    # Add dropout layer
    lyr_dropout1 = Dropout(0.25)(lyr_input)
    # Add Dense
    lyr_dense1 = Dense(254, activation='relu')(lyr_dropout1)
    # Add dropout layer
    lyr_dropout2 = Dropout(0.25)(lyr_dense1)

    # Create output layer
    output = Dense(5, activation='softmax')(lyr_dropout2)

    model = Model(inputs=[lyr_input], outputs=[output])
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy',
                  metrics = ['accuracy'])
    return model

### Batch size = 512

In [184]:
batch_size = 512
train_gen = data_generator(train_labels, train_data, batch_size)
val_gen = data_generator(val_labels, val_data, batch_size)
train_steps = len(train_labels) // batch_size
val_steps = len(val_labels) // batch_size

In [185]:
checkpoint = ModelCheckpoint('tune/batch_size512.h5',
                             monitor='val_accuracy',
                             verbose=1, save_best_only=True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_accuracy',
                                   factor=0.7,
                                   patience=3,
                                   cooldown=3,
                                   verbose=1,
                                   min_lr=0.0001)
early = EarlyStopping(monitor="val_accuracy", 
                      verbose=1,
                      patience=30)

In [186]:
model = create_toplayers()
model.fit(train_gen,
          steps_per_epoch=train_steps,
          validation_data = val_gen,
          validation_steps = val_steps,
          epochs=200,
          callbacks=[checkpoint, reduceLROnPlat, early])

Epoch 1/200

Epoch 00001: val_accuracy improved from -inf to 0.68773, saving model to tune/batch_size512.h5
Epoch 2/200

Epoch 00002: val_accuracy did not improve from 0.68773
Epoch 3/200

Epoch 00003: val_accuracy improved from 0.68773 to 0.69003, saving model to tune/batch_size512.h5
Epoch 4/200

Epoch 00004: val_accuracy did not improve from 0.69003
Epoch 5/200

Epoch 00005: val_accuracy did not improve from 0.69003
Epoch 6/200

Epoch 00006: val_accuracy did not improve from 0.69003

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0007000000332482159.
Epoch 7/200

Epoch 00007: val_accuracy did not improve from 0.69003
Epoch 8/200

Epoch 00008: val_accuracy did not improve from 0.69003
Epoch 9/200

Epoch 00009: val_accuracy did not improve from 0.69003
Epoch 10/200

Epoch 00010: val_accuracy did not improve from 0.69003
Epoch 11/200

Epoch 00011: val_accuracy did not improve from 0.69003

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0004900000232737511.
Epoch

<tensorflow.python.keras.callbacks.History at 0x7fa9bbd9ec50>

### Batch size = 80

In [33]:
batch_size=80
train_gen = data_generator(labels, train_data, batch_size)
steps = len(train_images)//batch_size

In [35]:
checkpoint = ModelCheckpoint('tune/batch_size80.h5', monitor='loss',
                             verbose=0, save_best_only=True, mode='min')

reduceLROnPlat = ReduceLROnPlateau(monitor='loss',
                                   factor=0.7,
                                   patience=3,
                                   mode='min',
                                   cooldown=3,
                                   verbose=1,
                                   min_lr=0.0001)
early = EarlyStopping(monitor="loss", 
                      mode="min",
                      verbose=1,
                      patience=10)

In [36]:
model = create_toplayers()
model.fit_generator(train_gen,
                    steps_per_epoch=steps,
                    epochs=200,
                    callbacks=[checkpoint, reduceLROnPlat, early])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0007000000332482159.
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0004900000232737511.
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 00035: ReduceLROnPlateau reducing learning rate to 0.00034300000406801696.
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 00041: ReduceLROnPlateau reducing learning rate to 0.00024009999469853935.
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 00046: ReduceLROnPlateau reducing learning rate to 0.00016806999628897755.
Epoch 47

<tensorflow.python.keras.callbacks.History at 0x7f92e454e750>

## Remove first Dropout layer

In [178]:
def create_toplayers():
    """Create top layer using inceptionb resnet v2 base"""
    # Create input layer based on the output of the convolutional base
    lyr_input = Input(shape=(base.layers[-1].output.shape.as_list()[1], ))

    # Add Dense
    lyr_dense1 = Dense(254, activation='relu')(lyr_input)
    # Add dropout layer
    lyr_dropout2 = Dropout(0.25)(lyr_dense1)

    # Create output layer
    output = Dense(5, activation='softmax')(lyr_dropout2)

    model = Model(inputs=[lyr_input], outputs=[output])
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy',
                  metrics = ['categorical_accuracy'])
    return model

In [179]:
batch_size = 512
train_gen = data_generator(train_labels, train_data, batch_size)
val_gen = data_generator(val_labels, val_data, batch_size)
train_steps = len(train_labels) // batch_size
val_steps = len(val_labels) // batch_size

In [180]:
checkpoint = ModelCheckpoint('tune/remove_dropout1.h5',
                             monitor='val_categorical_accuracy',
                             verbose=1, save_best_only=True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_categorical_accuracy',
                                   factor=0.7,
                                   patience=3,
                                   cooldown=3,
                                   verbose=1,
                                   min_lr=0.0001)
early = EarlyStopping(monitor="val_categorical_accuracy", 
                      verbose=1,
                      patience=30)

In [181]:
model = create_toplayers()
model.fit(train_gen,
          steps_per_epoch=train_steps,
          validation_data = val_gen,
          validation_steps = val_steps,
          epochs=200,
          callbacks=[checkpoint, reduceLROnPlat, early])

Epoch 1/200

Epoch 00001: val_categorical_accuracy improved from -inf to 0.31733, saving model to tune/remove_dropout1.h5
Epoch 2/200

Epoch 00002: val_categorical_accuracy improved from 0.31733 to 0.52889, saving model to tune/remove_dropout1.h5
Epoch 3/200

Epoch 00003: val_categorical_accuracy improved from 0.52889 to 0.66745, saving model to tune/remove_dropout1.h5
Epoch 4/200

Epoch 00004: val_categorical_accuracy improved from 0.66745 to 0.69692, saving model to tune/remove_dropout1.h5
Epoch 5/200

Epoch 00005: val_categorical_accuracy did not improve from 0.69692
Epoch 6/200

Epoch 00006: val_categorical_accuracy did not improve from 0.69692
Epoch 7/200

Epoch 00007: val_categorical_accuracy did not improve from 0.69692

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0007000000332482159.
Epoch 8/200

Epoch 00008: val_categorical_accuracy did not improve from 0.69692
Epoch 9/200

Epoch 00009: val_categorical_accuracy did not improve from 0.69692
Epoch 10/200

Epoch 00


Epoch 00033: val_categorical_accuracy did not improve from 0.71122

Epoch 00033: ReduceLROnPlateau reducing learning rate to 0.00011764899536501615.
Epoch 34/200

Epoch 00034: val_categorical_accuracy did not improve from 0.71122
Epoch 35/200

Epoch 00035: val_categorical_accuracy did not improve from 0.71122
Epoch 36/200

Epoch 00036: val_categorical_accuracy did not improve from 0.71122
Epoch 37/200

Epoch 00037: val_categorical_accuracy did not improve from 0.71122
Epoch 38/200

Epoch 00038: val_categorical_accuracy did not improve from 0.71122

Epoch 00038: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 39/200

Epoch 00039: val_categorical_accuracy did not improve from 0.71122
Epoch 40/200

Epoch 00040: val_categorical_accuracy did not improve from 0.71122
Epoch 41/200

Epoch 00041: val_categorical_accuracy did not improve from 0.71122
Epoch 42/200

Epoch 00042: val_categorical_accuracy did not improve from 0.71122
Epoch 43/200

Epoch 00043: val_categorical_accuracy did

<tensorflow.python.keras.callbacks.History at 0x7fa9d04dc290>

## Remove first Dropout layer and Add Dense layer

In [193]:
def create_toplayers():
    """Create top layer using inceptionb resnet v2 base"""
    # Create input layer based on the output of the convolutional base
    lyr_input = Input(shape=(base.layers[-1].output.shape.as_list()[1], ))

    # Add Dense
    lyr_dense1 = Dense(1024, activation='relu')(lyr_input)
    lyr_dense2 = Dense(512, activation='relu')(lyr_dense1)
    lyr_dense3 = Dense(256, activation='relu')(lyr_dense2)


    # Create output layer
    output = Dense(5, activation='softmax')(lyr_dense3)

    model = Model(inputs=[lyr_input], outputs=[output])
    model.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy',
                  metrics = ['categorical_accuracy'])
    return model

In [194]:
batch_size = 512
train_gen = data_generator(train_labels, train_data, batch_size)
val_gen = data_generator(val_labels, val_data, batch_size)
train_steps = len(train_labels) // batch_size
val_steps = len(val_labels) // batch_size

In [195]:
model_fp = 'tune/add_more_dense1.h5'
checkpoint = ModelCheckpoint(model_fp,
                             monitor='val_categorical_accuracy',
                             verbose=1, save_best_only=True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_categorical_accuracy',
                                   factor=0.8,
                                   patience=3,
                                   cooldown=3,
                                   verbose=1,
                                   min_lr=0.0001)
early = EarlyStopping(monitor="val_categorical_accuracy", 
                      verbose=1,
                      patience=30)

In [196]:
model = create_toplayers()
model.fit(train_gen,
          steps_per_epoch=train_steps,
          validation_data = val_gen,
          validation_steps = val_steps,
          epochs=200,
          callbacks=[checkpoint, reduceLROnPlat])

Epoch 1/200

Epoch 00001: val_categorical_accuracy improved from -inf to 0.73667, saving model to tune/add_more_dense1.h5
Epoch 2/200

Epoch 00002: val_categorical_accuracy did not improve from 0.73667
Epoch 3/200

Epoch 00003: val_categorical_accuracy did not improve from 0.73667
Epoch 4/200

Epoch 00004: val_categorical_accuracy did not improve from 0.73667

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.000800000037997961.
Epoch 5/200

Epoch 00005: val_categorical_accuracy did not improve from 0.73667
Epoch 6/200

Epoch 00006: val_categorical_accuracy did not improve from 0.73667
Epoch 7/200

Epoch 00007: val_categorical_accuracy did not improve from 0.73667
Epoch 8/200

Epoch 00008: val_categorical_accuracy did not improve from 0.73667
Epoch 9/200

Epoch 00009: val_categorical_accuracy did not improve from 0.73667

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0006400000303983689.
Epoch 10/200

Epoch 00010: val_categorical_accuracy did not improve from 0.73


Epoch 00033: val_categorical_accuracy did not improve from 0.75299
Epoch 34/200

Epoch 00034: val_categorical_accuracy did not improve from 0.75299
Epoch 35/200

Epoch 00035: val_categorical_accuracy did not improve from 0.75299

Epoch 00035: ReduceLROnPlateau reducing learning rate to 0.0002621440216898918.
Epoch 36/200

Epoch 00036: val_categorical_accuracy did not improve from 0.75299
Epoch 37/200

Epoch 00037: val_categorical_accuracy did not improve from 0.75299
Epoch 38/200

Epoch 00038: val_categorical_accuracy did not improve from 0.75299
Epoch 39/200

Epoch 00039: val_categorical_accuracy did not improve from 0.75299
Epoch 40/200

Epoch 00040: val_categorical_accuracy did not improve from 0.75299

Epoch 00040: ReduceLROnPlateau reducing learning rate to 0.00020971521735191345.
Epoch 41/200

Epoch 00041: val_categorical_accuracy did not improve from 0.75299
Epoch 42/200

Epoch 00042: val_categorical_accuracy did not improve from 0.75299
Epoch 43/200

Epoch 00043: val_categoric


Epoch 00066: val_categorical_accuracy did not improve from 0.75299
Epoch 67/200

Epoch 00067: val_categorical_accuracy did not improve from 0.75299
Epoch 68/200

Epoch 00068: val_categorical_accuracy did not improve from 0.75299
Epoch 69/200

Epoch 00069: val_categorical_accuracy did not improve from 0.75299
Epoch 70/200

Epoch 00070: val_categorical_accuracy did not improve from 0.75299
Epoch 71/200

Epoch 00071: val_categorical_accuracy did not improve from 0.75299
Epoch 72/200

Epoch 00072: val_categorical_accuracy did not improve from 0.75299
Epoch 73/200

Epoch 00073: val_categorical_accuracy did not improve from 0.75299
Epoch 74/200

Epoch 00074: val_categorical_accuracy did not improve from 0.75299
Epoch 75/200

Epoch 00075: val_categorical_accuracy did not improve from 0.75299
Epoch 76/200

Epoch 00076: val_categorical_accuracy did not improve from 0.75299
Epoch 77/200

Epoch 00077: val_categorical_accuracy did not improve from 0.75299
Epoch 78/200

Epoch 00078: val_categorica


Epoch 00101: val_categorical_accuracy did not improve from 0.75299
Epoch 102/200

Epoch 00102: val_categorical_accuracy did not improve from 0.75299
Epoch 103/200

Epoch 00103: val_categorical_accuracy did not improve from 0.75299
Epoch 104/200

Epoch 00104: val_categorical_accuracy did not improve from 0.75299
Epoch 105/200

Epoch 00105: val_categorical_accuracy did not improve from 0.75299
Epoch 106/200

Epoch 00106: val_categorical_accuracy did not improve from 0.75299
Epoch 107/200

Epoch 00107: val_categorical_accuracy did not improve from 0.75299
Epoch 108/200

Epoch 00108: val_categorical_accuracy did not improve from 0.75299
Epoch 109/200

Epoch 00109: val_categorical_accuracy did not improve from 0.75299
Epoch 110/200

Epoch 00110: val_categorical_accuracy did not improve from 0.75299
Epoch 111/200

Epoch 00111: val_categorical_accuracy did not improve from 0.75299
Epoch 112/200

Epoch 00112: val_categorical_accuracy did not improve from 0.75299
Epoch 113/200

Epoch 00113: va


Epoch 00136: val_categorical_accuracy did not improve from 0.75299
Epoch 137/200

Epoch 00137: val_categorical_accuracy did not improve from 0.75299
Epoch 138/200

Epoch 00138: val_categorical_accuracy did not improve from 0.75299
Epoch 139/200

Epoch 00139: val_categorical_accuracy did not improve from 0.75299
Epoch 140/200

Epoch 00140: val_categorical_accuracy did not improve from 0.75299
Epoch 141/200

Epoch 00141: val_categorical_accuracy did not improve from 0.75299
Epoch 142/200

Epoch 00142: val_categorical_accuracy did not improve from 0.75299
Epoch 143/200

Epoch 00143: val_categorical_accuracy did not improve from 0.75299
Epoch 144/200

Epoch 00144: val_categorical_accuracy did not improve from 0.75299
Epoch 145/200

Epoch 00145: val_categorical_accuracy did not improve from 0.75299
Epoch 146/200

Epoch 00146: val_categorical_accuracy did not improve from 0.75299
Epoch 147/200

Epoch 00147: val_categorical_accuracy did not improve from 0.75299
Epoch 148/200

Epoch 00148: va


Epoch 00171: val_categorical_accuracy did not improve from 0.75299
Epoch 172/200

Epoch 00172: val_categorical_accuracy did not improve from 0.75299
Epoch 173/200

Epoch 00173: val_categorical_accuracy did not improve from 0.75299
Epoch 174/200

Epoch 00174: val_categorical_accuracy did not improve from 0.75299
Epoch 175/200

Epoch 00175: val_categorical_accuracy did not improve from 0.75299
Epoch 176/200

Epoch 00176: val_categorical_accuracy did not improve from 0.75299
Epoch 177/200

Epoch 00177: val_categorical_accuracy did not improve from 0.75299
Epoch 178/200

Epoch 00178: val_categorical_accuracy did not improve from 0.75299
Epoch 179/200

Epoch 00179: val_categorical_accuracy did not improve from 0.75299
Epoch 180/200

Epoch 00180: val_categorical_accuracy did not improve from 0.75299
Epoch 181/200

Epoch 00181: val_categorical_accuracy did not improve from 0.75299
Epoch 182/200

Epoch 00182: val_categorical_accuracy did not improve from 0.75299
Epoch 183/200

Epoch 00183: va

<tensorflow.python.keras.callbacks.History at 0x7fa9d0e95810>

In [198]:
model = load_model(model_fp)
print('train loss {} accuracy {}'.format(*model.evaluate(train_gen, steps=train_steps)))

train loss 1.2557694911956787 accuracy 0.4284784197807312


In [199]:
model = load_model(model_fp)
print('train loss {} accuracy {}'.format(*model.evaluate(val_gen, steps=val_steps)))

train loss 0.8216578364372253 accuracy 0.7533317804336548


## Remove first Dropout layer and Add Dense layer, Reduce Batch size

In [206]:
def create_toplayers():
    """Create top layer using inceptionb resnet v2 base"""
    # Create input layer based on the output of the convolutional base
    lyr_input = Input(shape=(base.layers[-1].output.shape.as_list()[1], ))

    # Add Dense
    lyr_dense1 = Dense(1024, activation='relu')(lyr_input)
    lyr_dense2 = Dense(512, activation='relu')(lyr_dense1)
    lyr_dense3 = Dense(256, activation='relu')(lyr_dense2)


    # Create output layer
    output = Dense(5, activation='softmax')(lyr_dense3)

    model = Model(inputs=[lyr_input], outputs=[output])
    model.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy',
                  metrics = ['categorical_accuracy'])
    return model

In [212]:
batch_size = 1024
train_gen = data_generator(train_labels, train_data, batch_size)
val_gen = data_generator(val_labels, val_data, batch_size)
train_steps = len(train_labels) // batch_size
val_steps = len(val_labels) // batch_size

In [213]:
model_fp = 'tune/add_more_dense2.h5'
checkpoint = ModelCheckpoint(model_fp,
                             monitor='val_categorical_accuracy',
                             verbose=1, save_best_only=True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_categorical_accuracy',
                                   factor=0.8,
                                   patience=3,
                                   cooldown=3,
                                   verbose=1,
                                   min_lr=0.0001)
early = EarlyStopping(monitor="val_categorical_accuracy", 
                      verbose=1,
                      patience=30)

In [214]:
model = create_toplayers()
model.fit(train_gen,
          steps_per_epoch=train_steps,
          validation_data = val_gen,
          validation_steps = val_steps,
          epochs=200,
          callbacks=[checkpoint, reduceLROnPlat])

Epoch 1/200

Epoch 00001: val_categorical_accuracy improved from -inf to 0.73667, saving model to tune/add_more_dense2.h5
Epoch 2/200

Epoch 00002: val_categorical_accuracy did not improve from 0.73667
Epoch 3/200

Epoch 00003: val_categorical_accuracy did not improve from 0.73667
Epoch 4/200

Epoch 00004: val_categorical_accuracy did not improve from 0.73667

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.000800000037997961.
Epoch 5/200

Epoch 00005: val_categorical_accuracy did not improve from 0.73667
Epoch 6/200

Epoch 00006: val_categorical_accuracy did not improve from 0.73667
Epoch 7/200

Epoch 00007: val_categorical_accuracy did not improve from 0.73667
Epoch 8/200

Epoch 00008: val_categorical_accuracy did not improve from 0.73667
Epoch 9/200

Epoch 00009: val_categorical_accuracy did not improve from 0.73667

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0006400000303983689.
Epoch 10/200

Epoch 00010: val_categorical_accuracy did not improve from 0.73


Epoch 00033: val_categorical_accuracy did not improve from 0.73667
Epoch 34/200

Epoch 00034: val_categorical_accuracy did not improve from 0.73667

Epoch 00034: ReduceLROnPlateau reducing learning rate to 0.00020971521735191345.
Epoch 35/200

Epoch 00035: val_categorical_accuracy did not improve from 0.73667
Epoch 36/200

Epoch 00036: val_categorical_accuracy did not improve from 0.73667
Epoch 37/200

Epoch 00037: val_categorical_accuracy did not improve from 0.73667
Epoch 38/200

Epoch 00038: val_categorical_accuracy did not improve from 0.73667
Epoch 39/200

Epoch 00039: val_categorical_accuracy did not improve from 0.73667

Epoch 00039: ReduceLROnPlateau reducing learning rate to 0.00016777217388153076.
Epoch 40/200

Epoch 00040: val_categorical_accuracy did not improve from 0.73667
Epoch 41/200

Epoch 00041: val_categorical_accuracy did not improve from 0.73667
Epoch 42/200

Epoch 00042: val_categorical_accuracy did not improve from 0.73667
Epoch 43/200

Epoch 00043: val_categori


Epoch 00067: val_categorical_accuracy did not improve from 0.73667
Epoch 68/200

Epoch 00068: val_categorical_accuracy did not improve from 0.73667
Epoch 69/200

Epoch 00069: val_categorical_accuracy did not improve from 0.73667
Epoch 70/200

Epoch 00070: val_categorical_accuracy did not improve from 0.73667
Epoch 71/200

Epoch 00071: val_categorical_accuracy did not improve from 0.73667
Epoch 72/200

Epoch 00072: val_categorical_accuracy did not improve from 0.73667
Epoch 73/200

Epoch 00073: val_categorical_accuracy did not improve from 0.73667
Epoch 74/200

Epoch 00074: val_categorical_accuracy did not improve from 0.73667
Epoch 75/200

Epoch 00075: val_categorical_accuracy did not improve from 0.73667
Epoch 76/200

Epoch 00076: val_categorical_accuracy did not improve from 0.73667
Epoch 77/200

Epoch 00077: val_categorical_accuracy did not improve from 0.73667
Epoch 78/200

Epoch 00078: val_categorical_accuracy did not improve from 0.73667
Epoch 79/200

Epoch 00079: val_categorica


Epoch 00102: val_categorical_accuracy did not improve from 0.73667
Epoch 103/200

Epoch 00103: val_categorical_accuracy did not improve from 0.73667
Epoch 104/200

Epoch 00104: val_categorical_accuracy did not improve from 0.73667
Epoch 105/200

Epoch 00105: val_categorical_accuracy did not improve from 0.73667
Epoch 106/200

Epoch 00106: val_categorical_accuracy did not improve from 0.73667
Epoch 107/200

Epoch 00107: val_categorical_accuracy did not improve from 0.73667
Epoch 108/200

Epoch 00108: val_categorical_accuracy did not improve from 0.73667
Epoch 109/200

Epoch 00109: val_categorical_accuracy did not improve from 0.73667
Epoch 110/200

Epoch 00110: val_categorical_accuracy did not improve from 0.73667
Epoch 111/200

Epoch 00111: val_categorical_accuracy did not improve from 0.73667
Epoch 112/200

Epoch 00112: val_categorical_accuracy did not improve from 0.73667
Epoch 113/200

Epoch 00113: val_categorical_accuracy did not improve from 0.73667
Epoch 114/200

Epoch 00114: va


Epoch 00136: val_categorical_accuracy did not improve from 0.73794
Epoch 137/200

Epoch 00137: val_categorical_accuracy did not improve from 0.73794
Epoch 138/200

Epoch 00138: val_categorical_accuracy did not improve from 0.73794
Epoch 139/200

Epoch 00139: val_categorical_accuracy did not improve from 0.73794
Epoch 140/200

Epoch 00140: val_categorical_accuracy did not improve from 0.73794
Epoch 141/200

Epoch 00141: val_categorical_accuracy did not improve from 0.73794
Epoch 142/200

Epoch 00142: val_categorical_accuracy did not improve from 0.73794
Epoch 143/200

Epoch 00143: val_categorical_accuracy did not improve from 0.73794
Epoch 144/200

Epoch 00144: val_categorical_accuracy did not improve from 0.73794
Epoch 145/200

Epoch 00145: val_categorical_accuracy did not improve from 0.73794
Epoch 146/200

Epoch 00146: val_categorical_accuracy did not improve from 0.73794
Epoch 147/200

Epoch 00147: val_categorical_accuracy did not improve from 0.73794
Epoch 148/200

Epoch 00148: va


Epoch 00171: val_categorical_accuracy did not improve from 0.73794
Epoch 172/200

Epoch 00172: val_categorical_accuracy did not improve from 0.73794
Epoch 173/200

Epoch 00173: val_categorical_accuracy did not improve from 0.73794
Epoch 174/200

Epoch 00174: val_categorical_accuracy did not improve from 0.73794
Epoch 175/200

Epoch 00175: val_categorical_accuracy did not improve from 0.73794
Epoch 176/200

Epoch 00176: val_categorical_accuracy did not improve from 0.73794
Epoch 177/200

Epoch 00177: val_categorical_accuracy did not improve from 0.73794
Epoch 178/200

Epoch 00178: val_categorical_accuracy did not improve from 0.73794
Epoch 179/200

Epoch 00179: val_categorical_accuracy did not improve from 0.73794
Epoch 180/200

Epoch 00180: val_categorical_accuracy did not improve from 0.73794
Epoch 181/200

Epoch 00181: val_categorical_accuracy did not improve from 0.73794
Epoch 182/200

Epoch 00182: val_categorical_accuracy did not improve from 0.73794
Epoch 183/200

Epoch 00183: va

<tensorflow.python.keras.callbacks.History at 0x7fa9bb763450>

In [215]:
model = load_model(model_fp)
print('train loss {} accuracy {}'.format(*model.evaluate(train_gen, steps=train_steps)))

train loss 1.0655215978622437 accuracy 0.52099609375


In [216]:
model = load_model(model_fp)
print('train loss {} accuracy {}'.format(*model.evaluate(val_gen, steps=val_steps)))

train loss 0.8140085935592651 accuracy 0.73828125


## Remove first Dropout layer and Add Dense layer, Reduce Batch size perform sampling using mean as n_samples

In [None]:
def create_toplayers():
    """Create top layer using inceptionb resnet v2 base"""
    # Create input layer based on the output of the convolutional base
    lyr_input = Input(shape=(base.layers[-1].output.shape.as_list()[1], ))

    # Add Dense
    lyr_dense1 = Dense(1024, activation='relu')(lyr_input)
    lyr_dense2 = Dense(512, activation='relu')(lyr_dense1)
    lyr_dense3 = Dense(256, activation='relu')(lyr_dense2)


    # Create output layer
    output = Dense(5, activation='softmax')(lyr_dense3)

    model = Model(inputs=[lyr_input], outputs=[output])
    model.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy',
                  metrics = ['categorical_accuracy'])
    return model

In [None]:
batch_size = 1024
train_gen = data_generator(train_labels, train_data, batch_size)
val_gen = data_generator(val_labels, val_data, batch_size)
train_steps = len(train_labels) // batch_size
val_steps = len(val_labels) // batch_size

In [None]:
model_fp = 'tune/add_more_dense3.h5'
checkpoint = ModelCheckpoint(model_fp,
                             monitor='val_categorical_accuracy',
                             verbose=1, save_best_only=True)

reduceLROnPlat = ReduceLROnPlateau(monitor='val_categorical_accuracy',
                                   factor=0.8,
                                   patience=3,
                                   cooldown=3,
                                   verbose=1,
                                   min_lr=0.0001)
early = EarlyStopping(monitor="val_categorical_accuracy", 
                      verbose=1,
                      patience=30)

In [None]:
model = create_toplayers()
model.fit(train_gen,
          steps_per_epoch=train_steps,
          validation_data = val_gen,
          validation_steps = val_steps,
          epochs=200,
          callbacks=[checkpoint, reduceLROnPlat])

In [None]:
model = load_model(model_fp)
print('train loss {} accuracy {}'.format(*model.evaluate(train_gen, steps=train_steps)))

In [None]:
model = load_model(model_fp)
print('train loss {} accuracy {}'.format(*model.evaluate(val_gen, steps=val_steps)))