In [7]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from shutil import copyfile
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.utils import to_categorical
from keras.applications import VGG16

In [2]:
def createDirectoryEnvironment(train_data, train_targets, val_data, val_targets, path):
    """Build the ``<split>/<class>/<image>`` directory tree under ``path``.

    Two sub-directories, ``train`` and ``validation``, are created inside
    ``path``, each holding one folder per class (the union of the columns of
    ``train_targets`` and ``val_targets``). Every image named in the index of
    ``train_data`` / ``val_data`` is then copied into the folder of the class
    whose column holds the largest value in that image's target row.

    Source images are read from ``<path>/../CV/<prefix>/icm/<filename>``,
    where ``<prefix>`` is the part of the filename that precedes the substring
    ``'frame'``, minus the single separator character just before it.

    If ``path`` already contains exactly the two entries ``train`` and
    ``validation``, the existing layout is reused and nothing is copied.
    Delete those folders to force a rebuild (e.g. after changing the split).

    Parameters
    ----------
    train_data, val_data : pandas.DataFrame
        Only the index (image filenames) is used.
    train_targets, val_targets : pandas.DataFrame
        Indexed by the same filenames, one column per class.
    path : str
        Directory under which the ``train`` and ``validation`` trees are built.

    Returns
    -------
    (str, str)
        Paths of the train and validation directories, each ending with a
        path separator.
    """
    trainpath = os.path.join(path, 'train', '')
    valpath = os.path.join(path, 'validation', '')

    # The original check compared a list with an int and could never be true;
    # the evident intent is to skip the work when the layout already exists.
    if os.path.isdir(path) and sorted(os.listdir(path)) == ['train', 'validation']:
        return trainpath, valpath

    os.makedirs(trainpath, exist_ok=True)
    os.makedirs(valpath, exist_ok=True)

    classes = train_targets.columns.append(val_targets.columns).drop_duplicates()
    for split_root in (trainpath, valpath):
        for c in classes:
            os.makedirs(os.path.join(split_root, str(c)), exist_ok=True)

    _copy_split(train_data, train_targets, path, trainpath)
    _copy_split(val_data, val_targets, path, valpath)

    return trainpath, valpath


def _copy_split(data, targets, path, split_root):
    """Copy each image in ``data.index`` into its class folder under ``split_root``."""
    for f in data.index.values:
        # Source folder name: everything before 'frame', dropping the separator.
        prefix = f[:f.find('frame') - 1]
        source = os.path.join(path, '..', 'CV', prefix, 'icm', f)
        # A single target row is a Series, so idxmax takes no axis argument.
        label = targets.loc[f].idxmax()
        copyfile(source, os.path.join(split_root, str(label), f))

In [5]:
# Load the sampled annotation table.
folder_path = "/home/diegues/Desktop/ProcessedImages/"
data = pd.read_csv(folder_path + "sampled_data.csv")

filenames = data['filename']
targets = data['level3']

# One-hot encode the level-3 labels: one indicator column per distinct value.
targets_ohe = pd.get_dummies(targets)

# Remove the label and metadata columns; whatever remains feeds X below.
data = data.drop(
    columns=[
        'roll', 'pitch',
        'level1', 'level2', 'level3', 'level4', 'level5', 'level6',
        'AphiaID', 'EunisName', 'EunisCode',
        'date', 'timestamp', 'species',
    ]
)

# Collapse to one row per image by taking the column-wise max of the rows
# that share a filename.
X = data.groupby('filename').max()
Y = pd.concat([filenames, targets_ohe], axis='columns').groupby('filename').max()

# Hold out 30% of the images for validation; the seed keeps the split repeatable.
train_X, test_X, train_Y, test_Y = train_test_split(
    X, Y, test_size=0.3, random_state=47
)

# Copy the images into the train/validation directory tree.
images_path = '/home/diegues/Desktop/ProcessedImages/SampledData/'
train_dir, val_dir = createDirectoryEnvironment(
    train_X, train_Y, test_X, test_Y, images_path
)

In [3]:
# Convolutional base: VGG16 with ImageNet weights and no dense top,
# built for 344x344 three-channel inputs.
vgg16_ft = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(344, 344, 3),
)
vgg16_ft.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 344, 344, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 344, 344, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 344, 344, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 172, 172, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 172, 172, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 172, 172, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 86, 86, 128)       0         
__________

In [4]:
# Full classifier: the VGG16 base followed by three 1024-unit ReLU layers
# (each with 50% dropout) and a 7-way softmax output.
eunis_classifier = Sequential([
    vgg16_ft,
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax'),
])

eunis_classifier.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 10, 10, 512)       14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 51200)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              52429824  
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 1024)              1049600   
__________

In [13]:
# Freeze the first layer of the stack (the VGG16 base, which was added first)
# so that training only updates the layers that follow it.
eunis_classifier.layers[0].trainable = False

In [16]:
# Sanity check: show every top-level layer next to its trainable flag.
for layer in eunis_classifier.layers:
    print("{} {}".format(layer, layer.trainable))

<keras.engine.training.Model object at 0x7fa38cb8fa90> False
<keras.layers.core.Flatten object at 0x7fa38c572fd0> True
<keras.layers.core.Dense object at 0x7fa38c574048> True
<keras.layers.core.Dropout object at 0x7fa38c5b54a8> True
<keras.layers.core.Dense object at 0x7fa38c60c1d0> True
<keras.layers.core.Dropout object at 0x7fa38c60c438> True
<keras.layers.core.Dense object at 0x7fa38cae4d30> True
<keras.layers.core.Dropout object at 0x7fa38c5ded68> True
<keras.layers.core.Dense object at 0x7fa38caaec18> True


In [5]:
# Mini-batch sizes used by the two directory iterators.
train_batchsize = 4
val_batchsize = 4

# Training images: pixel values multiplied by 1/255, plus random rotations,
# shifts and horizontal flips as augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(344, 344),
    class_mode='categorical',
    batch_size=train_batchsize,
)

# shuffle=False keeps the validation batches in a fixed order.
validation_generator = validation_datagen.flow_from_directory(
    directory=val_dir,
    target_size=(344, 344),
    class_mode='categorical',
    batch_size=val_batchsize,
    shuffle=False,
)

NameError: name 'train_dir' is not defined

In [8]:
# NOTE(review): in Keras' SGD, `decay` shrinks the rate on every batch update
# (presumably lr / (1 + decay * iterations) -- confirm for the installed version).
# With decay=0.5 the rate collapses within the first few hundred batches and
# also competes with ReduceLROnPlateau below. Value left unchanged here.
sgd_optimizer = SGD(lr=0.01, decay=0.5)
eunis_classifier.compile(loss='categorical_crossentropy',
                         optimizer=sgd_optimizer,
                         metrics=['acc'])

# Training curves and the graph are logged to ./Graph for TensorBoard.
tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)

# Create the checkpoint folder up front: otherwise a missing ./weights only
# surfaces when the first save is attempted, after a full epoch of training.
os.makedirs('./weights', exist_ok=True)
# Keep only the weights with the best validation loss seen so far.
best_checkpoint = ModelCheckpoint('./weights/FT-VGG16-Best.h5', monitor='val_loss', save_best_only=True)

# Stop after 5 epochs without val_loss improvement; cut the learning rate
# tenfold after 3 such epochs (never below 1e-7).
earlystop = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, min_delta=1e-5, min_lr=1e-7)

In [None]:
# Train for up to 100 epochs; the callbacks handle logging, checkpointing,
# early stopping and learning-rate reduction.
# Step counts come from the generators themselves (number of batches per pass,
# including the final partial batch). Floor division dropped that last batch,
# and because the validation generator is not shuffled, the same trailing
# images were left out of every validation pass.
history = eunis_classifier.fit_generator(train_generator,
                                         steps_per_epoch=len(train_generator),
                                         epochs=100,
                                         validation_data=validation_generator,
                                         validation_steps=len(validation_generator),
                                         callbacks=[tensorboard, best_checkpoint, earlystop, reducer]
                                        )

In [None]:
# Per-epoch metrics recorded during training.
loss = history.history['loss']
val_loss = history.history['val_loss']
acc = history.history['acc']
val_acc = history.history['val_acc']

epochs = range(len(acc))

# First figure: accuracy curves (blue = training, red = validation).
plt.plot(epochs, acc, 'b', label='Training acc')
plt.plot(epochs, val_acc, 'r', label='Validation acc')
plt.legend()
plt.title('Training and validation accuracy')

# Second figure: loss curves, same colour convention.
plt.figure()
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.legend()
plt.title('Training and validation loss')

plt.show()