## Evaluate transfer learning on Turnedtable Watertank (Dataset 2)  using an SVM Classifier: Jigsaw Puzzle pretraining


In [2]:
%config Completer.use_jedi = False
import keras
import keras.backend as K
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten, BatchNormalization
from keras.layers import Activation, AveragePooling2D, UpSampling2D, Reshape
from keras.models import Model, Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.engine import InputLayer
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.models import load_model

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
import numpy as np
import joblib
import h5py
import os

### configure Tensorflow/Keras for GPU device


In [3]:
tf.config.experimental_run_functions_eagerly(True)
print("[INFO] Tensorflow Version:", tf.__version__)

if tf.config.list_physical_devices("GPU") and tf.test.is_built_with_cuda():
    print("[INFO] Tensorflow built with CUDA")
    print("[INFO] Number GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
    print("[INFO] List of GPU devices:", tf.config.list_physical_devices("GPU"))
    physical_devices = tf.config.list_physical_devices("GPU")
    # tf.config.experimental.set_memory_growth(physical_devices[0], True)
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)

else:
    print("[ERROR] GPU not detected, make sure tensorflow-gpu is installed and that GPU is recognized")
    exit()

[INFO] Tensorflow Version: 2.2.0
[INFO] Tensorflow built with CUDA
[INFO] Number GPUs Available:  1
[INFO] List of GPU devices: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### define utilities



In [4]:
def flatten(x):
    return x.reshape((x.shape[0], -1))

def classSampling(X, y, samplesPerClass, numberOfClasses):
    X_ret = np.zeros((samplesPerClass * numberOfClasses, X.shape[1]), dtype = np.float32)
    y_ret = np.zeros((samplesPerClass * numberOfClasses), dtype = np.uint8)
    count = 0

    for classIdx in range(numberOfClasses):
        indices = np.where(y == classIdx)[0]

        #if len(indices) < samplesPerClass:
        #    raise IndexError("Not enough samples for class {} to produce {} samples per class. Only {} class samples available".format(classIdx, samplesPerClass, len(indices)))

        doResample = len(indices) < samplesPerClass

        chosenIndices = np.random.choice(indices, samplesPerClass, replace = doResample)

        for ci in chosenIndices:
            X_ret[count] = X[ci]
            y_ret[count] = y[ci]

            count += 1

    return X_ret, y_ret

### load turntable dataset 

In [5]:
class SonarTurnedTableSupervised(object):
    def __init__(self, file_path):
        self.file_path = file_path

    def get_sonar_data(self):
        """
        Reads from HDF5 file containing sonar data (resized to fix dims).
        Returns list of np arrays containing image data.
        """

        print("[INFO] Retrieving Sonar Turned Table Supervised Data")

        with h5py.File(self.file_path, "r") as f:
            # list all groups
            print("hdf5 dataset keys: %s" % f.keys())

            # get images and labels
            # x_train_val = list(f["x_train"])
            # y_train_val = list(f["y_train"])
            x_train = f["x_train"][...].astype(np.float32)
            y_train = f["y_train"][...]

            # x_test = list(f["x_test"])
            # y_test = list(f["y_test"])
            x_test = f["x_test"][...].astype(np.float32)
            y_test = f["y_test"][...]

            _, x_val, _, y_val = train_test_split(x_test, y_test, train_size=0.5)

            print("[INFO] Data dimensions")
            print("Train", len(x_train))
            print("Val", len(x_val))
            print("Test", len(x_test))

        return (x_train, y_train), (x_val, y_val), (x_test, y_test)
    
def load_sonar_turnedtable_supervised(file_path):
    """
    """
    print()
    print("[INFO] Loading Tf datasets")

    dataset_object = SonarTurnedTableSupervised(file_path)

    # Read data
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = dataset_object.get_sonar_data()
    
    return (x_train, y_train), (x_val, y_val), (x_test, y_test)

In [6]:
turntable_data_path = "../../../datasets/sonar_turntable_dataset_2/marine-debris-turntable-classification-object_classes-platform-96x96.hdf5"
(x_train, y_train), (x_val, y_val), (x_test, y_test) = load_sonar_turnedtable_supervised(turntable_data_path)


[INFO] Loading Tf datasets
[INFO] Retrieving Sonar Turned Table Supervised Data
hdf5 dataset keys: <KeysViewHDF5 ['class_names', 'x_test', 'x_train', 'y_test', 'y_train']>
[INFO] Data dimensions
Train 1505
Val 323
Test 645


### load pretrained models 

* define no. permutations
* define intermediate layers

In [7]:
epochs = 21
no_permutations = 5 # 5 classes for wild data
cuts = 3
model_name = "feature_extractor_{}x{}_puzzle_{}_epochs_{}_perms".format(cuts, cuts, epochs, no_permutations)

feature_extractor = load_model(os.path.join("trained_models", "{}x{}".format(cuts, cuts), "{}_classes".format(no_permutations), model_name + ".h5"))
print()
feature_extractor.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 32)        320       
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 16)        4624      
_________________________________________________________________
batch_normalization_1 (Batch (None, 16, 16, 16)        64        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 16)          

In [8]:
layers = ["dropout", "dropout_1", "dropout_2"]
feature_extractor_dropout0 = Model(inputs=feature_extractor.input, outputs=feature_extractor.get_layer(layers[0]).output)
feature_extractor_dropout1 = Model(inputs=feature_extractor.input, outputs=feature_extractor.get_layer(layers[1]).output)
feature_extractor_dropout2 = Model(inputs=feature_extractor.input, outputs=feature_extractor.get_layer(layers[2]).output)

In [9]:
feature_extractor_dropout1.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_input (InputLayer)    [(None, 32, 32, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 32, 32, 32)        320       
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 16)        4624      
_________________________________________________________________
batch_normalization_1 (Batch (None, 16, 16, 16)        64  

### Define feature extractors with new input (96, 96, 1)

In [12]:
newInput = Input(shape=(96, 96, 1))

pretrained_model_dropout0 = feature_extractor_dropout0(newInput)
pretrained_model_dropout1 = feature_extractor_dropout1(newInput)
pretrained_model_dropout2 = feature_extractor_dropout2(newInput)

pretrained_model_dropout0 = Model(newInput, pretrained_model_dropout0)
pretrained_model_dropout1 = Model(newInput, pretrained_model_dropout1)
pretrained_model_dropout2 = Model(newInput, pretrained_model_dropout2)



In [13]:
pretrained_model_dropout0.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 96, 96, 1)]       0         
_________________________________________________________________
model (Model)                multiple                  448       
Total params: 448
Trainable params: 384
Non-trainable params: 64
_________________________________________________________________


### generate vector embeddings for train and test data (for all models)

In [14]:
print(x_train.shape)
print(x_test.shape)
print()

# drop 0
train_embeddings_dropout0 = pretrained_model_dropout0.predict(x_train)
test_embeddings_dropout0 = pretrained_model_dropout0.predict(x_test)

# drop 1
train_embeddings_dropout1 = pretrained_model_dropout1.predict(x_train)
test_embeddings_dropout1 = pretrained_model_dropout1.predict(x_test)

# drop 2
train_embeddings_dropout2 = pretrained_model_dropout2.predict(x_train)
test_embeddings_dropout2 = pretrained_model_dropout2.predict(x_test)

(1505, 96, 96, 1)
(645, 96, 96, 1)













In [15]:
print("Dropout0 embeddings")
print(train_embeddings_dropout0.shape)
print(test_embeddings_dropout0.shape)
print()

print("Dropout1 embeddings")
print(train_embeddings_dropout1.shape)
print(test_embeddings_dropout1.shape)
print()

print("Dropout2 embeddings")
print(train_embeddings_dropout2.shape)
print(test_embeddings_dropout2.shape)
print()

Dropout0 embeddings
(1505, 48, 48, 32)
(645, 48, 48, 32)

Dropout1 embeddings
(1505, 24, 24, 16)
(645, 24, 24, 16)

Dropout2 embeddings
(1505, 12, 12, 8)
(645, 12, 12, 8)



### reshape into 2D for SVM classifier


In [16]:
train_embeddings_dropout0 = train_embeddings_dropout0.reshape(train_embeddings_dropout0.shape[0], -1)
test_embeddings_dropout0 = test_embeddings_dropout0.reshape(test_embeddings_dropout0.shape[0], -1)

train_embeddings_dropout1 = train_embeddings_dropout1.reshape(train_embeddings_dropout1.shape[0], -1)
test_embeddings_dropout1 = test_embeddings_dropout1.reshape(test_embeddings_dropout1.shape[0], -1)

train_embeddings_dropout2 = train_embeddings_dropout2.reshape(train_embeddings_dropout2.shape[0], -1)
test_embeddings_dropout2 = test_embeddings_dropout2.reshape(test_embeddings_dropout2.shape[0], -1)

print("Reshaped Dropout0 embeddings")
print(test_embeddings_dropout0.shape)
print(test_embeddings_dropout0.shape)
print()

print("Reshaped Dropout1 embeddings")
print(test_embeddings_dropout1.shape)
print(test_embeddings_dropout1.shape)
print()

print("Reshaped Dropout2 embeddings")
print(test_embeddings_dropout2.shape)
print(test_embeddings_dropout2.shape)

Reshaped Dropout0 embeddings
(645, 73728)
(645, 73728)

Reshaped Dropout1 embeddings
(645, 9216)
(645, 9216)

Reshaped Dropout2 embeddings
(645, 1152)
(645, 1152)


### Transfer Learning setup: classification with subsamples per object class (few shot learning)

In [17]:
SAMPLES_PER_CLASS = [10, 20, 30, 40, 50, 80, 110, 140, 170, 200]
# SAMPLES_PER_CLASS = [170, 200]
TRIALS = 5
NUM_CLASSES_TURNEDTABLE = 12

In [18]:
# NOTE: y_train and y_test are already numpy arrays
y_test.shape

(645,)

### Run svm tl evaluation with spc for each layer defined

In [22]:
def train_svm_with_spc(x_train, y_train, x_test, y_test, layer_name):
    """
    Takes embeddings from pretrained model and evaluates transfer learning 
    with few samples per class.
    """
    # NOTE: svm takes original labels (not one-hot encoding)
    for spc in SAMPLES_PER_CLASS:
        accuracies = []

        for i in range(TRIALS):
            x_sample, y_sample = classSampling(x_train, y_train, spc, NUM_CLASSES_TURNEDTABLE)
            
#             rf_clf = RandomForestClassifier(max_depth=2, random_state=0)
#             rf_clf.fit(x_sample, y_sample)
            
#             train_acc = rf_clf.score(x_sample, y_sample)
#             test_acc = rf_clf.score(x_test, y_test)
            
            svm = SVC(C=1.0, decision_function_shape = 'ovo', kernel="linear")
            svm.fit(x_sample, y_sample)
            
            train_acc = svm.score(x_sample, y_sample)
            test_acc = svm.score(x_test, y_test)

            print("SPC {} Train Accuracy: {:.3f}".format(spc, train_acc))
            print("SPC {} Test Accuracy: {:.3f}".format(spc, test_acc))
            print()

            accuracies.append(test_acc)

        mean_acc = np.mean(accuracies)
        std_acc = np.std(accuracies)

        mean_acc = round(100 * mean_acc, 3)
        std_acc = round(100 * std_acc, 3)
        

        print("After {} trials - Test Accuracy is {} +- {}".format(TRIALS, mean_acc, std_acc ))
        print("------------------------------------------------------------------------------")
        print()
        
        with open(os.path.join("svm_tl_evaluations", "jigsaw_svm_accs_logs", "{}x{}".format(cuts, cuts), model_name + "_" + layer_name + ".txt"), 'a') as f:
        # with open(os.path.join("svm_tl_evaluations", "jigsaw_svm_accs_logs", "{}x{}".format(cuts, cuts), "{}_classes".format(no_permutations), model_name + "_" + layer_name + ".txt"), 'a') as f:
            print("After {} trials - Test Accuracy is {} +- {}".format(TRIALS, mean_acc, std_acc ), file=f)
            print("------------------------------------------------------------------------------", file=f)
            print(file=f)

In [23]:
# os.path.join("svm_tl_evaluations", "{}x{}".format(cuts, cuts), "{}_classes".format(no_permutations), model_name + ".txt")

In [24]:
# dropout 0 
train_svm_with_spc(train_embeddings_dropout0, y_train, test_embeddings_dropout0, y_test, layers[0])

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.639

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.639

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.572

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.674

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.640

After 5 trials - Test Accuracy is 63.287 +- 3.33
------------------------------------------------------------------------------

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.715

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.752

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.698

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.763

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.769

After 5 trials - Test Accuracy is 73.922 +- 2.803
------------------------------------------------------------------------------

SPC 30 Train Accuracy: 1.000
SPC 30 Test Accuracy: 0.808

SPC 30 Train Accuracy: 1.000
SPC 30 Test Accuracy: 0.783

SPC 30 Train Accuracy: 1.000
SPC 30 Test Accu

In [25]:
# dropout 1
train_svm_with_spc(train_embeddings_dropout1, y_train, test_embeddings_dropout1, y_test, layers[1])

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.710

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.623

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.684

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.625

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.667

After 5 trials - Test Accuracy is 66.171 +- 3.373
------------------------------------------------------------------------------

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.772

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.794

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.764

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.788

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.755

After 5 trials - Test Accuracy is 77.457 +- 1.436
------------------------------------------------------------------------------

SPC 30 Train Accuracy: 1.000
SPC 30 Test Accuracy: 0.856

SPC 30 Train Accuracy: 1.000
SPC 30 Test Accuracy: 0.860

SPC 30 Train Accuracy: 1.000
SPC 30 Test Acc

In [26]:
# dropout 2
train_svm_with_spc(train_embeddings_dropout2, y_train, test_embeddings_dropout2, y_test, layers[2])

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.676

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.701

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.667

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.695

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.802

After 5 trials - Test Accuracy is 70.791 +- 4.841
------------------------------------------------------------------------------

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.791

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.755

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.797

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.814

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.788

After 5 trials - Test Accuracy is 78.884 +- 1.92
------------------------------------------------------------------------------

SPC 30 Train Accuracy: 1.000
SPC 30 Test Accuracy: 0.864

SPC 30 Train Accuracy: 1.000
SPC 30 Test Accuracy: 0.871

SPC 30 Train Accuracy: 1.000
SPC 30 Test Accu