In [1]:
import h5py
import numpy as np
import sklearn
from sklearn.svm import SVC

In [2]:
# utilities
def flatten(x):
    return x.reshape((x.shape[0], -1))

def classSampling(X, y, samplesPerClass, numberOfClasses):
    X_ret = np.zeros((samplesPerClass * numberOfClasses, X.shape[1]), dtype = np.float32)
    y_ret = np.zeros((samplesPerClass * numberOfClasses), dtype = np.uint8)
    count = 0

    for classIdx in range(numberOfClasses):
        indices = np.where(y == classIdx)[0]

        #if len(indices) < samplesPerClass:
        #    raise IndexError("Not enough samples for class {} to produce {} samples per class. Only {} class samples available".format(classIdx, samplesPerClass, len(indices)))

        doResample = len(indices) < samplesPerClass

        chosenIndices = np.random.choice(indices, samplesPerClass, replace = doResample)

        for ci in chosenIndices:
            X_ret[count] = X[ci]
            y_ret[count] = y[ci]

            count += 1

    return X_ret, y_ret

In [3]:
# main params
NUM_CLASSES_WATERTANK = 11
NUM_CLASSES_TURNEDTABLE = 12

# SAMPLES_PER_CLASS = [1, 5, 10, 20, 30, 40, 50]
SAMPLES_PER_CLASS = [10, 20, 30, 40, 50, 80, 110, 140, 170, 200]

TRIALS = 10

## SVM Classifier on Turnedtable Watertank (Dataset 2)

In [4]:
# Turnedtable Watertank Data

class SonarTurnedTableSupervised(object):
    def __init__(self, file_path):
        self.file_path = file_path

    def _normalize_images(self, images):
        """
        Normalize sonar images by 1/255.
        """
        return [element/255.0 for element in images]

    def get_sonar_data(self):
        """
        Reads from HDF5 file containing sonar data (resized to fix dims).
        Returns list of np arrays containing image data.
        """

        print("[INFO] Retrieving Sonar Turned Table Supervised Data")

        with h5py.File(self.file_path, "r") as f:
            # list all groups
            print("hdf5 dataset keys: %s" % f.keys())

            x_train = f["x_train"][...].astype(np.float32)
            y_train = f["y_train"][...]

            # x_val = list(f["x_val"])
            # y_val = list(f["y_val"])

            x_test = f["x_test"][...].astype(np.float32)
            y_test = f["y_test"][...]

            # x_train, x_val, y_train, y_val = train_test_split(x_train_val, y_train_val, train_size=0.83)

            print("[INFO] Data dimensions")
            print("Train", len(x_train))
            # print("Val", len(x_val))
            print("Test", len(x_test))
            # pdb.set_trace()

        # x_train = self._normalize_images(x_train)
        # x_val = self._normalize_images(x_val)
        # x_test = self._normalize_images(x_test)

        # return (x_train, y_train), (x_val, y_val), (x_test, y_test)
        
        return (x_train, y_train), (x_test, y_test)


In [7]:
ls ../../../../../../

[0m[01;34mdatasets[0m/  [01;34mdocumentation[0m/  [01;34mliterature[0m/  [01;34mmodels[0m/  [01;34mnotes[0m/  [01;34mros[0m/


In [8]:
# Path to file
turnedtable_file_path = "../../../../../../datasets/sonar_turntable_dataset_2/marine-debris-turntable-classification-object_classes-platform-96x96.hdf5"

# Load data
turnedtable_dataset_object = SonarTurnedTableSupervised(turnedtable_file_path)
(x_train, y_train), (x_test, y_test) = turnedtable_dataset_object.get_sonar_data()

[INFO] Retrieving Sonar Turned Table Supervised Data
hdf5 dataset keys: <KeysViewHDF5 ['class_names', 'x_test', 'x_train', 'y_test', 'y_train']>
[INFO] Data dimensions
Train 1505
Test 645


In [9]:
x_train.shape

(1505, 96, 96, 1)

In [10]:
# Flatten train & test data for SVM
x_train = flatten(x_train)
x_test = flatten(x_test)

print(x_train.shape)
print(x_test.shape)

(1505, 9216)
(645, 9216)


In [11]:
y_test

array([ 3,  9,  2,  1,  1,  0,  3,  0, 11,  0,  3,  1, 10,  0,  0,  0,  4,
        0,  0,  9,  2,  0,  8,  0,  3,  2,  9,  2,  0,  1,  0,  9,  4,  0,
       10,  0,  0,  9,  9,  1,  0,  0,  8,  1,  2,  2,  3,  0,  0,  0,  5,
        0,  7, 10,  0,  0,  0, 11,  0, 11,  2, 10,  7,  2,  6,  0,  5,  0,
        9, 11,  8,  8,  4,  8,  0,  6,  0,  5,  0,  0,  0,  0,  2,  2,  1,
        0,  3,  0,  0,  9,  2,  1,  0,  7,  2,  0,  0,  0, 10,  2,  0,  6,
        8,  0,  6,  3,  9,  0,  0, 10,  9,  7,  7, 11,  1,  5,  9, 11, 11,
        1,  2,  0,  0,  0,  1,  8,  0,  0,  2,  5,  0,  0,  0, 10,  8,  9,
        9,  0,  0,  9,  6,  4,  0,  7,  2,  9,  7,  8,  0,  5,  1,  9,  0,
        9,  2,  5,  0,  2,  2,  0, 11,  4,  8,  9,  2,  6,  1,  0,  0, 10,
        0,  0,  0,  0,  6,  0,  0,  0,  2, 10, 11,  0,  2,  0,  4,  2,  6,
        9,  3,  9,  5,  5,  0,  0,  9,  9,  3,  3,  7,  4,  6,  8,  0,  0,
        0, 11,  6,  2,  5,  0,  0,  9, 11,  2,  8,  6,  1,  0,  7,  5,  1,
       11,  0,  0,  0,  3

### Pretrainig setup: classification full turnedtable test set (your pipeline)

In [33]:
svm = SVC(C=1.0, decision_function_shape = 'ovo', kernel="linear")
# svm.fit(x_sample, y_sample)
svm.fit(x_train, y_train)

train_acc = svm.score(x_train, y_train)
test_acc = svm.score(x_test, y_test)

print("Full train set accuracy: {:.3f}".format(train_acc))
print("Full test set accuracy: {:.3f}".format(test_acc))
print()

Full train set accuracy: 1.000
Full test set accuracy: 0.974



In [36]:
# value above is the same one as matias one!

### Transfer Learning setup: classification with subsamples per class (your pipeline)

In [12]:
for spc in SAMPLES_PER_CLASS:
    accuracies = []
    
    for i in range(TRIALS):
        x_sample, y_sample = classSampling(x_train, y_train, spc, NUM_CLASSES_TURNEDTABLE)
        
        svm = SVC(C=1.0, decision_function_shape = 'ovo', kernel="linear")
        svm.fit(x_sample, y_sample)

        train_acc = svm.score(x_sample, y_sample)
        test_acc = svm.score(x_test, y_test)
        
        print("SPC {} Train Accuracy: {:.3f}".format(spc, train_acc))
        print("SPC {} Test Accuracy: {:.3f}".format(spc, test_acc))
        print()
        
        accuracies.append(test_acc)
    
    mean_acc = np.mean(accuracies)
    std_acc = np.std(accuracies)

    mean_acc = round(100 * mean_acc, 3)
    std_acc = round(100 * std_acc, 3)
    
    print("After {} trials - Test Accuracy is {} +- {}".format(TRIALS, mean_acc, std_acc ))
    print("------------------------------------------------------------------------------")
    print()

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.608

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.659

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.709

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.603

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.580

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.678

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.622

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.651

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.616

SPC 10 Train Accuracy: 1.000
SPC 10 Test Accuracy: 0.631

After 10 trials - Test Accuracy is 63.55 +- 3.663
------------------------------------------------------------------------------

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.729

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.767

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.771

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.743

SPC 20 Train Accuracy: 1.000
SPC 20 Test Accuracy: 0.792



In [None]:
# these are for my results 

## SVM Classifier on Marine Debris Watertank (Dataset 1)

In [5]:
# Watertank Data

class SonarDebrisSupervised(object):
    def __init__(self, file_path):
        self.file_path = file_path

    def _normalize_images(self, images):

        """
        """
        # NOTE: HDF5 images are already normalized
        return [element/255.0 for element in images]

    def get_sonar_data(self):

        """
        Reads from HDF5 file containing sonar data (resized to fix dims).
        Returns list of np arrays containing image data.
        """

        print("[INFO] Retrieving Sonar Debris Supervised Data")
        with h5py.File(self.file_path, "r") as f:
            # get images and labels
            x_train = f["x_train"][...].astype(np.float32)
            y_train = f["y_train"][...]

            x_val = list(f["x_val"])
            y_val = list(f["y_val"])

            x_test = f["x_test"][...].astype(np.float32)
            y_test = f["y_test"][...]

            print("[INFO] Data dimensions")
            print("Train", len(x_train))
            print("Val", len(x_val))
            print("Test", len(x_test))

        return (x_train, y_train), (x_val, y_val), (x_test, y_test)

In [6]:
# Path to file
watertank_file_path = "../../../../../datasets/sonar_debris_dataset_1/marine-debris-watertank-release/marine-debris-watertank-classification-96x96.hdf5"

# Load data
dataset_object = SonarDebrisSupervised(watertank_file_path)
(x_train, y_train), (x_val, y_val), (x_test, y_test) = dataset_object.get_sonar_data()

[INFO] Retrieving Sonar Debris Supervised Data
[INFO] Data dimensions
Train 1838
Val 394
Test 395


In [7]:
# Matias normalization
# x_train  *= 255.0
# x_test   *= 255.0

# x_train -= 84.51
# x_test  -= 84.51

In [8]:
x_train.shape

(1838, 96, 96, 1)

In [9]:
# Flatten train & test data for SVM
x_train = flatten(x_train)
x_test = flatten(x_test)

In [10]:
x_train.shape

(1838, 9216)

In [11]:
x_test.shape

(395, 9216)

### Pretraining setup: classification full test set (your pipeline)

In [12]:
# Run full test set SVM classification (your pipeline)

# x_sample, y_sample = classSampling(x_train, y_train, spc, NUM_CLASSES)
svm = SVC(C=1.0, decision_function_shape = 'ovo', kernel="linear")
# svm.fit(x_sample, y_sample)
svm.fit(x_train, y_train)

train_acc = svm.score(x_train, y_train)
test_acc = svm.score(x_test, y_test)

print("Full train set accuracy: {:.3f}".format(train_acc))
print("Full test set accuracy: {:.3f}".format(test_acc))
print()

Full train set accuracy: 1.000
Full test set accuracy: 0.967



### Transfer Learning setup: classification with subsamples per class (matias pipeline)

In [13]:
for spc in SAMPLES_PER_CLASS:
    accuracies = []
    
    for i in range(TRIALS):
        x_sample, y_sample = classSampling(x_train, y_train, spc, NUM_CLASSES_WATERTANK)
        
        svm = SVC(C=1.0, decision_function_shape = 'ovo', kernel="linear")
        svm.fit(x_sample, y_sample)

        train_acc = svm.score(x_sample, y_sample)
        test_acc = svm.score(x_test, y_test)
        
        print("SPC {} Train Accuracy: {:.3f}".format(spc, train_acc))
        print("SPC {} Test Accuracy: {:.3f}".format(spc, test_acc))
        print()
        
        accuracies.append(test_acc)
    
    mean_acc = np.mean(accuracies)
    std_acc = np.std(accuracies)

    mean_acc = round(100 * mean_acc, 3)
    std_acc = round(100 * std_acc, 3)
    
    print("After {} trials - Test Accuracy is {} +- {}".format(TRIALS, mean_acc, std_acc ))
    print("------------------------------------------------------------------------------")
    print()

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.539

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.385

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.365

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.430

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.408

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.367

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.415

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.562

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.478

SPC 1 Train Accuracy: 1.000
SPC 1 Test Accuracy: 0.395

After 10 trials - Test Accuracy is 43.443 +- 6.615
------------------------------------------------------------------------------

SPC 5 Train Accuracy: 1.000
SPC 5 Test Accuracy: 0.739

SPC 5 Train Accuracy: 1.000
SPC 5 Test Accuracy: 0.732

SPC 5 Train Accuracy: 1.000
SPC 5 Test Accuracy: 0.772

SPC 5 Train Accuracy: 1.000
SPC 5 Test Accuracy: 0.813

SPC 5 Train Accuracy: 1.000
SPC 5 Test Accuracy: 0.722

SPC 5 Train Accuracy: 1.000
S