In [5]:
import h5py
import numpy as np
import sklearn
from sklearn.svm import SVC

In [6]:
# utilities
def flatten(x):
    return x.reshape((x.shape[0], -1))

def classSampling(X, y, samplesPerClass, numberOfClasses):
    X_ret = np.zeros((samplesPerClass * numberOfClasses, X.shape[1]), dtype = np.float32)
    y_ret = np.zeros((samplesPerClass * numberOfClasses), dtype = np.uint8)
    count = 0

    for classIdx in range(numberOfClasses):
        indices = np.where(y == classIdx)[0]

        #if len(indices) < samplesPerClass:
        #    raise IndexError("Not enough samples for class {} to produce {} samples per class. Only {} class samples available".format(classIdx, samplesPerClass, len(indices)))

        doResample = len(indices) < samplesPerClass

        chosenIndices = np.random.choice(indices, samplesPerClass, replace = doResample)

        for ci in chosenIndices:
            X_ret[count] = X[ci]
            y_ret[count] = y[ci]

            count += 1

    return X_ret, y_ret

In [7]:
# main params
NUM_CLASSES_WATERTANK = 11
NUM_CLASSES_TURNEDTABLE = 12

SAMPLES_PER_CLASS = [1, 5, 10, 20, 30, 40, 50]
TRIALS = 10

## SVM Classifier on Turnedtable Watertank (Dataset 2)

In [27]:
# Turnedtable Watertank Data
class SonarTurnedTableSelfSupervised(object):
    def __init__(self, file_path):
        self.file_path = file_path

    def _normalize_images(self, images):

        """
        """
        # test = sample.reshape([-1, 1, 63, 63])
        # test = test.transpose([0, 2, 3, 1])
        return [element/255.0 for element in images]

    def get_sonar_data(self):

        """
        Reads from HDF5 file containing sonar data.
        Returns list of np arrays.
        Resized to 96x96 dimensions.
        """
        with h5py.File(self.file_path, "r") as f:
            # list all groups
            print("hdf5 dataset keys: %s" % f.keys())

            # get the data
            x_train = f["x_train"][...].astype(np.float32)
            x_val = f["x_val"][...].astype(np.float32)
            x_test = f["x_test"][...].astype(np.float32)

            print("[INFO] Original data dimensions")
            print("Train", len(x_train))
            print("Val", len(x_val))
            print("Test", len(x_test))

        return x_train, x_val, x_test

    def generate_rotations(self, images):
        """
        Rotates images and stores labels for each rotation.
        0: 0 degrees
        1: 90 degrees
        2: 180 degrees
        3: 270 degrees
        This is the core component of RotNet.
        """
        images_rotated = []
        labels_rotated = []
        for num_r in [0, 1, 2, 3]:
            for i in range(len(images)):
                r_im = np.rot90(images[i], k=num_r)
                images_rotated.append(r_im)
                labels_rotated.append(num_r)

        # return np.concatenate(images_rotated), np.array(labels_rotated)
        return np.asarray(images_rotated), np.array(labels_rotated)


In [29]:
# Path to file
turnedtable_file_path = "../../../../../datasets/sonar_turntable_dataset_2/marine-debris-turntable-classification-object_classes-platform-96x96.hdf5"

# Load data
dataset_object = SonarTurnedTableSelfSupervised(turnedtable_file_path)
orig_x_train, orig_x_val, orig_x_test = dataset_object.get_sonar_data()

x_train, y_train = dataset_object.generate_rotations(orig_x_train)
x_val, y_val = dataset_object.generate_rotations(orig_x_val)
x_test, y_val = dataset_object.generate_rotations(orig_x_test)

[INFO] Retrieving Sonar Turned Table Supervised Data
hdf5 dataset keys: <KeysViewHDF5 ['x_test', 'x_train', 'y_test', 'y_train']>
[INFO] Data dimensions
Train 1505
Test 645


In [31]:
print(x_train.shape)
print(x_test.shape)

(1505, 96, 96, 1)

In [32]:
# Flatten train & test data for SVM
x_train = flatten(x_train)
x_test = flatten(x_test)

print(x_train.shape)
print(x_test.shape)

(1505, 9216)
(645, 9216)


### Pretrainig setup: classification full turnedtable test set (your pipeline)

In [33]:
svm = SVC(C=1.0, decision_function_shape = 'ovo', kernel="linear")
# svm.fit(x_sample, y_sample)
svm.fit(x_train, y_train)

train_acc = svm.score(x_train, y_train)
test_acc = svm.score(x_test, y_test)

print("Full train set accuracy: {:.3f}".format(train_acc))
print("Full test set accuracy: {:.3f}".format(test_acc))
print()

Full train set accuracy: 1.000
Full test set accuracy: 0.974



In [36]:
# value above is the same one as matias one!

## SVM Classifier on Marine Debris Watertank (Dataset 1)

In [8]:
# Watertank Data
class SonarDebrisSelfSupervised(object):
    def __init__(self, data_dir):
        self.data_dir = data_dir

    def _normalize_images(self, images):

        """
        """
        # test = sample.reshape([-1, 1, 63, 63])
        # test = test.transpose([0, 2, 3, 1])
        return [element/255.0 for element in images]

    def get_sonar_data(self):

        """
        Reads from HDF5 file containing sonar data.
        Returns list of np arrays.
        Resized to 96x96 dimensions.
        """
        with h5py.File(self.data_dir, "r") as f:
            # list all groups
            print("hdf5 dataset keys: %s" % f.keys())

            # get the data
            x_train = f["x_train"][...].astype(np.float32)
            x_val = f["x_val"][...].astype(np.float32)
            x_test = f["x_test"][...].astype(np.float32)

            print("[INFO] Original data dimensions")
            print("Train", len(x_train))
            print("Val", len(x_val))
            print("Test", len(x_test))

        return x_train, x_val, x_test

    def generate_rotations(self, images):
        """
        Rotates images and stores labels for each rotation.
        0: 0 degrees
        1: 90 degrees
        2: 180 degrees
        3: 270 degrees
        This is the core component of RotNet.
        """
        images_rotated = []
        labels_rotated = []
        for num_r in [0, 1, 2, 3]:
            for i in range(len(images)):
                r_im = np.rot90(images[i], k=num_r)
                images_rotated.append(r_im)
                labels_rotated.append(num_r)

        # return np.concatenate(images_rotated), np.array(labels_rotated)
        return np.asarray(images_rotated), np.array(labels_rotated)

In [9]:
# Path to file
watertank_file_path = "../../../../../datasets/sonar_debris_dataset_1/marine-debris-watertank-release/marine-debris-watertank-classification-96x96.hdf5"

# Load data
dataset_object = SonarDebrisSelfSupervised(watertank_file_path)
orig_x_train, orig_x_val, orig_x_test = dataset_object.get_sonar_data()

hdf5 dataset keys: <KeysViewHDF5 ['class_names', 'x_test', 'x_train', 'x_val', 'y_test', 'y_train', 'y_val']>
[INFO] Original data dimensions
Train 1838
Val 394
Test 395


In [10]:
x_train, y_train = dataset_object.generate_rotations(orig_x_train)
x_val, y_val = dataset_object.generate_rotations(orig_x_val)
x_test, y_test = dataset_object.generate_rotations(orig_x_test)

In [11]:
print(x_train.shape)
print(x_test.shape)

(7352, 96, 96, 1)
(1580, 96, 96, 1)


In [12]:
# Flatten train & test data for SVM
x_train = flatten(x_train)
x_test = flatten(x_test)

In [13]:
x_train.shape

(7352, 9216)

In [14]:
x_test.shape

(1580, 9216)

In [25]:
print(np.unique(y_train))

[0 1 2 3]


### Pretraining setup: classification full test set (your pipeline)

In [26]:
# Run full test set SVM classification (your pipeline)

# x_sample, y_sample = classSampling(x_train, y_train, spc, NUM_CLASSES)
svm = SVC(C=1.0, decision_function_shape = 'ovo', kernel="linear")
svm.fit(x_train, y_train)


SVC(decision_function_shape='ovo', kernel='linear')

In [27]:
train_acc = svm.score(x_train, y_train)
test_acc = svm.score(x_test, y_test)

print("Full train set accuracy: {:.3f}".format(train_acc))
print("Full test set accuracy: {:.3f}".format(test_acc))
print()

Full train set accuracy: 1.000
Full test set accuracy: 0.764



In [28]:
len(y_test)

1580