In [None]:
import numpy as np
import scipy.io as sio
import os
import random
from sklearn import svm
from sklearn.model_selection import train_test_split

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

In [2]:
#author: karanchauhan
def ComputeConfusionMatrix(actualLabels, predictedLabels):
    """Generate a row-normalised confusion matrix from two label sequences.

    Ideally the number of distinct classes is 6: the 5 given class labels
    plus one class for the unknown. Labels are not used as raw indices;
    instead every distinct label found in either input is mapped to a
    row/column position in ascending label order, so 1-based labels (1-6)
    and 0-based labels both work.

    Args:
        actualLabels: sequence of hashable, orderable ground-truth labels.
        predictedLabels: sequence of predicted labels, same length as
            ``actualLabels``.

    Returns:
        A float ``numpy`` array of shape ``(k, k)`` where ``k`` is the number
        of distinct labels across both inputs. Entry ``[r, c]`` is the
        fraction of samples whose actual label is the r-th label that were
        predicted as the c-th label. Rows for labels that never occur in
        ``actualLabels`` are all zeros.

    Raises:
        ValueError: if the two sequences differ in length.
    """
    if len(actualLabels) != len(predictedLabels):
        raise ValueError("actualLabels and predictedLabels must have the same length")

    # Use the union so a predicted class that never appears among the actual
    # labels still gets a column instead of indexing out of bounds.
    labels = sorted(set(actualLabels) | set(predictedLabels))
    position_of = {label: position for position, label in enumerate(labels)}

    counts = np.zeros((len(labels), len(labels)), dtype=int)
    for actual, predicted in zip(actualLabels, predictedLabels):
        counts[position_of[actual], position_of[predicted]] += 1

    row_totals = counts.sum(axis=1, keepdims=True)
    # Only divide rows that contain samples; untouched rows keep the zeros
    # from ``out`` rather than becoming NaN through 0/0.
    confusion_matrix = np.divide(
        counts,
        row_totals,
        out=np.zeros(counts.shape, dtype=float),
        where=row_totals != 0,
    )
    return confusion_matrix

In [5]:
# author: karanchauhan
#TODO Need to add hyperparameter tuning
#TODO Improve performance
def GPR(x_estimate, x_validate, class_labels, n_restarts_optimizer=1, random_state=None):
    """Fit a Gaussian Process regressor and predict outputs for a validation set.

    The model uses a ConstantKernel * RBF kernel whose hyperparameters are
    fitted by maximum likelihood inside ``GaussianProcessRegressor.fit``.

    Args:
        x_estimate: training feature vectors, one row per sample.
        x_validate: feature vectors to predict for, one row per sample.
        class_labels: training targets aligned row-for-row with ``x_estimate``.
        n_restarts_optimizer: number of extra optimizer restarts used when
            fitting the kernel hyperparameters. Defaults to 1, the value this
            function previously hard-coded.
        random_state: optional seed forwarded to the regressor so the
            optimizer restarts are reproducible. Defaults to None (unseeded).

    Returns:
        The predicted mean output for every row of ``x_validate``, as
        returned by ``GaussianProcessRegressor.predict``.
    """
    print("started gpr")
    print("size of x_estimate: " + str(len(x_estimate)))

    # Instantiate a Gaussian Process model
    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    gp = GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=n_restarts_optimizer,
        random_state=random_state,
    )

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(x_estimate, class_labels)

    # Only the predictive mean is needed by callers, so the standard
    # deviation is not requested; computing it is extra work that was
    # previously discarded.
    return gp.predict(x_validate)

In [6]:

#TODO: Test
# author: karanchauhan
def CreateLabelsFromTestOutput(target_output, threshold):
    """Convert per-class output rows into 1-based class labels.

    For each row, the position of the largest value decides the class. If
    that largest value does not exceed ``threshold`` the sample is treated as
    unclassifiable and receives the label 6.

    Args:
        target_output: iterable of rows (nested lists or a 2-D numpy array),
            each row holding one score per class.
        threshold: minimum score (exclusive) the winning class must reach.

    Returns:
        A list with one integer label per row: ``argmax + 1`` for confident
        rows, 6 otherwise. Ties resolve to the first maximal position. An
        empty row is labelled 6.
    """
    unknown_label = 6  # class label reserved for unknown / unclassifiable data

    classLabels = []
    for row in target_output:
        scores = list(row)  # gives numpy rows and plain lists the same API
        if not scores:
            classLabels.append(unknown_label)
            continue

        max_test_output = max(scores)
        max_test_output_index = scores.index(max_test_output)

        # Compare the winning score itself (not its position) to the threshold.
        if max_test_output > threshold:
            classLabels.append(max_test_output_index + 1)
        else:
            classLabels.append(unknown_label)
    return classLabels

In [7]:

# author: karanchauhan
def MyCrossValidate(XTrain, Nf, YTrain):
    """Run Nf-fold cross-validation of the GPR model on a labelled data set.

    The samples are shuffled (with the ``random`` module, so ``random.seed``
    controls the partition), then cut into ``Nf`` consecutive validation
    folds of ``len(XTrain) // Nf`` samples each. For every fold the model is
    trained on the remaining samples via ``GPR``, the actual and predicted
    outputs are converted to class labels via ``CreateLabelsFromTestOutput``
    and compared via ``ComputeConfusionMatrix``. Samples left over when
    ``Nf`` does not divide the data size are never validated on, but are
    part of every estimation set.

    Args:
        XTrain: feature vectors, one row per sample.
        Nf: number of folds; must be at least 2 and at most ``len(XTrain)``.
        YTrain: target outputs aligned row-for-row with ``XTrain``.

    Returns:
        A list with one confusion matrix per fold, in fold order.

    Raises:
        ValueError: if ``XTrain`` and ``YTrain`` differ in length, or if
            ``Nf`` would leave an empty estimation or validation set.
    """
    sample_count = len(XTrain)
    if len(YTrain) != sample_count:
        raise ValueError("XTrain and YTrain must contain the same number of samples")
    if Nf < 2:
        raise ValueError("Nf must be at least 2 so every fold has an estimation set")

    validation_set_size = sample_count // Nf
    if validation_set_size == 0:
        raise ValueError("Nf must not exceed the number of samples")

    print(sample_count - validation_set_size)

    # threshold set for classifying a test data sample to class 6 if any of models give all class values<threshold
    threshold = 0.7

    # Shuffle the data set and label set together for a random partition.
    # One shared index permutation keeps each sample paired with its label.
    shuffled_indices = list(range(sample_count))
    random.shuffle(shuffled_indices)
    x_shuffled = np.asarray(XTrain)[shuffled_indices]
    y_shuffled = np.asarray(YTrain)[shuffled_indices]

    confusion_matrices = []

    # Run for all validation and estimation sets
    for fold in range(Nf):
        start = fold * validation_set_size
        stop = start + validation_set_size

        # Validation fold and the actual outputs that belong to it.
        validation_set = x_shuffled[start:stop]
        actual_validation_output = y_shuffled[start:stop]

        # Estimation set: every sample outside the validation fold.
        estimation_set = np.concatenate((x_shuffled[:start], x_shuffled[stop:]))
        y_train_set = np.concatenate((y_shuffled[:start], y_shuffled[stop:]))

        #TODO Call GPR, SVM, RVM
        y_validate = GPR(estimation_set, validation_set, y_train_set)

        # actual_class_labels are the class labels (1-6) defined for the actual_validation_output
        actual_class_labels = CreateLabelsFromTestOutput(actual_validation_output, threshold)

        # predicted_class_labels are the class labels (1-6) defined for the predicted validation output
        predicted_class_labels = CreateLabelsFromTestOutput(y_validate, threshold)

        #Compute confusion matrix (actual_class_labels, predicted_class_labels)
        confusion_matrices.append(
            ComputeConfusionMatrix(actual_class_labels, predicted_class_labels)
        )

    #TODO Compute average confusion matrix
    return confusion_matrices


In [8]:

# author: Jianing
# def SVMTraining(XEstimate,XValidate,Parameters):
#     clf = svm.SVC(decision_function_shape='ovo')
#     print clf.fit(XEstimate, XValidate)
#     Yvalidate=clf.predict(Parameters)
#     EstParameters=clf.get_params()
#     return {"Yvalidate": Yvalidate,
#             "EstParameters": EstParameters}

# Path of training set and labels

In [9]:
# Load image data
# Both .mat files are read from the current working directory.
workingDirectory = os.getcwd()

# Each loadmat result is indexed below by the variable name stored in the file.
loadFeatVecsMatData = sio.loadmat(workingDirectory + '/Proj2FeatVecsSet1.mat')
loadLabelsMatData = sio.loadmat(workingDirectory + '/Proj2TargetOutputsSet1.mat')

# Feature vectors used for training / validation.
trainingSet = loadFeatVecsMatData['Proj2FeatVecsSet1']
# Target outputs paired with trainingSet.
labelSet = loadLabelsMatData['Proj2TargetOutputsSet1']

In [None]:
# Running GPR for just 5 samples
# Train on rows 0-4, predict rows 6-10, then print the true outputs of
# rows 6-10 underneath for a side-by-side comparison.
smallSamplePrediction = GPR(trainingSet[0:5], trainingSet[6:11], labelSet[0:5])
print(smallSamplePrediction)
print(labelSet[6:11])

# 3-fold cross-validation over the complete data set.
MyCrossValidate(trainingSet, 3, labelSet)

started gpr
size of x_estimate: 5
size of x_estimate: 5
[[ 1.00000359 -1.00000359 -1.00000359 -1.00000359 -1.00000359]
 [ 0.99999939 -0.99999939 -0.99999939 -0.99999939 -0.99999939]
 [ 1.00000015 -1.00000015 -1.00000015 -1.00000015 -1.00000015]
 [ 0.99998071 -0.99998071 -0.99998071 -0.99998071 -0.99998071]
 [ 1.00000188 -1.00000188 -1.00000188 -1.00000188 -1.00000188]]
[[ 1 -1 -1 -1 -1]
 [ 1 -1 -1 -1 -1]
 [ 1 -1 -1 -1 -1]
 [ 1 -1 -1 -1 -1]
 [ 1 -1 -1 -1 -1]]
16667
started gpr
size of x_estimate: 16667
size of x_estimate: 16667
