In [2]:
import time
import numpy as np
import numpy as np
from keras.datasets import cifar100

(training_data, training_labels), (testing_data, testing_labels) = (cifar100.load_data("coarse"))

print(training_data.shape)
print(training_labels.shape)
print(testing_data.shape)
print(testing_labels.shape)

validation_data = training_data[49000:, :].astype(np.float)
validation_labels = np.squeeze(training_labels[49000:, :])
training_data = training_data[:49000, :].astype(np.float)
training_labels = np.squeeze(training_labels[:49000, :])
testing_labels = np.squeeze(testing_labels)
testing_data = testing_data.astype(np.float)

# Pre processing data
# Normalize the data by subtract the mean image
meanImage = np.mean(training_data, axis=0)
training_data -= meanImage
validation_data -= meanImage
testing_data -= meanImage

number_training_samples = len(training_data)
number_validation_samples = len(validation_data)
number_testing_samples = len(testing_data)

# Reshape data from channel to rows
training_data = np.reshape(training_data, (number_training_samples, -1))
validation_data = np.reshape(validation_data, (number_validation_samples, -1))
testing_data = np.reshape(testing_data, (number_testing_samples, -1))

# Add bias dimension columns
training_data = np.hstack([training_data, np.ones((number_training_samples, 1))])
validation_data = np.hstack([validation_data, np.ones((number_validation_samples, 1))])
testing_data = np.hstack([testing_data, np.ones((number_testing_samples, 1))])

num_classes = np.max(training_labels) + 1

weights = np.ones((len(training_data[0]), num_classes))

# calculate gradient, use L2 regularisation 
def calculate_gradient(training_data, training_labels, regularisation, weights):
    
    num_training_samples, num_training_features = training_data.shape
    
    gradient = np.zeros(num_training_features, num_classes)
    
    # w^Tx
    linear_output = np.dot(training_data, weights)

    #linear output with labels 
    linear_output_yi = linear_output[np.arange(num_training_samples),training_labels]

    # distance of point from separating hyperplane?
    # np.newaxis here makes it a column vector 
    # calculate distance?
    
    # distances = y * (np.dot(X, w)) - 1
    delta = linear_output - linear_output_yi[:,np.newaxis] + 1
    
    ds = np.zeros(delta.shape)
    
    # makes all the places where delta > 0, 1 else 0
    # With lagrange multiplier considered, if the sample is on the support vector: 𝛼 = 1
    # else: 𝛼 = 0
    ds = np.where(delta > 0, 1, 0)
    
    # calculate the sum of each row 
    sum_of_each_row = np.sum(ds, axis=1)
    
    
    ds[np.arange(num_training_samples), training_labels] = - sum_of_each_row

    gradient = (1/num_training_samples) * np.dot((training_data.T), ds)
    
    # controls the influence of each individual support vector on the objective function. 
    # Greater C decreases the effect of |w|²/2, and results in the narrower margin
    gradient = gradient + (2* regularisation * weights)
    
    return gradient 

# train model using stochastic gradient descent 
def train_model(training_data, training_labels, weights, learning_rate, regularisation, iterations):
    
    # number of examples in each batch
    batch_size = 200
    
    num_training_samples = len(training_data)
    
    weights = weights
    
    for i in range(iterations):

        # create batch
        batch = np.random.choice(num_training_samples, batch_size)

        gradient = calculate_gradient(training_data[batch], training_labels[batch], regularisation, weights)
        
        weights = weights - learning_rate * gradient
    
    return weights

# calculate accuracy of model 
def calculate_accuracy (data, labels, weights):
    
    accuracy = 0
    
    prediction = np.zeros(len(data))
    
    #w^Tx
    linear_output = np.dot(data, weights)
    
    # returns the indices of the maximum values along an axis, ie. in this case will return the 
    # column index corresponding to the greatest index of each row
    prediction = np.argmax(linear_output, axis=1)
    
    # count the number of predictions that are correct 
    total_correct_predictions = (prediction == labels).sum()
    
    num_data_points = len(data)
    
    accuracy = (total_correct_predictions/num_data_points)*100

    return accuracy
    
startTime = time.time()
weights = train_model(training_data, training_labels, weights, 0.00000001, 50000, 15000)
print ('Training time: {0}'.format(time.time() - startTime))
print ('Training acc:   {0}%'.format(calculate_accuracy(training_data, training_labels, weights)))
print ('Validating acc: {0}%'.format(calculate_accuracy(validation_data, validation_labels, weights)))
print ('Testing acc:    {0}%'.format(calculate_accuracy(testing_data, testing_labels, weights)))

(50000, 32, 32, 3)
(50000, 1)
(10000, 32, 32, 3)
(10000, 1)
Training time: 79.86820101737976
Training acc:   23.66734693877551%
Validating acc: 22.900000000000002%
Testing acc:    23.44%
