In [1]:
import time
import numpy as np

# Load the dataset
from keras.datasets import cifar100

# Load CIFAR-100 using the "coarse" label mode.
(training_data, training_labels), (testing_data, testing_labels) = (cifar100.load_data("coarse"))

# Hold out every training sample from index 49000 onward as the validation
# set. The validation slices must be taken before the training arrays are
# truncated below.
# np.float64 is used instead of np.float: the np.float alias (which simply
# meant the builtin float, i.e. float64) was removed in NumPy 1.24.
validation_data = training_data[49000:, :].astype(np.float64)
validation_labels = np.squeeze(training_labels[49000:, :])
training_data = training_data[:49000, :].astype(np.float64)
training_labels = np.squeeze(training_labels[:49000, :])

# Squeeze the label arrays so they are 1-D, and cast the test images to float
# so the mean image can be subtracted from them in place later.
testing_labels = np.squeeze(testing_labels)
testing_data = testing_data.astype(np.float64)

# Report the shape of every data / label array, one line per array
for shape_template, shape_source in (
    ('Train image shape:    {0}', training_data),
    ('Train label shape:    {0}', training_labels),
    ('Validate image shape: {0}', validation_data),
    ('Validate label shape: {0}', validation_labels),
    ('Test image shape:     {0}', testing_data),
    ('Test label shape:     {0}', testing_labels),
):
    print (shape_template.format(shape_source.shape))

# Pre-processing
# Centre every split on the mean image computed from the training split only;
# the subtraction is done in place on each array.
meanImage = training_data.mean(axis=0)
np.subtract(training_data, meanImage, out=training_data)
np.subtract(validation_data, meanImage, out=validation_data)
np.subtract(testing_data, meanImage, out=testing_data)

number_training_samples = training_data.shape[0]
number_validation_samples = validation_data.shape[0]
number_testing_samples = testing_data.shape[0]

# Flatten every sample into a single row vector
training_data = training_data.reshape(number_training_samples, -1)
validation_data = validation_data.reshape(number_validation_samples, -1)
testing_data = testing_data.reshape(number_testing_samples, -1)

# Append a constant column of ones so the bias is learned as part of the weights
training_data = np.concatenate(
    [training_data, np.ones((number_training_samples, 1))], axis=1)
validation_data = np.concatenate(
    [validation_data, np.ones((number_validation_samples, 1))], axis=1)
testing_data = np.concatenate(
    [testing_data, np.ones((number_testing_samples, 1))], axis=1)


import numpy as np

# Number of classes, taken as the largest label plus one
# (assumes labels are integers counted from 0 -- TODO confirm).
num_classes = np.max(training_labels) + 1

# Scale applied to the standard-normal draws used for the initial weights
sigma =0.01

# Seed NumPy's global generator so the weight initialisation below (and any
# later draw from the same global generator in this run) is reproducible.
np.random.seed(0)

# One weight column per class, one row per input feature
weights = sigma * np.random.randn(training_data.shape[1], num_classes)

# calculate gradient, use L2 regularisation 
def calculate_gradient(training_data, training_labels, reg, weights):
    """Gradient of the L2-regularised multiclass hinge loss w.r.t. the weights.

    For each sample the loss is sum over j != y of max(0, s_j - s_y + 1),
    where s = x.dot(weights) and y is the sample's label. The data term is
    averaged over the samples and ``2 * reg * weights`` is added for the
    L2 penalty.

    Args:
        training_data: 2-D array, one sample per row.
        training_labels: 1-D integer array with one column index (class) per
            sample.
        reg: L2 regularisation strength.
        weights: 2-D array with one row per feature and one column per class.

    Returns:
        Array with the same shape as ``weights``.
    """
    num_training_samples = training_data.shape[0]
    sample_index = np.arange(num_training_samples)

    # w^T x: one row of class scores per sample
    linear_output = training_data.dot(weights)

    # Score each sample obtained for its own (true) class
    linear_output_yi = linear_output[sample_index, training_labels]

    # Margin of every class against the true class; np.newaxis turns the
    # true-class scores into a column so they broadcast across the classes.
    delta = linear_output - linear_output_yi[:, np.newaxis] + 1

    # 1 where the margin is violated, 0 elsewhere
    ds = (delta > 0).astype(float)

    # The true-class column always has delta == 1, but it is not part of the
    # hinge sum: clear it before counting so it does not count itself.
    ds[sample_index, training_labels] = 0

    # The true class gets minus the number of violating classes
    ds[sample_index, training_labels] = -np.sum(ds, axis=1)

    gradient = (training_data.T).dot(ds) / num_training_samples

    # L2 regularisation term
    return gradient + (2 * reg * weights)

# train model using stochastic gradient descent 
def train_model(training_data, training_labels, weights, learning_rate, reg, iterations, batch_size=200):
    """Fit the weights with mini-batch stochastic gradient descent.

    Each iteration draws ``batch_size`` sample indices with
    ``np.random.choice`` (its default samples with replacement), computes the
    gradient on that batch with ``calculate_gradient`` and takes one step of
    size ``learning_rate`` against it.

    Args:
        training_data: 2-D array, one sample per row.
        training_labels: 1-D array of labels, indexable by an index array.
        weights: Initial weights; not modified in place.
        learning_rate: Step size of each update.
        reg: Regularisation strength forwarded to ``calculate_gradient``.
        iterations: Number of update steps.
        batch_size: Number of samples drawn per step (default 200).

    Returns:
        The weights after ``iterations`` updates.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    num_training_samples = len(training_data)

    for _ in range(iterations):
        # Draw the indices of this step's mini-batch
        batch = np.random.choice(num_training_samples, batch_size)

        gradient = calculate_gradient(
            training_data[batch], training_labels[batch], reg, weights)

        # Rebinding (rather than -=) leaves the caller's array untouched
        weights = weights - learning_rate * gradient

    return weights

def calculate_accuracy (data, labels, weights):
    """Return the percentage of samples whose predicted class equals its label.

    The predicted class of a sample is the column index of the largest entry
    in its row of ``data.dot(weights)``.
    """
    scores = data.dot(weights)
    predicted_classes = scores.argmax(axis=1)

    hits = np.sum(predicted_classes == labels)

    return (hits / len(data)) * 100
    
# Train, timing the whole run, then report the accuracy on every split
startTime = time.time()
weights = train_model(
    training_data,
    training_labels,
    weights,
    learning_rate=0.00000001,
    reg=5000,
    iterations=15000,
)
print ('Training time: {0}'.format(time.time() - startTime))

for report_template, split_data, split_labels in (
    ('Training acc:   {0}%', training_data, training_labels),
    ('Validating acc: {0}%', validation_data, validation_labels),
    ('Testing acc:    {0}%', testing_data, testing_labels),
):
    print (report_template.format(calculate_accuracy(split_data, split_labels, weights)))

Train image shape:    (49000, 32, 32, 3)
Train label shape:    (49000,)
Validate image shape: (1000, 32, 32, 3)
Validate label shape: (1000,)
Test image shape:     (10000, 32, 32, 3)
Test label shape:     (10000,)
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
400

4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4000
200
4

KeyboardInterrupt: 

In [None]:
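# Results of earlier runs, one per line:
#   learning_rate, reg, iterations -> accuracy
# (the notes do not record which data split each accuracy was measured on)
#
# 0.00000001, 50000, 15000 -> 23.48%
# 0.00000001, 50000, 30000 -> 22.98%
# 0.00000001, 5000, 15000 -> 14.66%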