Vectorized multinomial logistic (softmax) regression in Python, trained with mini-batch gradient descent and classical momentum

In [2]:
import numpy as np
# import os

def import_from_csv(path, pixel_depth, length, class_num):
    """Read a label-first CSV and return normalized features plus labels.

    The file is parsed as comma-separated integers and the first parsed row
    is discarded. Column 0 holds the class index of each row; the remaining
    columns are the feature values.

    Args:
        path: location of the CSV file.
        pixel_depth: forwarded to ``normalize`` to scale the feature values.
        length: expected number of data rows; the label column is reshaped
            to ``(length,)``, so a different row count raises ``ValueError``.
        class_num: number of classes (width of the one-hot encoding).

    Returns:
        ``(features, one_hot_labels, label_indices)`` where ``features`` is
        the output of ``normalize``, ``one_hot_labels`` has shape
        ``(length, class_num)`` and ``label_indices`` has shape ``(length,)``.
    """
    table = np.genfromtxt(str(path), delimiter=",", dtype=int)
    rows = table[1:, :]  # drop the first parsed row (the CSV header line)

    label_ids = rows[:, 0].reshape(length,)
    pixels = rows[:, 1:]  # everything except the label column

    # Row k of the identity matrix is the one-hot vector for class k.
    one_hot = np.eye(class_num)[label_ids]
    return normalize(pixels, pixel_depth), one_hot, label_ids

def normalize(image_data, pixel_depth):
    """Centre values on half of ``pixel_depth`` and scale by ``pixel_depth``.

    A value of 0 maps to -0.5 and a value of ``pixel_depth`` maps to 0.5.

    Args:
        image_data: NumPy array of raw values.
        pixel_depth: largest raw value (255 is used throughout this notebook).

    Returns:
        A ``float32`` array with the same shape as ``image_data``.
    """
    midpoint = pixel_depth / 2
    scaled = (image_data - midpoint) / pixel_depth
    return scaled.astype(np.float32)

# Disabled alternatives to the CSV load below, kept for quick experiments.
# (a) random features with random one-hot labels:
#training_data = np.random.rand(20000, 200)
#correct_index = np.random.choice(10, np.shape(training_data)[0])
#training_labels = np.eye(10)[correct_index]
# (b) soft labels produced by a random linear map:
#training_data = np.random.rand(10000, 784)
#training_weights = np.random.rand(10, 784)
#X = np.dot(training_data, training_weights.T)
#training_labels = X / np.sum(X, axis=1).reshape(10000,1)

training_data, training_labels, correct_index = import_from_csv(
    path='/Users/JAustin/Desktop/MNIST/train.csv',
    pixel_depth=255,
    length=42000,
    class_num=10,
)

# Problem dimensions, read off the loaded arrays.
data_size, num_params = np.shape(training_data)  # samples (42000 for the load above), features per sample
num_classes = np.shape(training_labels)[1]  # one-hot width (10 for the load above)

In [31]:
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row is shifted by its own maximum before exponentiating. The shift
    cancels in the ratio, so the result is mathematically unchanged, but
    ``np.exp`` never sees a positive argument and therefore cannot overflow
    to ``inf`` (which previously produced ``nan`` rows for large scores).

    Args:
        x: array of shape ``(rows, classes)`` with at least one column.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    x = np.asarray(x)
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the normaliser
    return exps / np.sum(exps, axis=1, keepdims=True)

def predict(data, batch_size, weights, biases):
    """Draw a random mini-batch and compute its class probabilities.

    Args:
        data: 2-D array of samples, one per row.
        batch_size: number of distinct rows to draw (sampling is without
            replacement).
        weights: array of shape ``(num_classes, num_params)``.
        biases: array of shape ``(num_classes, 1)``.

    Returns:
        ``(index, probabilities)``: the sampled row indices and the softmax
        of the affine scores for those rows, shape ``(batch_size, num_classes)``.
    """
    index = np.random.choice(len(data), batch_size, replace=False)
    batch = data[index]
    scores = batch @ weights.T + biases.T  # biases.T broadcasts across the batch rows
    return index, softmax(scores)

def evaluate(data, labels, weights, biases, batch_size):
    """Print and return classification accuracy on a random sample of rows.

    The predicted class is the arg-max of the raw affine scores. Softmax is
    strictly increasing within a row, so applying it first cannot change the
    arg-max; skipping it avoids the ``nan`` values that an overflowing
    exponential would feed into ``argmax``.

    Args:
        data: 2-D array of samples, one per row.
        labels: one-hot (or score) matrix with one row per sample; the
            arg-max of each row is taken as the expected class.
        weights: array of shape ``(num_classes, num_params)``.
        biases: array of shape ``(num_classes, 1)``.
        batch_size: number of distinct rows to score; must not exceed
            ``len(data)`` because sampling is without replacement.

    Returns:
        ``(accuracy, index)``: accuracy as a percentage, and the indices of
        the rows that were scored.
    """
    index = np.random.choice(len(data), batch_size, replace=False)
    scores = np.dot(data[index], weights.T) + biases.T
    predicted = np.argmax(scores, axis=1)
    expected = np.argmax(labels[index], axis=1)
    accuracy = (predicted == expected).sum() * 100 / batch_size
    print("The accuracy of your model is %s%%!" % accuracy)
    return accuracy, index

def gradient_descent(data, labels, weights, biases, batch_size, iterations, learning_rate, momentum_rate):
    """Train the softmax model with mini-batch gradient descent and momentum.

    Each step samples a mini-batch through ``predict``, forms the
    cross-entropy gradient and applies the classical momentum update
    ``v = gamma * v + lr * grad; param -= v``.

    Args:
        data: 2-D array of training samples, one per row.
        labels: target matrix with one row per sample (one-hot in this notebook).
        weights: initial weights, shape ``(num_classes, num_params)``.
        biases: initial biases, shape ``(num_classes, 1)``.
        batch_size: number of samples per step.
        iterations: number of update steps.
        learning_rate: per-step learning rates (sequence with at least
            ``iterations`` entries) or a single scalar used for every step.
        momentum_rate: momentum coefficient ``gamma``.

    Returns:
        ``(weights, biases)`` after training. These are new arrays; the
        arrays passed in are left untouched, so re-running the training
        cell always starts from the same initial parameters.
    """
    log_floor = 1e-12  # keeps log() finite when a predicted probability underflows to 0

    # Train on private copies instead of mutating the caller's arrays in place.
    weights = np.array(weights)
    biases = np.array(biases)

    rates = np.asarray(learning_rate)
    if rates.ndim == 0:
        rates = np.full(iterations, float(rates))

    weight_momentum = np.zeros_like(weights)
    bias_momentum = np.zeros_like(biases)
    gamma = momentum_rate

    for i in range(iterations):
        index, prediction = predict(data, batch_size, weights, biases)
        batch = data[index]
        targets = labels[index]

        if i % 50 == 0:
            # Mean cross-entropy over the batch; only needed on the steps that report it.
            loss = -np.sum(targets * np.log(np.clip(prediction, log_floor, None))) / batch_size
            print("Loss at step %s is %s" % (i, loss))

        error_arr = prediction - targets
        # (classes, batch) @ (batch, params): same sum over samples as the broadcasted
        # outer product, without the (batch, classes, params) temporary.
        dW = np.dot(error_arr.T, batch) / batch_size
        dB = np.sum(error_arr, axis=0)[:, None] / batch_size

        weight_momentum = gamma * weight_momentum + rates[i] * dW
        bias_momentum = gamma * bias_momentum + rates[i] * dB
        weights -= weight_momentum
        biases -= bias_momentum

    return weights, biases

In [32]:
# Initial parameters drawn uniformly from [0, 1); np.random.randn is a possible alternative.
weights = np.random.rand(num_classes, num_params)
biases = np.random.rand(num_classes, 1)

batch_size = 64
iterations = 4000
momentum_rate = .9

# Learning-rate schedule, one value per step: linear decay starting at 0.5.
learning_rate = .5 * (1 - np.arange(iterations) / iterations)
# Disabled alternatives:
# learning_rate = 1*np.exp(-5*np.arange(0,iterations)/iterations) # exponential learning rate # best results with .2
# learning_rate = .01 * np.ones(iterations) # constant learning rate

In [33]:
new_weights, new_biases = gradient_descent(
    data=training_data,
    labels=training_labels,
    weights=weights,
    biases=biases,
    batch_size=batch_size,
    iterations=iterations,
    learning_rate=learning_rate,
    momentum_rate=momentum_rate,
)
# Spot-check on a random sample of the training set. The sample is only
# `batch_size` rows, so the figure is noisy; a larger sample would be better.
accuracy, index = evaluate(
    data=training_data,
    labels=training_labels,
    weights=new_weights,
    biases=new_biases,
    batch_size=batch_size,
)

Loss at step 0 is 8.06005566862
Loss at step 50 is 5.92359180838
Loss at step 100 is 5.38969714084
Loss at step 150 is 1.22678561019
Loss at step 200 is 2.43627242839
Loss at step 250 is 1.44387168511
Loss at step 300 is 2.10201195136
Loss at step 350 is 1.54929052547
Loss at step 400 is 3.55840642009
Loss at step 450 is 1.10801998021
Loss at step 500 is 1.79164786756
Loss at step 550 is 0.868835384908
Loss at step 600 is 1.046410642
Loss at step 650 is 0.911849297774
Loss at step 700 is 1.25705907134
Loss at step 750 is 1.29870191831
Loss at step 800 is 1.32913365288
Loss at step 850 is 2.15079963299
Loss at step 900 is 0.906734303932
Loss at step 950 is 1.66850354744
Loss at step 1000 is 1.07047299414
Loss at step 1050 is 1.95134229418
Loss at step 1100 is 1.02094186882
Loss at step 1150 is 0.271660882765
Loss at step 1200 is 0.723631463258
Loss at step 1250 is 1.40132588618
Loss at step 1300 is 1.78147129683
Loss at step 1350 is 0.820488329445
Loss at step 1400 is 0.677340150599
Los

In [5]:
# Reference labels: column 1 of results_net4.csv (first parsed row dropped), one-hot encoded over 10 classes.
# NOTE(review): the file name suggests these are another model's predictions rather than ground truth - confirm.
test_labels = np.eye(10)[
    np.genfromtxt('/Users/JAustin/Desktop/MNIST/results_net4.csv', delimiter=",", dtype=int)[1:, 1]
]
# Test features: every column of test.csv (first parsed row dropped), scaled like the training data.
test_data = normalize(
    np.genfromtxt('/Users/JAustin/Desktop/MNIST/test.csv', delimiter=",", dtype=int)[1:, :],
    255,
)

In [34]:
# Score every test row (the sample size equals the size of the test set).
test_accuracy, test_index = evaluate(
    data=test_data,
    labels=test_labels,
    weights=new_weights,
    biases=new_biases,
    batch_size=len(test_data),
)

The accuracy of your model is 92.2285714286%!


In [8]:
# Two-column table: row 0 stays all zeros, rows 1..N hold (1-based sample id, predicted class).
solution_arr = np.zeros((len(test_data) + 1, 2))
solution_arr[1:, 0] = np.arange(1, len(test_data) + 1)
solution_arr[1:, 1] = softmax(test_data.dot(new_weights.T) + new_biases.T).argmax(axis=1)
np.savetxt("results_self.csv", solution_arr, fmt='%i', delimiter=",")

The same softmax regression model, trained with Nesterov momentum (the gradient is evaluated at the look-ahead point)

In [2]:
import numpy as np
# import os

def import_from_csv(path, pixel_depth, length, class_num):
    """Read a label-first CSV and return normalized features plus labels.

    The file is parsed as comma-separated integers and the first parsed row
    is discarded. Column 0 holds the class index of each row; the remaining
    columns are the feature values.

    Args:
        path: location of the CSV file.
        pixel_depth: forwarded to ``normalize`` to scale the feature values.
        length: expected number of data rows; the label column is reshaped
            to ``(length,)``, so a different row count raises ``ValueError``.
        class_num: number of classes (width of the one-hot encoding).

    Returns:
        ``(features, one_hot_labels, label_indices)`` where ``features`` is
        the output of ``normalize``, ``one_hot_labels`` has shape
        ``(length, class_num)`` and ``label_indices`` has shape ``(length,)``.
    """
    table = np.genfromtxt(str(path), delimiter=",", dtype=int)
    rows = table[1:, :]  # drop the first parsed row (the CSV header line)

    label_ids = rows[:, 0].reshape(length,)
    pixels = rows[:, 1:]  # everything except the label column

    # Row k of the identity matrix is the one-hot vector for class k.
    one_hot = np.eye(class_num)[label_ids]
    return normalize(pixels, pixel_depth), one_hot, label_ids

def normalize(image_data, pixel_depth):
    """Centre values on half of ``pixel_depth`` and scale by ``pixel_depth``.

    A value of 0 maps to -0.5 and a value of ``pixel_depth`` maps to 0.5.

    Args:
        image_data: NumPy array of raw values.
        pixel_depth: largest raw value (255 is used throughout this notebook).

    Returns:
        A ``float32`` array with the same shape as ``image_data``.
    """
    midpoint = pixel_depth / 2
    scaled = (image_data - midpoint) / pixel_depth
    return scaled.astype(np.float32)

# Disabled alternatives to the CSV load below, kept for quick experiments.
# (a) random features with random one-hot labels:
#training_data = np.random.rand(20000, 200)
#correct_index = np.random.choice(10, np.shape(training_data)[0])
#training_labels = np.eye(10)[correct_index]
# (b) soft labels produced by a random linear map:
#training_data = np.random.rand(10000, 784)
#training_weights = np.random.rand(10, 784)
#X = np.dot(training_data, training_weights.T)
#training_labels = X / np.sum(X, axis=1).reshape(10000,1)

training_data, training_labels, correct_index = import_from_csv(
    path='/Users/JAustin/Desktop/MNIST/train.csv',
    pixel_depth=255,
    length=42000,
    class_num=10,
)

# Problem dimensions, read off the loaded arrays.
data_size, num_params = np.shape(training_data)  # samples (42000 for the load above), features per sample
num_classes = np.shape(training_labels)[1]  # one-hot width (10 for the load above)

In [43]:
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row is shifted by its own maximum before exponentiating. The shift
    cancels in the ratio, so the result is mathematically unchanged, but
    ``np.exp`` never sees a positive argument and therefore cannot overflow
    to ``inf`` (which previously produced ``nan`` rows for large scores).

    Args:
        x: array of shape ``(rows, classes)`` with at least one column.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    x = np.asarray(x)
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the normaliser
    return exps / np.sum(exps, axis=1, keepdims=True)

def predict(data, batch_size, weights, biases):
    """Draw a random mini-batch and compute its class probabilities.

    Args:
        data: 2-D array of samples, one per row.
        batch_size: number of distinct rows to draw (sampling is without
            replacement).
        weights: array of shape ``(num_classes, num_params)``.
        biases: array of shape ``(num_classes, 1)``.

    Returns:
        ``(index, probabilities)``: the sampled row indices and the softmax
        of the affine scores for those rows, shape ``(batch_size, num_classes)``.
    """
    index = np.random.choice(len(data), batch_size, replace=False)
    batch = data[index]
    scores = batch @ weights.T + biases.T  # biases.T broadcasts across the batch rows
    return index, softmax(scores)

def evaluate(data, labels, weights, biases, batch_size):
    """Print and return classification accuracy on a random sample of rows.

    The predicted class is the arg-max of the raw affine scores. Softmax is
    strictly increasing within a row, so applying it first cannot change the
    arg-max; skipping it avoids the ``nan`` values that an overflowing
    exponential would feed into ``argmax``.

    Args:
        data: 2-D array of samples, one per row.
        labels: one-hot (or score) matrix with one row per sample; the
            arg-max of each row is taken as the expected class.
        weights: array of shape ``(num_classes, num_params)``.
        biases: array of shape ``(num_classes, 1)``.
        batch_size: number of distinct rows to score; must not exceed
            ``len(data)`` because sampling is without replacement.

    Returns:
        ``(accuracy, index)``: accuracy as a percentage, and the indices of
        the rows that were scored.
    """
    index = np.random.choice(len(data), batch_size, replace=False)
    scores = np.dot(data[index], weights.T) + biases.T
    predicted = np.argmax(scores, axis=1)
    expected = np.argmax(labels[index], axis=1)
    accuracy = (predicted == expected).sum() * 100 / batch_size
    print("The accuracy of your model is %s%%!" % accuracy)
    return accuracy, index

def gradient_descent(data, labels, weights, biases, batch_size, iterations, learning_rate, momentum_rate,
                     eval_data=None, eval_labels=None):
    """Train the softmax model with mini-batch gradient descent and Nesterov momentum.

    Each step evaluates the gradient at the look-ahead point
    ``param - gamma * v`` and then applies
    ``v = gamma * v + lr * grad; param -= v``.

    Args:
        data: 2-D array of training samples, one per row.
        labels: target matrix with one row per sample (one-hot in this notebook).
        weights: initial weights, shape ``(num_classes, num_params)``.
        biases: initial biases, shape ``(num_classes, 1)``.
        batch_size: number of samples per step.
        iterations: number of update steps.
        learning_rate: per-step learning rates (sequence with at least
            ``iterations`` entries) or a single scalar used for every step.
        momentum_rate: momentum coefficient ``gamma``.
        eval_data: optional held-out samples scored every 1000 steps.
        eval_labels: optional labels matching ``eval_data``.

    When ``eval_data`` / ``eval_labels`` are omitted, the module-level
    ``test_data`` / ``test_labels`` are used if they exist (the behaviour
    existing callers rely on). If neither source is available, the periodic
    evaluation is skipped instead of raising ``NameError``.

    Returns:
        ``(weights, biases)`` after training. These are new arrays; the
        arrays passed in are left untouched, so re-running the training
        cell always starts from the same initial parameters.
    """
    log_floor = 1e-12  # keeps log() finite when a predicted probability underflows to 0

    # Prefer explicit arguments; fall back to the notebook globals used previously.
    if eval_data is None:
        eval_data = globals().get("test_data")
    if eval_labels is None:
        eval_labels = globals().get("test_labels")
    can_evaluate = eval_data is not None and eval_labels is not None

    # Train on private copies instead of mutating the caller's arrays in place.
    weights = np.array(weights)
    biases = np.array(biases)

    rates = np.asarray(learning_rate)
    if rates.ndim == 0:
        rates = np.full(iterations, float(rates))

    weight_momentum = np.zeros_like(weights)
    bias_momentum = np.zeros_like(biases)
    gamma = momentum_rate

    for i in range(iterations):
        # Nesterov look-ahead: the gradient is taken where momentum alone would carry us.
        temp_weights = weights - gamma * weight_momentum
        temp_biases = biases - gamma * bias_momentum
        index, prediction = predict(data, batch_size, temp_weights, temp_biases)
        batch = data[index]
        targets = labels[index]

        if i % 50 == 0:
            # Mean cross-entropy over the batch; only needed on the steps that report it.
            loss = -np.sum(targets * np.log(np.clip(prediction, log_floor, None))) / batch_size
            print("Loss at step %s is %s" % (i, loss))
        if can_evaluate and i % 1000 == 0:
            evaluate(eval_data, eval_labels, weights, biases, len(eval_data))

        error_arr = prediction - targets
        # (classes, batch) @ (batch, params): same sum over samples as the broadcasted
        # outer product, without the (batch, classes, params) temporary.
        dW = np.dot(error_arr.T, batch) / batch_size
        dB = np.sum(error_arr, axis=0)[:, None] / batch_size

        weight_momentum = gamma * weight_momentum + rates[i] * dW
        bias_momentum = gamma * bias_momentum + rates[i] * dB
        weights -= weight_momentum
        biases -= bias_momentum

    return weights, biases

In [56]:
# Initial parameters drawn uniformly from [0, 1); np.random.randn is a possible alternative.
weights = np.random.rand(num_classes, num_params)
biases = np.random.rand(num_classes, 1)

batch_size = 64
iterations = 8000
momentum_rate = .9

# Learning-rate schedule, one value per step: exponential decay from 1 at step 0
# towards exp(-5). The original note reads "best results with .2".
learning_rate = np.exp(-5 * np.arange(iterations) / iterations)
# Disabled alternatives:
# learning_rate = np.array([1*(1- x/iterations) for x in range(iterations)]) # linear learning rate
# learning_rate = .01 * np.ones(iterations) # constant learning rate

In [57]:
new_weights, new_biases = gradient_descent(
    data=training_data,
    labels=training_labels,
    weights=weights,
    biases=biases,
    batch_size=batch_size,
    iterations=iterations,
    learning_rate=learning_rate,
    momentum_rate=momentum_rate,
)
# Spot-check on a random sample of the training set. The sample is only
# `batch_size` rows, so the figure is noisy; a larger sample would be better.
accuracy, index = evaluate(
    data=training_data,
    labels=training_labels,
    weights=new_weights,
    biases=new_biases,
    batch_size=batch_size,
)

Loss at step 0 is 5.25475619541
The accuracy of your model is 7.57857142857%!
Loss at step 50 is 7.27959367886
Loss at step 100 is 3.0028517884
Loss at step 150 is 2.59428100015
Loss at step 200 is 2.96461203722
Loss at step 250 is 2.60001133761
Loss at step 300 is 1.03521271022
Loss at step 350 is 2.46027328323
Loss at step 400 is 1.3428843158
Loss at step 450 is 1.61336694174
Loss at step 500 is 2.43901149363
Loss at step 550 is 1.29455865273
Loss at step 600 is 1.51971525898
Loss at step 650 is 0.509834151595
Loss at step 700 is 2.33491309668
Loss at step 750 is 2.04823223344
Loss at step 800 is 2.33112023258
Loss at step 850 is 0.364659446915
Loss at step 900 is 1.17967382192
Loss at step 950 is 1.07609615988
Loss at step 1000 is 0.158047246613
The accuracy of your model is 90.3642857143%!
Loss at step 1050 is 0.171318693359
Loss at step 1100 is 2.6864301851
Loss at step 1150 is 0.886333719239
Loss at step 1200 is 0.486163010456
Loss at step 1250 is 0.895487085114
Loss at step 1300

In [5]:
# Reference labels: column 1 of results_net4.csv (first parsed row dropped), one-hot encoded over 10 classes.
# NOTE(review): the file name suggests these are another model's predictions rather than ground truth - confirm.
test_labels = np.eye(10)[
    np.genfromtxt('/Users/JAustin/Desktop/MNIST/results_net4.csv', delimiter=",", dtype=int)[1:, 1]
]
# Test features: every column of test.csv (first parsed row dropped), scaled like the training data.
test_data = normalize(
    np.genfromtxt('/Users/JAustin/Desktop/MNIST/test.csv', delimiter=",", dtype=int)[1:, :],
    255,
)

In [61]:
# Score every test row (the sample size equals the size of the test set).
test_accuracy, test_index = evaluate(
    data=test_data,
    labels=test_labels,
    weights=new_weights,
    biases=new_biases,
    batch_size=len(test_data),
)

The accuracy of your model is 91.9428571429%!


In [8]:
# Two-column table: row 0 stays all zeros, rows 1..N hold (1-based sample id, predicted class).
solution_arr = np.zeros((len(test_data) + 1, 2))
solution_arr[1:, 0] = np.arange(1, len(test_data) + 1)
solution_arr[1:, 1] = softmax(test_data.dot(new_weights.T) + new_biases.T).argmax(axis=1)
np.savetxt("results_self.csv", solution_arr, fmt='%i', delimiter=",")