In [3]:
import numpy as np
import matplotlib.pyplot as plt

def softmaxRegression (trainingImages, trainingLabels, testingImages, testingLabels, epsilon, batchSize, alpha, numEpochs = 5):
    """Train a softmax-regression weight matrix with mini-batch SGD.

    Parameters
    ----------
    trainingImages : array, shape (features, n)
        Design matrix with one example per column (the caller is expected to
        have appended the constant-1 bias row already).
    trainingLabels : array, shape (n, classes)
        One-hot label matrix, one example per row.
    testingImages, testingLabels :
        Not used during training; kept so the signature stays unchanged.
    epsilon : float
        Learning rate.
    batchSize : int
        Number of examples per mini-batch. The final batch of an epoch may be
        smaller when n is not a multiple of batchSize.
    alpha : float
        Regularization strength forwarded to gradfMSE.
    numEpochs : int, optional
        Number of passes over the training set (default 5).

    Returns
    -------
    W : array, shape (features, classes)
        The optimized weight matrix.
    """
    n = np.shape(trainingImages)[1]
    m_features = np.shape(trainingImages)[0]
    l_labels = np.shape(trainingLabels)[1]
    batchSize = int(batchSize)

    # Small random initialization of the weights.
    W = .00001 * np.random.randn(m_features, l_labels)

    # Shuffle examples once. Images hold one example per COLUMN while labels
    # hold one example per ROW, so the same permutation is applied to a
    # different axis of each array to keep them aligned.
    shuffle = np.random.permutation(n)
    images_shuffled = trainingImages[:, shuffle]
    labels_shuffled = trainingLabels[shuffle, :]

    for e in range(numEpochs):
        # Step through the data in strides of batchSize; using the stride form
        # of range keeps the bounds integral and visits every example,
        # including a trailing partial batch.
        for start in range(0, n, batchSize):
            stop = start + batchSize
            image_batch = images_shuffled[:, start:stop]
            label_batch = labels_shuffled[start:stop, :]
            W = W - epsilon * gradfMSE(W, image_batch, label_batch, alpha)
    return W


In [47]:
def accuracy (wtilde, Xtilde, y):
    """Return the fraction of examples whose highest-scoring class matches y.

    wtilde is a (features x classes) weight matrix, Xtilde a
    (features x examples) design matrix, and y an (examples x classes) matrix
    whose per-row argmax is taken as the true class. The raw score matrix is
    printed before the accuracy is computed.
    """
    scores = np.transpose(Xtilde).dot(wtilde)
    print (scores)
    predicted_classes = scores.argmax(axis=1)
    true_classes = y.argmax(axis=1)
    hits = (predicted_classes == true_classes)
    return np.mean(hits)

In [None]:
def gradfMSE (wtilde, Xtilde, y, alpha = 0.):
    """Gradient of the L2-regularized softmax cross-entropy loss.

    Despite the historical name, this is not an MSE gradient: the predictions
    are softmax probabilities and the returned value is
    (1/n) * Xtilde (yhat - y) + (alpha/n) * W_without_bias.

    Parameters
    ----------
    wtilde : array, shape (features, classes)
        Weight matrix; its LAST row is treated as the bias and is excluded
        from the regularization term.
    Xtilde : array, shape (features, n)
        Design matrix with one example per column.
    y : array, shape (n, classes)
        Target matrix (one-hot labels), one example per row.
    alpha : float, optional
        Regularization strength (default 0).

    Returns
    -------
    array, shape (features, classes)
        Gradient with respect to wtilde. wtilde itself is not modified.
    """
    X = np.transpose(Xtilde)
    n = np.shape(X)[0]

    # Work on a float copy so zeroing the bias row neither mutates the
    # caller's weights nor truncates on integer input.
    wtilde_remove_bias = np.array(wtilde, dtype=float)
    wtilde_remove_bias[-1] = 0

    # Subtracting each row's max leaves the softmax unchanged but prevents
    # np.exp from overflowing to inf (which would yield NaN probabilities).
    logits = np.dot(X, wtilde)
    logits = logits - np.max(logits, axis=1, keepdims=True)
    Z = np.exp(logits)

    # keepdims=True keeps the row sums as an (n, 1) column so the division
    # normalizes each example's row instead of mis-broadcasting across columns.
    yhat = Z / np.sum(Z, axis=1, keepdims=True)

    # Float literals keep the 1/n scaling from collapsing to 0 under integer division.
    return (1.0 / n) * np.dot(Xtilde, (yhat - y)) + (alpha / float(n)) * wtilde_remove_bias

In [33]:
def reshapeAndAppend1s (faces):
    """Build the design matrix: one flattened example per column plus a bias row.

    Parameters
    ----------
    faces : array-like, shape (N, ...)
        N examples along the first axis. Any remaining axes (e.g. a 784-long
        pixel vector, or a height x width image) are flattened in row-major
        order into M features per example.

    Returns
    -------
    array, shape (M + 1, N)
        Column j holds the M features of example j followed by a constant 1
        that corresponds to the bias term.
    """
    faces = np.asarray(faces)
    shape = np.shape(faces)
    N = shape[0]
    # Product of all non-example axes; computed explicitly (rather than with
    # reshape's -1) so an empty batch still reshapes cleanly.
    M = int(np.prod(shape[1:]))
    flattened = faces.reshape(N, M)
    bias_row = np.ones((1, N))
    return np.vstack([np.transpose(flattened), bias_row])

In [34]:
def reshapeLabelVectors (labels, numClasses = 10):
    """Convert integer class labels into a one-hot matrix.

    Parameters
    ----------
    labels : array-like of int
        Class index of each example. The input is flattened, so an (N,) vector
        and an (N, 1) column are treated the same way.
    numClasses : int, optional
        Number of columns in the result (default 10).

    Returns
    -------
    array of float, shape (N, numClasses)
        Row i is all zeros except for a 1 in column labels[i].
    """
    # Flatten first: with an (N, 1) column the paired fancy index below would
    # broadcast to N x N and mark every label in every row.
    labels = np.asarray(labels).ravel()
    N = labels.shape[0]
    oneHotMatrix = np.zeros(shape=(N, numClasses))
    oneHotMatrix[np.arange(N), labels] = 1
    return oneHotMatrix


In [39]:
if __name__ == "__main__":
    # Load data
    trainingImages = np.load("fashion_mnist_train_images.npy") / 255.0  # Normalizing by 255 helps accelerate training
    trainingLabels = np.load("fashion_mnist_train_labels.npy")
    testingImages = np.load("fashion_mnist_test_images.npy") / 255.0  # Normalizing by 255 helps accelerate training
    testingLabels = np.load("fashion_mnist_test_labels.npy")

    # Append a constant 1 term to each example to correspond to the bias terms.
    # The result holds one example per column.
    trainImages = reshapeAndAppend1s(trainingImages)
    print (np.shape(trainImages))
    testImages = reshapeAndAppend1s(testingImages)

    # Change from 0-9 labels to "one-hot" binary vector labels. For instance,
    # if the label of some example is 3, then its y should be [ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 ]
    trainLabels = reshapeLabelVectors(trainingLabels)
    testLabels = reshapeLabelVectors(testingLabels)
    print (np.shape(trainLabels))

    # Train the model on the PREPARED arrays (bias-augmented images and
    # one-hot labels), not on the raw arrays loaded above.
    Wtilde = softmaxRegression(trainImages, trainLabels, testImages, testLabels, epsilon=0.1, batchSize=100, alpha=.1)

    # Visualize the vectors
    # ...

(785, 60000)
(60000, 10)


In [57]:
# Hand-made sanity check for accuracy(): 3 features, 4 classes, 5 examples.

# Weights: one row per feature, one column per class.
wtilde = np.array([
    [1.0, 1.0, 1.0, 1.0],
    [1.0, 0.1, 1.0, 0.2],
    [1.0, 1.0, 0.1, 1.0],
])

# Design matrix: one row per feature, one column per example.
xtilde = np.array([
    [0.8, 0.6, 0.7, 0.4, 0.3],
    [0.4, 0.2, 0.3, 0.4, 0.3],
    [0.4, 0.3, 0.3, 0.4, 0.3],
])

# Targets: one row per example; accuracy() uses each row's argmax as the true class.
y = np.array([
    [0.2, 0.3, 0.4, 0.5],
    [0.9, 0.2, 0.1, 0.1],
    [0.4, 0.4, 0.6, 0.2],
    [0.2, 0.4, 0.1, 0.1],
    [0.5, 0.4, 0.3, 0.1],
])

print (accuracy(wtilde, xtilde, y))

[[1.6  1.24 1.24 1.28]
 [1.1  0.92 0.83 0.94]
 [1.3  1.03 1.03 1.06]
 [1.2  0.84 0.84 0.88]
 [0.9  0.63 0.63 0.66]]
0.4
