In [39]:
import numpy as np
import json
import math
train = np.load('feats.npy')
train_labels = np.load('labels.npy')
np.random.seed(42)
theta = np.random.rand(15, 10000)

In [40]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def inference(theta, feats):
    """make a prediction given a vector of features"""
    eY = softmax(np.matmul(theta, feats))
    return eY
    

In [170]:
def updatedPredictionMatrix(inference, label, feats):
    """creates matrix for updating theta based off of gradient of softmax + cross/entropy"""
    inference[int(label)-1] = 1 - inference[int(label)-1]
    return -5 * np.matmul(inference.reshape(len(inference), 1), np.column_stack(feats))

def printLoss(inference, label):
    """print cross entropy loss, assuming actual y is one hot encoded"""
    loss = -math.log(inference[int(label)-1])
    print('Loss:', loss)
    
def loss(inference, label):
    """return cross entropy loss, assuming actual y is one hot encoded"""
    try:
        ans = -math.log(inference[int(label)-1])
        return ans
    except ValueError:
        return 0

def evalLoss(theta, train, label):
    avgloss = 0
    for x in range(len(train)):
        los = loss(softmax(inference(theta, train[x])), label[x])
#         pred = softmax(inference(theta, train[x]))
        avgloss += los
#         if (los > 3):
#             print(los)
#             qualitative(x, theta, train, label)
    return avgloss / len(train)

def evalAccuracy(theta, train, label):
    correct = 0
    for x in range(len(train)):
        pred = np.argmax(softmax(inference(theta, train[x])))
        if (pred == int(label[x])):
            correct += 1
    return round(correct / len(train), 4)

In [42]:
def full_gradient_descent(theta, labels, train):
    N = len(train)
    updateM = np.zeros((15, 10000))
    print("Beginning ", end = '')
    y = np.linalg.norm(theta)
    print(evalTrain(theta, train, labels))
    for x in range(len(train)):
        expY = inference(theta, train[x])
        update = updatedPredictionMatrix(expY, labels[x], train[x])
        if ( x % 1000 == 0):
            print('Step', str(x)+'/'+str(N)+'  ')
        update = np.add(update, updateM)
    theta = np.add(theta, update)
    print("End ", end='')
    print(evalTrain(theta, train, labels))
    return theta

In [43]:
# for x in range(10):
#     print("epoch", x)
#     theta = full_gradient_descent(theta, train_labels, train)

In [46]:

def stochastic_descent(theta, labels, train):
    N = len(train)
    updateM = np.zeros((15, 10000))
    print("Beginning ", end = '')
    y = np.linalg.norm(theta)
    print(evalTrain(theta, train, labels))
    for x in range(100):
        randSample = np.random.randint(0, train.shape[0])
        expY = inference(theta, train[randSample]) #correct?
        update = updatedPredictionMatrix(expY, labels[x], train[randSample])
#             print('Step', str(x)+'/'+str(N)+'  ')
        update = np.add(update, updateM)
    theta = np.add(theta, update)
    print("End ", end='')
    print(evalTrain(theta, train, labels))
    return theta

        

In [47]:
for x in range(10):
    print("epoch", x)
    theta = stochastic_descent(theta, train_labels, train)

epoch 0
Beginning 4.635560278237862
End 4.669114594527819
epoch 1
Beginning 4.669114594527819
End 7.474797289016692
epoch 2
Beginning 7.474797289016692
End 8.463335967671588
epoch 3
Beginning 8.463335967671588
End 8.995736346706616
epoch 4
Beginning 8.995736346706616
End 9.358861022636177
epoch 5
Beginning 9.358861022636177
End 9.940454468685063
epoch 6
Beginning 9.940454468685063
End 10.447333573063773
epoch 7
Beginning 10.447333573063773
End 11.145020996438928
epoch 8
Beginning 11.145020996438928
End 11.781051237395792
epoch 9
Beginning 11.781051237395792
End 11.697709538105803


In [144]:
def qualitative(y, theta, train, labels):
    pred = softmax(inference(theta, train[y]))
    for i, x in enumerate(pred):
        print(i+1, x)
    print('prediction:', np.argmax(pred)+1)
    print('actual:', int(labels[y]))
    print('loss:', loss(pred, labels[y]))

In [171]:
evalAccuracy(theta, train, train_labels)

0.05

working


In [72]:
int(data[0].split(' ')[-1][1:])

13