In [8]:
import numpy as np
import json
import math
train = np.load('feats.npy')
train_labels = np.load('labels.npy')
np.random.seed(42)
theta = np.random.rand(15, 10000)

In [9]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def inference(theta, feats):
    """make a prediction given a vector of features"""
    eY = softmax(np.matmul(theta, feats))
    return eY
    

In [35]:
def updatedPredictionMatrix(inference, label, feats):
    """creates matrix for updating theta based off of gradient of softmax + cross/entropy"""
    inference[int(label)-1] = 1 - inference[int(label)-1]
    return -5 * np.matmul(inference.reshape(len(inference), 1), np.column_stack(feats))

def printLoss(inference, label):
    """print cross entropy loss, assuming actual y is one hot encoded"""
    loss = -math.log(inference[int(label)-1])
    print('Loss:', loss)
    
def loss(inference, label):
    """return cross entropy loss, assuming actual y is one hot encoded"""
    try:
        ans = -math.log(inference[int(label)-1])
        return ans
    except ValueError:
        return 0

def evalTrain(theta, train, label):
    avgloss = 0
    for x in range(len(train)):
        avgloss += loss(inference(theta, train[x]), label[x])
    return avgloss / len(train)

In [36]:
def full_gradient_descent(theta, labels, train):
    N = len(train)
    updateM = np.zeros((15, 10000))
    print("Beginning ", end = '')
    y = np.linalg.norm(theta)
    print(evalTrain(theta, train, labels))
    for x in range(len(train)):
        expY = inference(theta, train[x])
        update = updatedPredictionMatrix(expY, labels[x], train[x])
        if ( x % 1000 == 0):
            print('Step', str(x)+'/'+str(N)+'  ')
        update = np.add(update, updateM)
    theta = np.add(theta, update)
    print("End ", end='')
    print(evalTrain(theta, train, labels))
    return theta

In [37]:
# for x in range(10):
#     print("epoch", x)
#     theta = full_gradient_descent(theta, train_labels, train)

epoch 0
Beginning 24.210959302374835
Step 0/3000  


KeyboardInterrupt: 

In [23]:

def stochastic_descent(theta, labels, train):
    N = len(train)
    updateM = np.zeros((15, 10000))
    print("Beginning ", end = '')
    y = np.linalg.norm(theta)
    avgloss = evalTrain(inference(theta, train[0]), labels[0])
    for x in range(100):
        randSample = np.random.randint(0, train.shape[0])
        expY = inference(theta, train[randSample]) #correct?
        update = updatedPredictionMatrix(expY, labels[x], train[randSample])
#             print('Step', str(x)+'/'+str(N)+'  ')
        update = np.add(update, updateM)
    theta = np.add(theta, update)
    print("End ", end='')
    printLoss(inference(theta, train[0]), labels[0])
    return theta

        

In [38]:
for x in range(10):
    print("epoch", x)
    theta = stochastic_descent(theta, train_labels, train)

epoch 0
Beginning Loss: 17.54089999074002
End Loss: 16.082767693121554
epoch 1
Beginning Loss: 16.082767693121554
End Loss: 16.083470395547508
epoch 2
Beginning Loss: 16.083470395547508
End Loss: 14.878226239135385
epoch 3
Beginning Loss: 14.878226239135385
End Loss: 15.901601483885443
epoch 4
Beginning Loss: 15.901601483885443
End Loss: 15.257673145919565
epoch 5
Beginning Loss: 15.257673145919565
End Loss: 15.254230598505849
epoch 6
Beginning Loss: 15.254230598505849
End Loss: 15.44666530531986
epoch 7
Beginning Loss: 15.44666530531986
End Loss: 15.381743096354787
epoch 8
Beginning Loss: 15.381743096354787
End Loss: 15.385948248834618
epoch 9
Beginning Loss: 15.385948248834618
End Loss: 15.38634628816825


(3000, 10000)