In [1]:
# Basic Imports and Data Setup

import math
import numpy as np
import scipy
import scipy.io
from sklearn.utils import shuffle

mnist = scipy.io.loadmat("./hw6_data_dist/letters_data.mat")

mnist_train_data = mnist["train_x"]
#mnist_train_labels = mnist["train_y"] # 1 - 26
mnist_test_data = mnist["test_x"]
mnist_raw_labels = mnist["train_y"]

#Append bias to training data
ones = np.array([[1 for _ in range(len(mnist_train_data))]])
mnist_train_data = np.concatenate((mnist_train_data, ones.T), axis=1)

mnist_train_data, mnist_raw_labels = shuffle(
    mnist_train_data, mnist_raw_labels, random_state=0)

logging = True

def logprint(msg):
    if logging:
        print(msg)

In [2]:
# Data Preprocessing

def labelsToVector(labels):
    result = []
    for label in labels:
        vec = [1 if i+1 == label else 0 for i in range(26)]
        result.append(vec)
    return result

mnist_train_labels = labelsToVector(mnist_raw_labels)

logprint(mnist_raw_labels[100])
logprint(mnist_train_labels[100])

[23]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]


In [3]:
# Main training of the network by SGD

#Note that this works for matrices by element
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

#z is predicted, y is actual
def crossEntropy(z, y):
    assert len(z) == len(y)
    entropy = 0
    for j in range(len(z)):
        entropy += y[j] * np.log(z[j]) + (1 - y[j]) * np.log(1 - z[j])
    return -entropy    

def trainNeuralNetwork(images, labels, params=None):
    #V = np.random.rand(200, 785)
    #W = np.random.rand(26, 201)
    
    V = np.random.normal(0, math.sqrt(1/785), (200,785))
    W = np.random.normal(0, math.sqrt(1/201), (26,201))
    
    iterations = 100
    while iterations > 0:
        #Choose a random index for SGD, we'll use this to update
        index = int(np.random.random() * len(images))
        X_i = images[index] #The data
        y_i = labels[index] #The correct value as a vector
        
        #Compute the forward pass by simple matrix multiplication
        #hidden = V * X_i
        #200 x 1 = 200 x 785 * 785 x 1
        hidden = np.dot(V, X_i)
        hidden_activated = np.tanh(hidden)
        #V 201 x 1
        hidden_activated_bias = np.concatenate((hidden_activated, np.array([1])))
        
        #output = W * hidden_bias
        #26 x 1 = 26 x 201 * 201 x 1
        output = np.dot(W, hidden_activated_bias)
        output_activated = sigmoid(output)
        
        entropy = crossEntropy(output_activated, y_i)
        
        #SGD
        
        W_copy = np.array(W)
        
        derivatives_store = []
        
        for outputIndex in range(len(output)):
            left = - y_i[outputIndex] / output_activated[outputIndex] + (1 - y_i[outputIndex]) / (1 - output_activated[outputIndex])
            middle = output_activated[outputIndex] * (1 - output_activated[outputIndex])
            derivatives_store.append(left * middle)
        
        for outputIndex in range(len(output)): #for every unit in the output layer
            for hiddenIndex in range(len(hidden)):
                #left = - y_i[outputIndex] / output_activated[outputIndex] + (1 - y_i[outputIndex]) / (1 - output_activated[outputIndex])
                #middle = output_activated[outputIndex] * (1 - output_activated[outputIndex])
                leftmiddle = derivatives_store[outputIndex]
                right = hidden_activated[hiddenIndex]
                d_e_weight = leftmiddle * right
                W_copy[outputIndex][hiddenIndex] -= learning_rate * d_e_weight
        
        for hiddenIndex in range(len(hidden)):
            for inputIndex in range(len(X_i) - 1):
                sum_outgoing = 0
                for outputIndex in range(26):
                    sum_outgoing += derivatives_store[outputIndex] * W[outputIndex][hiddenIndex]
                middle = hidden_activated[hiddenIndex]
                middle = 1 - middle*middle
                right = X_i[inputIndex]
                d_e_weight = sum_outgoing * middle * right
                W_copy[outputIndex][hiddenIndex] -= learning_rate * d_e_weight
        
        iterations -= 1
        
    return V, W

trainNeuralNetwork(mnist_train_data, mnist_train_labels, None)



(array([[-0.00396354,  0.02697234,  0.02327695, ..., -0.04033353,
          0.04201523,  0.02732812],
        [-0.00880124,  0.00061007, -0.01679176, ...,  0.06638868,
          0.02122301, -0.0001514 ],
        [ 0.04049719, -0.00195545,  0.03198255, ..., -0.10671998,
         -0.03895394,  0.01085768],
        ..., 
        [-0.00659104,  0.02128268,  0.03273203, ..., -0.00262291,
         -0.02487332,  0.04742108],
        [ 0.02953429, -0.02234793,  0.0117783 , ..., -0.00015394,
         -0.00933957,  0.01959741],
        [-0.02433439, -0.08169252,  0.02198847, ...,  0.00478221,
         -0.09231498,  0.01993075]]),
 array([[ 0.03590482, -0.0520396 , -0.06043766, ...,  0.08255051,
          0.01542365,  0.06019421],
        [-0.01370881,  0.07813921,  0.06126528, ...,  0.01460649,
          0.0638195 , -0.02265309],
        [ 0.00382901,  0.06679599, -0.10522025, ..., -0.0382656 ,
          0.1488184 ,  0.04465715],
        ..., 
        [ 0.00642352,  0.02028853,  0.01481156, ...,

In [5]:
print(np.tanh(np.array([[1,2,3,4],[5,6,7,8]])))

[[ 0.76159416  0.96402758  0.99505475  0.9993293 ]
 [ 0.9999092   0.99998771  0.99999834  0.99999977]]


In [None]:
def predictNeuralNetwork(images, V, W):

def calculateVal:    