In [41]:
# Basic Imports and Data Setup

import numpy as np
import scipy
import scipy.io
from sklearn.utils import shuffle

mnist = scipy.io.loadmat("./hw6_data_dist/letters_data.mat")

mnist_train_data = mnist["train_x"]
#mnist_train_labels = mnist["train_y"] # 1 - 26
mnist_test_data = mnist["test_x"]
mnist_raw_labels = mnist["train_y"]

#Append bias to training data
ones = np.array([[1 for _ in range(len(mnist_train_data))]])
mnist_train_data = np.concatenate((mnist_train_data, ones.T), axis=1)

mnist_train_data, mnist_raw_labels = shuffle(
    mnist_train_data, mnist_raw_labels, random_state=0)

logging = True

def logprint(msg):
    if logging:
        print(msg)

In [40]:
# Data Preprocessing

def labelsToVector(labels):
    result = []
    for label in labels:
        vec = [1 if i+1 == label else 0 for i in range(26)]
        result.append(vec)
    return result

mnist_train_labels = labelsToVector(mnist_raw_labels)

logprint(mnist_raw_labels[100])
logprint(mnist_train_labels[100])

[23]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]


In [57]:
# Main training of the network by SGD

#Note that this works for matrices by element
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

#z is predicted, y is actual
def crossEntropy(z, y):
    entropy = 0
    for j in range(len(z)):
        entropy += y[j] * np.log(z[j]) + (1 - y[j]) * np.log(1 - z[j])
    return -entropy    

def trainNeuralNetwork(images, labels, params=None):
    V = np.random.rand(200, 785)
    W = np.random.rand(26, 201)
    
    iterations = 100
    while iterations > 0:
        #Choose a random index for SGD, we'll use this to update
        index = int(np.random.random() * len(images))
        X_i = images[index] #The data
        y_i = labels[index] #The correct value as a vector
        
        #Compute the forward pass by simple matrix multiplication
        #hidden = V * X_i
        #200 x 1 = 200 x 785 * 785 x 1
        hidden = np.dot(V, X_i)
        hidden_activated = np.tanh(hidden)
        #V 201 x 1
        hidden_activated_bias = np.concatenate((hidden_activated, np.array([1])))
        
        #output = W * hidden_bias
        #26 x 1 = 26 x 201 * 201 x 1
        output = np.dot(W, hidden_activated_bias)
        output_activated = sigmoid(output)
        
        print(output_activated)
        
        iterations -= 1
        
    return V, W

trainNeuralNetwork(mnist_train_data, mnist_train_labels, None)

[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1. 

(array([[ 0.43184943,  0.0627503 ,  0.17587594, ...,  0.37312768,
          0.70856109,  0.98404611],
        [ 0.01445226,  0.64426432,  0.12394897, ...,  0.74351062,
          0.87741314,  0.78891272],
        [ 0.62406494,  0.45906154,  0.56025499, ...,  0.08789429,
          0.71251083,  0.03695395],
        ..., 
        [ 0.91993997,  0.26172947,  0.29233467, ...,  0.8467851 ,
          0.55672227,  0.94558292],
        [ 0.84053162,  0.84239322,  0.75487135, ...,  0.14469633,
          0.19181361,  0.26925764],
        [ 0.74694823,  0.28751789,  0.25751169, ...,  0.28121277,
          0.20167293,  0.10075588]]),
 array([[ 0.36194314,  0.68696403,  0.61127077, ...,  0.82065883,
          0.60924468,  0.97623904],
        [ 0.62006561,  0.6369432 ,  0.47715808, ...,  0.2981657 ,
          0.17139218,  0.39069018],
        [ 0.72845044,  0.85329736,  0.33797144, ...,  0.41714384,
          0.20582471,  0.50332565],
        ..., 
        [ 0.53350754,  0.5201907 ,  0.30277783, ...,

In [42]:
print(np.tanh(np.array([[1,2,3,4],[5,6,7,8]])))

[[ 0.76159416  0.96402758  0.99505475  0.9993293 ]
 [ 0.9999092   0.99998771  0.99999834  0.99999977]]
