In [59]:
# Basic Imports and Data Setup

import math
import numpy as np
import scipy
import scipy.io
from sklearn.utils import shuffle

# Load the letters dataset from the assignment's .mat file.
mnist = scipy.io.loadmat("./hw6_data_dist/letters_data.mat")

# Pull out the arrays used by the rest of the notebook.
# Raw training labels appear to be integers in 1..26 (one per letter).
mnist_train_data = mnist["train_x"]
mnist_raw_labels = mnist["train_y"]
mnist_test_data = mnist["test_x"]

# Append a constant-1 bias column to the training data.
# `ones` is kept as a (1, N) row so its transpose is the (N, 1) column to add.
# NOTE(review): the test data does not get a bias column here - confirm that
# it is added before the test set is fed through the network.
ones = np.ones((1, len(mnist_train_data)), dtype=int)
mnist_train_data = np.hstack((mnist_train_data, ones.T))

# Shuffle samples and labels together; the fixed seed makes the order repeatable.
mnist_train_data, mnist_raw_labels = shuffle(
    mnist_train_data,
    mnist_raw_labels,
    random_state=0,
)

# Module-wide switch read by logprint; set it to False to silence the output.
# NOTE(review): this name would shadow the stdlib `logging` module if that
# module were ever imported in this notebook.
logging = True


def logprint(msg):
    """Print ``msg`` only while the module-level ``logging`` flag is truthy."""
    if not logging:
        return
    print(msg)

In [40]:
# Data Preprocessing

def labelsToVector(labels, num_classes=26):
    """One-hot encode 1-based class labels.

    Args:
        labels: Iterable of labels. Each label is compared with ``==``
            against 1..num_classes, so it may be a plain integer or a
            one-element array (such as a row of an (N, 1) label matrix).
        num_classes: Length of every output vector. Defaults to 26, the
            value that used to be hard-coded.

    Returns:
        A list with one entry per label; each entry is a list of
        ``num_classes`` ints holding a 1 at position ``label - 1`` and 0
        elsewhere. A label outside 1..num_classes yields an all-zero vector.
    """
    return [
        [1 if position + 1 == label else 0 for position in range(num_classes)]
        for label in labels
    ]

# One-hot encode the raw training labels with labelsToVector.
mnist_train_labels = labelsToVector(mnist_raw_labels)

# Spot check: log sample 100's raw label followed by its encoded vector.
logprint(mnist_raw_labels[100])
logprint(mnist_train_labels[100])

[23]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]


In [60]:
# Main training of the network by SGD

#Note that this works for matrices by element
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)), overflow-free.

    The direct formula evaluates exp(-x), which overflows (and emits a
    RuntimeWarning) once x is a large negative number. Here the exponential
    is only ever taken of a non-positive value, so it stays within (0, 1].

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A float scalar for scalar input, otherwise a float ndarray with the
        same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))  # exp of a value <= 0: cannot overflow
    # x >= 0: 1 / (1 + e^-x).  x < 0: the algebraically equal e^x / (1 + e^x).
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays unchanged.
    return out[()]

#z is predicted, y is actual
def crossEntropy(z, y):
    """Binary cross-entropy between predictions ``z`` and targets ``y``.

    Computes -sum_j [ y_j * log(z_j) + (1 - y_j) * log(1 - z_j) ] along the
    first axis. Predictions are clipped to [eps, 1 - eps] (machine epsilon)
    first, so a saturated output of exactly 0 or 1 gives a large finite loss
    rather than ``inf`` or ``nan`` (from 0 * log(0)).

    Args:
        z: Predicted probabilities, array-like with values in [0, 1].
        y: Target values, array-like with the same shape as ``z``.

    Returns:
        The summed loss: a float for 1-D input, or an array of per-column
        sums when the inputs have more than one dimension.

    Raises:
        ValueError: If ``z`` and ``y`` cannot be broadcast together.
    """
    z = np.asarray(z, dtype=float)
    y = np.asarray(y, dtype=float)
    eps = np.finfo(float).eps
    z = np.clip(z, eps, 1 - eps)  # keep both log() arguments strictly positive
    return -np.sum(y * np.log(z) + (1 - y) * np.log(1 - z), axis=0)

def trainNeuralNetwork(images, labels, params=None):
    """Train a one-hidden-layer network with single-sample SGD.

    Architecture: input -> tanh hidden layer (with an appended bias unit)
    -> sigmoid output layer, trained against the cross-entropy loss. Each
    iteration draws one random sample, runs the forward pass, back-propagates
    the error and takes one gradient step on both weight matrices.

    Args:
        images: Sequence of feature vectors (e.g. an (N, D) array). Any bias
            feature must already be part of each row; none is added here.
        labels: Sequence of target vectors (e.g. one-hot lists of length K),
            aligned with ``images``.
        params: Optional dict of hyper-parameters. Recognised keys:
            ``"hidden_units"`` (default 200), ``"iterations"`` (default 100)
            and ``"learning_rate"`` (default 0.01). ``None`` uses the
            defaults.

    Returns:
        Tuple ``(V, W)``: ``V`` has shape (hidden_units, D) and maps input to
        hidden layer; ``W`` has shape (K, hidden_units + 1) and maps the
        biased hidden layer to the output.

    Raises:
        IndexError: If ``images`` or ``labels`` is empty.
    """
    params = params or {}
    hidden_units = params.get("hidden_units", 200)
    iterations = params.get("iterations", 100)
    learning_rate = params.get("learning_rate", 0.01)

    # Layer sizes come from the data instead of being hard-coded.
    n_samples = len(images)
    n_features = len(images[0])
    n_classes = len(labels[0])

    # Zero-mean Gaussian initialisation with variance 1 / fan_in.
    V = np.random.normal(
        0, math.sqrt(1.0 / n_features), (hidden_units, n_features))
    W = np.random.normal(
        0, math.sqrt(1.0 / (hidden_units + 1)), (n_classes, hidden_units + 1))

    for _ in range(iterations):
        # Choose a random sample for this SGD step.
        index = np.random.randint(n_samples)
        X_i = np.asarray(images[index], dtype=float)  # the data
        y_i = np.asarray(labels[index], dtype=float)  # the correct value as a vector

        # Forward pass.
        # (hidden_units,) = (hidden_units x D) . (D,)
        hidden_activated = np.tanh(np.dot(V, X_i))
        # Append the constant bias unit: (hidden_units + 1,)
        hidden_activated_bias = np.append(hidden_activated, 1.0)
        # (K,) = (K x (hidden_units + 1)) . (hidden_units + 1,)
        output_activated = sigmoid(np.dot(W, hidden_activated_bias))

        # Backward pass.
        # For sigmoid outputs with cross-entropy loss, dL/d(pre-activation)
        # simplifies to (prediction - target).
        output_delta = output_activated - y_i
        # Propagate through W (excluding the bias column, which has no
        # incoming weights) and through tanh, whose derivative is 1 - tanh^2.
        # This must use W before it is updated below.
        hidden_delta = (
            np.dot(W[:, :-1].T, output_delta) * (1.0 - hidden_activated ** 2))

        # Gradient step on both layers.
        W -= learning_rate * np.outer(output_delta, hidden_activated_bias)
        V -= learning_rate * np.outer(hidden_delta, X_i)

    return V, W

# Run the training routine on the full training set. As the cell's last
# expression, the returned (V, W) pair is echoed as the cell output.
trainNeuralNetwork(mnist_train_data, mnist_train_labels, None)

[ 0.68162349  0.58998974  0.64544565  0.51622065  0.40733153  0.42240481
  0.74894483  0.84899571  0.47745002  0.21756815  0.8100889   0.51442896
  0.58749822  0.51777575  0.64835752  0.79782031  0.7177631   0.77785247
  0.42975452  0.74118307  0.56212273  0.62487186  0.29839452  0.59055676
  0.68700131  0.36384338]
[ 0.62206011  0.5790413   0.49374337  0.38065813  0.34364352  0.29235528
  0.58249247  0.52343919  0.69485663  0.32256057  0.53956196  0.29702047
  0.54425798  0.77932759  0.80231623  0.38229623  0.56708296  0.62039983
  0.91716391  0.21716591  0.81081475  0.4282393   0.16783223  0.55099739
  0.51290851  0.72256226]
[ 0.4085931   0.25587071  0.38127586  0.06305594  0.22230361  0.72202873
  0.30125044  0.67028826  0.82119449  0.65700595  0.45459042  0.45554162
  0.41285407  0.60730445  0.45937541  0.31838983  0.48661528  0.87058424
  0.59889714  0.25416734  0.31833634  0.28767358  0.81971963  0.7148851
  0.27443334  0.7110331 ]
[ 0.50382904  0.53989442  0.33385299  0.6068743

(array([[ 0.04328892, -0.00639609,  0.02230988, ..., -0.04350863,
          0.0527743 , -0.02832847],
        [ 0.01099676, -0.02421852, -0.03849537, ..., -0.00667233,
         -0.01748159,  0.00804364],
        [-0.03069799, -0.02521133,  0.02682835, ...,  0.02726248,
          0.03672574,  0.0122877 ],
        ..., 
        [-0.03532823,  0.00788378, -0.02555143, ..., -0.04017287,
         -0.00736081, -0.04117716],
        [ 0.04222521,  0.03265436,  0.01046159, ...,  0.0031484 ,
          0.01654961, -0.01436818],
        [ 0.04930115, -0.01785493, -0.01292475, ..., -0.0084824 ,
         -0.01305147,  0.00291725]]),
 array([[ 0.0997196 ,  0.01322   ,  0.12017167, ...,  0.11151101,
         -0.07108588,  0.00096161],
        [ 0.07645679, -0.07266888, -0.073089  , ...,  0.06094217,
          0.12261047, -0.01235102],
        [-0.00367309,  0.03527866,  0.09152014, ..., -0.02314323,
          0.03034294, -0.04372423],
        ..., 
        [-0.09845962, -0.00696572,  0.21841804, ...,

In [42]:
# Scratch check: np.tanh applied to a 2-D array works element-wise.
print(np.tanh(np.array([[1,2,3,4],[5,6,7,8]])))

[[ 0.76159416  0.96402758  0.99505475  0.9993293 ]
 [ 0.9999092   0.99998771  0.99999834  0.99999977]]
