In [1]:
import numpy as np

#sigmoid activation function
def sigmoid(x):
    return 1 / (1+np.exp(-x))

In [2]:
import numpy as np

# Load the training data
training_data = np.loadtxt('mnist_train.csv', delimiter=',', dtype=np.float32, skiprows=1)

# Load the test data
test_data = np.loadtxt('mnist_test.csv', delimiter=',', dtype=np.float32, skiprows=1)


In [3]:
print("training_data.shape = ", training_data.shape, " ,  test_data.shape = ", test_data.shape)

training_data.shape =  (60000, 785)  ,  test_data.shape =  (10000, 785)


In [4]:
class NeuralNetwork:

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):

        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Weight Initialization with Xavier/He : W2
        self.W2 = np.random.randn(self.input_nodes, self.hidden_nodes) / np.sqrt(self.input_nodes/2)
        self.b2 = np.random.rand(self.hidden_nodes)

        # Weight Initialization Xavier/He : W3
        self.W3 = np.random.randn(self.hidden_nodes, self.output_nodes) / np.sqrt(self.hidden_nodes/2)
        self.b3 = np.random.rand(self.output_nodes)

        # Initialization A3,Z3 : A3 is the result of sigmoid function about Z2
        self.Z3 = np.zeros([1,output_nodes])
        self.A3 = np.zeros([1,output_nodes])

        # Initialization A2,Z2
        self.Z2 = np.zeros([1,hidden_nodes])
        self.A2 = np.zeros([1,hidden_nodes])

        # Initialization A1,Z1
        self.Z1 = np.zeros([1,input_nodes])
        self.A1 = np.zeros([1,input_nodes])

        # Learning rate Initialization
        self.learning_rate = learning_rate

    def feed_forward(self):

        delta = 1e-7    # log Infinite Divergence Prevention

        # Calculate Z1,A1 in the input layer
        self.Z1 = self.input_data
        self.A1 = self.input_data

        # Calculate Z2,A2 in the hidden layer
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = sigmoid(self.Z2)

        # Calculate Z3,A3 in the ouput layer
        self.Z3 = np.dot(self.A2, self.W3) + self.b3
        self.A3 = sigmoid(self.Z3)

        # Calculate the loss function value (error) : cross entropy
        return  -np.sum( self.target_data*np.log(self.A3 + delta) + (1-self.target_data)*np.log((1 - self.A3)+delta ) )

    # For external printing
    def loss_val(self):

        delta = 1e-7    # log Infinite Divergence Prevention

        # Calculate Z1,A1 in the input layer
        self.Z1 = self.input_data
        self.A1 = self.input_data

        # Calculate Z2,A2 in the hidden layer
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = sigmoid(self.Z2)

        # Calculate Z3,A3 in the ouput layer
        self.Z3 = np.dot(self.A2, self.W3) + self.b3
        self.A3 = sigmoid(self.Z3)

        # Calculate the loss function value : cross entropy
        return  -np.sum( self.target_data*np.log(self.A3 + delta) + (1-self.target_data)*np.log((1 - self.A3)+delta ) )

    def train(self, input_data, target_data):   # input_data : 784 , target_data : 10

        self.target_data = target_data
        self.input_data = input_data

        # Calculate an error with the feed foward
        loss_val = self.feed_forward()

        # Calculate loss_3
        loss_3 = (self.A3-self.target_data) * self.A3 * (1-self.A3)

        # Update W3, b3
        self.W3 = self.W3 - self.learning_rate * np.dot(self.A2.T, loss_3)

        self.b3 = self.b3 - self.learning_rate * loss_3

        # Caculate loss_2
        loss_2 = np.dot(loss_3, self.W3.T) * self.A2 * (1-self.A2)

        # Update W2, b2
        self.W2 = self.W2 - self.learning_rate * np.dot(self.A1.T, loss_2)

        self.b2 = self.b2 - self.learning_rate * loss_2

    def predict(self, input_data):        # Shape of input_data is (1, 784) matrix

        Z2 = np.dot(input_data, self.W2) + self.b2
        A2 = sigmoid(Z2)

        Z3 = np.dot(A2, self.W3) + self.b3
        A3 = sigmoid(Z3)

        predicted_num = np.argmax(A3)

        return predicted_num

    # Accuracy measurement
    def accuracy(self, test_data):

        matched_list = []
        not_matched_list = []

        for index in range(len(test_data)):

            label = int(test_data[index, 0])

            # Data normalize for one-hot encoding
            data = (test_data[index, 1:] / 255.0 * 0.99) + 0.01


            # Vector -> Matrix (for the prediction)
            predicted_num = self.predict(np.array(data, ndmin=2))

            if label == predicted_num:
                matched_list.append(index)
            else:
                not_matched_list.append(index)

        print("Current Accuracy = ", 100*(len(matched_list)/(len(test_data))), " %")

        return matched_list, not_matched_list

In [5]:
# Define variables
input_nodes = 784
hidden_nodes = 100
output_nodes = 10
learning_rate = 0.3
epochs = 1

nn = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

for i in range(epochs):

    for step in range(len(training_data)):  # train

        # input_data, target_data normalize
        target_data = np.zeros(output_nodes) + 0.01
        target_data[int(training_data[step, 0])] = 0.99
        input_data = ((training_data[step, 1:] / 255.0) * 0.99) + 0.01

        nn.train( np.array(input_data, ndmin=2), np.array(target_data, ndmin=2) )


        # Print the error once every 400 times
        if step % 400 == 0:
            print("step = ", step,  ",  loss_val = ", nn.loss_val())

step =  0 ,  loss_val =  5.181760451497374
step =  400 ,  loss_val =  1.8257279028072664
step =  800 ,  loss_val =  1.1966792019184942
step =  1200 ,  loss_val =  0.6891300029086208
step =  1600 ,  loss_val =  1.3263726552853805
step =  2000 ,  loss_val =  1.3204952536331338
step =  2400 ,  loss_val =  0.7099982590623498
step =  2800 ,  loss_val =  0.8521348818927709
step =  3200 ,  loss_val =  0.7284323585177378
step =  3600 ,  loss_val =  0.6745150319724835
step =  4000 ,  loss_val =  0.9315049798047481
step =  4400 ,  loss_val =  0.8098864914446494
step =  4800 ,  loss_val =  0.9004415456447329
step =  5200 ,  loss_val =  0.7883906736536096
step =  5600 ,  loss_val =  1.4187366952122564
step =  6000 ,  loss_val =  0.8133478931051695
step =  6400 ,  loss_val =  0.8920518709408244
step =  6800 ,  loss_val =  0.8922103035081866
step =  7200 ,  loss_val =  0.7958953427236826
step =  7600 ,  loss_val =  0.8275873918472992
step =  8000 ,  loss_val =  0.9278295180004442
step =  8400 ,  los

In [6]:
nn.accuracy(test_data)

Current Accuracy =  93.84  %


([0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  112,
  113,
  114,
  116,
  117,
  118,
  119,
  120,
  121,
  122,
  123,
  125,
  126,
  127,
  128,
  129,
  130,
  131,
  132,
  133,
  134,
  135,
  136,
  137,
  138,
  140,
  141,
  142,
  143,
  144,
  145,
  146,
  147,
  148,
  150,
  152,
  153,
  154,
  155,
  156,
  157,
  158,
  159,
  160,
  161,
  162,
  163,
  164,
  165,
 