# Activation function

In [1]:
import numpy as np

In [2]:
def step_function(x):
    y = x > 0
    return y.astype(np.int32)

In [3]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [4]:
def softmax(x):
    const = np.max(x)
    exp_a = np.exp(x-const)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y

In [5]:
def relu(x):
    y = np.maximum(0, x)
    return y

In [6]:
test = np.random.randn(2, 2)
print("step function: \n", step_function(test))
print("sigmoid: \n", sigmoid(test))
print("relu: \n", relu(test))
print("softmax:", softmax(test))

step function: 
 [[1 1]
 [1 0]]
sigmoid: 
 [[0.5838314  0.54742341]
 [0.568442   0.4825243 ]]
relu: 
 [[0.33852179 0.19026556]
 [0.27549738 0.        ]]
softmax: [[0.28853297 0.24877606]
 [0.27090954 0.19178142]]


# Neural Network

In [7]:
def init_network():
    network = {}
    network['W1'] = np.random.randn(2, 2)
    network['b1'] = np.random.randn(2, 2)
    network['W2'] = np.random.randn(2, 2)
    network['b2'] = np.random.randn(2, 2)
    network['W3'] = np.random.randn(2, 2)
    network['b3'] = np.random.randn(2, 2)
    
    return network

def forward(network, x, activation):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    
    l1 = np.dot(x, W1) + b1
    z1 = activation(l1)
    l2 = np.dot(z1, W2) + b2
    z2 = activation(l2)
    l3 = np.dot(z2, W3) + b3
    z3 = activation(l3)

    return z3

In [8]:
network_test = init_network()
test_result = forward(network_test, test, sigmoid)
print(test_result)


[[0.89992965 0.38655294]
 [0.59915291 0.51733408]]


In [9]:
test_result = forward(network_test, test, softmax)
print(test_result)

[[0.70143142 0.05260635]
 [0.15905325 0.08690898]]


# MNIST tuto

## load data

In [27]:
import sys, os
sys.path.append(r'C:\Users\JEJOON YOO\Desktop\prgm\dl_scratch\data')
from mnist import load_mnist

In [28]:
(x_train, y_train), (x_test, y_test) = load_mnist(flatten=True, normalize=True)

Downloading train-images-idx3-ubyte.gz ... 
Done
Downloading train-labels-idx1-ubyte.gz ... 
Done
Downloading t10k-images-idx3-ubyte.gz ... 
Done
Downloading t10k-labels-idx1-ubyte.gz ... 
Done
Converting train-images-idx3-ubyte.gz to NumPy Array ...
Done
Converting train-labels-idx1-ubyte.gz to NumPy Array ...
Done
Converting t10k-images-idx3-ubyte.gz to NumPy Array ...
Done
Converting t10k-labels-idx1-ubyte.gz to NumPy Array ...
Done
Creating pickle file ...
Done!


In [29]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(60000, 784)
(60000,)
(10000, 784)
(10000,)


## inference tutorial

In [57]:
import pickle

def get_data():
    (x_train, t_train), (x_test, t_test) = \
        load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test

def init_network():
    with open(r"C:\Users\JEJOON YOO\Desktop\prgm\dl_scratch\data\sample_weight.pkl", 'rb') as f:
        network = pickle.load(f)
    return network

def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)
    
    return y


In [58]:
x, t = get_data()
network = init_network()

acc_cnt = 0
for i in range(len(x)):
    y = predict(network, x[i])
    p  = np.argmax(y)
    if p == t[i]:
        acc_cnt += 1

print("Acc:" + str(float(acc_cnt) / len(x)))

Acc:0.9352


In [59]:
x, _ = get_data()
network = init_network()
W1, W2, W3 = network['W1'], network['W2'], network['W3']

print(x.shape)
print(x[0].shape)
print(W1.shape)
print(W2.shape)
print(W3.shape)

(10000, 784)
(784,)
(784, 50)
(50, 100)
(100, 10)


In [61]:
x, t = get_data()
network = init_network()
batch_size = 100
accuracy_cnt = 0

for i in range(0, len(x), batch_size):
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    p = np.argmax(y_batch, axis=1)
    accuracy_cnt += np.sum(p == t[i:i+batch_size])

print("Acc:" + str(float(accuracy_cnt) / len(x)))

Acc:0.9352
