In [282]:
import numpy as np
import matplotlib.pyplot as plt

In [283]:
from mlxtend.data import loadlocal_mnist
import platform

In [284]:
#step1 : MNIST reading & splitting
ori_x, ori_y = loadlocal_mnist(
    images_path='MNIST/train-images.idx3-ubyte', 
    labels_path='MNIST/train-labels.idx1-ubyte'
    )

In [285]:
np.savetxt(fname='ori_images.csv', 
           X = ori_x, delimiter=',', fmt='%d')
np.savetxt(fname='ori_labels.csv', 
           X = ori_y, delimiter=',', fmt='%d')

In [286]:
#One Hot encoding的編碼邏輯為將類別拆成多個行(column)，每個列中的數值由1、0替代，當某一列的資料存在的該行的類別則顯示1，反則顯示0。
def oneHot(x, n_col=None):
  """ One hot encoding function"""
  if not n_col:
    n_col = np.amax(x) + 1
  
  one_hot = np.zeros((x.shape[0], n_col))
  one_hot[np.arange(x.shape[0]), x] = 1
  return one_hot

In [287]:
def accuracy(y_true, y_pred):
  #小數點後兩位
  return round(np.sum(y_true == y_pred, axis = 0) / len(y_true), 2)

In [288]:
def batch_loader(X, y = None, batch_size=64):
  """ Generates batches for training"""
  n_samples = X.shape[0]
  for i in np.arange(0, n_samples, batch_size):
    begin, end = i, min(i + batch_size, n_samples)
    if y is not None:
      yield X[begin:end], y[begin: end]
    else:
      yield X[begin:end]

In [289]:
m = 42000
#70% train 30% test
x_train, x_test = ori_x[:m], ori_x[m:]
y_train, y_test = ori_y[:m], ori_y[m:]

In [290]:
y_train, y_test = oneHot(y_train.astype("int")), oneHot(y_test.astype("int"))
x_train, x_test = x_train / 255.0, x_test / 255.0

In [291]:
x_train, x_test = x_train.reshape(-1, 28*28), x_test.reshape(-1, 28*28)
x_train.shape, x_test.shape

((42000, 784), (18000, 784))

In [292]:
input_dim = 28*28 # 784
output_dim = 10 # 10 classes

In [293]:
#step2 : Dense neural layer:
class Linear():
  def __init__(self, input_dim, output_dim, name = "linear"):
    limit = 1 / np.sqrt(input_dim)
    self.W = np.random.uniform(-limit, limit, (input_dim, output_dim))
    #initially set zero 
    self.b = np.zeros((1, output_dim)) 
    self.input = None
    self.output = None
    self.name = name
  
  def forward(self, x):
    self.input = x
    self.output = np.dot(self.input, self.W) + self.b 
    return self.output
  
  def backward(self, output_error, learning_rate = 0.01):
    input_error = np.dot(output_error, self.W.T)
    # Calculate the weights error
    delta = np.dot(self.input.T, output_error) 

    # 這裡使用sgd來更新參數
    self.W -= learning_rate * delta
    self.b -= learning_rate * np.mean(output_error)
    return input_error
  
  def __call__(self, x):
    return self.forward(x)

In [294]:
#step3: ReLU layer:
class ReLU():
  def __init__(self, alpha = 0.2):
    self.alpha = alpha
  
  def __call__(self, x):
    return self.activation(x)
  # ReLU activation
  def activation(self, x):
    return np.where(x > 0, x, 0)
  
  def gradient(self, x):
    return np.where(x >= 0, 1, self.alpha)

In [295]:
#step4: Softmax output:
class Softmax():
  def __call__(self, x):
    return self.activation(x)
  
  def activation(self, x):
    e_x = np.exp(x - np.max(x, axis = -1, keepdims=True))
    return e_x / np.sum(e_x, axis=-1, keepdims = True)
  
  def gradient(self, x):
    # Error was in our softmax
    p = self.activation(x)
    return p * (1 - p)

In [296]:
#step5: Cross-entropy loss calculation:
class CrossEntropy():
  def loss(self, y, p):
    p = np.clip(p, 1e-15, 1- 1e-15)
    return -y*np.log(p) - (1 - y) * np.log(1- p)
  
  def gradient(self, y, p):
    p = np.clip(p, 1e-15, 1- 1e-15)
    return -(y/p) + (1 - y) / (1 - p)

In [297]:
class Activation():
  def __init__(self, activation, name = "activation"):
    self.activation = activation
    self.gradient = activation.gradient
    self.input = None
    self.output = None
    self.name = name
  #calculate the output
  def forward(self, x):
    self.input = x
    self.output = self.activation(x)
    return self.output
  #backward to update the weight and biase
  def backward(self, output_error, learning_rate = 0.01):
    return self.gradient(self.input) * output_error
  
  def __call__(self, x):
    return self.forward(x)

In [298]:
class Network():
  def __init__(self, input_dim, output_dim, learning_rate = 0.01):
    # input_dim = 784, output_dim = 10 for mnist
    self.layers = [
                   Linear(input_dim, 512, name = "input"),
                   Activation(ReLU(), name = "hidden_1"),
                   Linear(512, 256, name = "input"),
                   Activation(ReLU(), name = "hidden_2"),
                   Linear(256, output_dim, name = "output"),
                   Activation(Softmax(), name = "softmax")
    ]
    self.learning_rate = learning_rate
  
  def forward(self, x):
    for layer in self.layers:
      x = layer(x)
    return x
  #step6: Backward propagation:
  def backward(self, loss_grad):
    for layer in reversed(self.layers):
      loss_grad = layer.backward(loss_grad, self.learning_rate)
    # Iterating backwards through the layers
  
  def __call__(self, x):
    return self.forward(x)

In [299]:
criterion = CrossEntropy()
model = Network(input_dim, output_dim, learning_rate = 1e-3)

In [300]:
EPOCHS = 10

In [301]:
# x is image y is label
# 收集loss data作圖用
plot_loss = []
plot_validate = []
epoch_iter = []

for epoch in range(EPOCHS):
  loss = []
  acc = []
  test_loss = []
  test_acc = []

  for x_batch, y_batch in batch_loader(x_train, y_train):
    out = model(x_batch) # Forward pass
    loss.append(np.mean(criterion.loss(y_batch, out))) 
    # We just passed the inputs incorrectly.
    acc.append(accuracy(np.argmax(y_batch, axis=1), np.argmax(out, axis=1))) 
    error = criterion.gradient(y_batch, out) # Calculate gradient of loss
    #step6: Backpropagation
    model.backward(error) 
    #step7: Validation
  for x_batch, y_batch in batch_loader(x_test, y_test):
    #step8: Testing accuracy
    test_out = model(x_batch)
    test_loss.append(np.mean(criterion.loss(y_batch, test_out))) 
    test_acc.append(accuracy(np.argmax(y_batch, axis=1), np.argmax(test_out, axis=1))) 
  plot_loss.append(np.mean(loss))
  plot_validate.append(np.mean(test_loss))
  epoch_iter.append(epoch + 1)

  print(f"Epoch {epoch + 1}, ValidationLoss: {np.mean(test_loss)}, ValidationAcc: {np.mean(test_acc)}, TrainingLoss: {np.mean(loss)}, TrainingAcc: {np.mean(acc)}")

Epoch 1, ValidationLoss: 0.055263398175666896, ValidationAcc: 0.9000709219858156, TrainingLoss: 0.10845611887307162, TrainingAcc: 0.830289193302892
Epoch 2, ValidationLoss: 0.04188900874879076, ValidationAcc: 0.9276241134751774, TrainingLoss: 0.04649470391894461, TrainingAcc: 0.9213394216133941
Epoch 3, ValidationLoss: 0.0338745065336244, ValidationAcc: 0.9418085106382977, TrainingLoss: 0.035096432778106076, TrainingAcc: 0.9412024353120242
Epoch 4, ValidationLoss: 0.028947008455809338, ValidationAcc: 0.9506382978723403, TrainingLoss: 0.028024720328670965, TrainingAcc: 0.9521613394216133
Epoch 5, ValidationLoss: 0.025676270176351434, ValidationAcc: 0.9553900709219859, TrainingLoss: 0.023163417672677947, TrainingAcc: 0.9611111111111111
Epoch 6, ValidationLoss: 0.023377193860218966, ValidationAcc: 0.9588297872340426, TrainingLoss: 0.019556071368199077, TrainingAcc: 0.9677168949771688
Epoch 7, ValidationLoss: 0.021705432309929, ValidationAcc: 0.9620567375886523, TrainingLoss: 0.01672413399

In [None]:
y_predic = model(x_test) 
accuracy(np.argmax(y_test, axis=1), np.argmax(y_predic, axis=1)) 

In [None]:
plt.title('Result Analysis')
plt.plot(epoch_iter, plot_loss, color='green', label='training accuracy')
plt.plot(epoch_iter, plot_validate, color='red', label='testing accuracy')
plt.xlabel('iteration times')
plt.ylabel('loss')
plt.show()