In [5]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plt.style.use(['seaborn-whitegrid'])
from collections import OrderedDict

#### 데이터 로드

In [6]:
np.random.seed(111)

mnist = tf.keras.datasets.mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

num_classes = 10

#### 데이터 전처리

In [7]:
X_train, X_test = X_train.reshape(-1, 28*28).astype(np.float32), X_test.reshape(-1, 28*28).astype(np.float32)

X_train /= .255
X_test /= .255

y_train = np.eye(num_classes)[y_train]

#### Hyper Parameters

In [16]:
epochs = 1000
learning_rate = 1e-3
batch_size = 100
train_size = X_train.shape[0]

#### Util Functions

In [17]:
def softmax(x):
  if x.ndim == 2:
    x = x.T
    x = x - np.max(x, axis=0)
    y = np.exp(x) / np.sum(np.exp(x), axis=0)
    return y.T

  x = x - np.max(x)
  return np.exp(x) / np.sum(np.exp(x))

def mean_squared_error(pred_y, true_y):
  return 0.5 * np.sum((pred_y - true_y)**2)

def cross_entropy_error(pred_y, true_y):
  if pred_y.ndim == 1:
    true_y = true_y.reshape(1,true_y.size)
    pred_y = pred_y.reshape(1,pred_y.size)
  
  if true_y.size == pred_y.size:
    true_y = true_y.argmax(axis=1)

  batch_size = pred_y.shape[0]
  return -np.sum(np.log(pred_y[np.arange(batch_size), true_y] + 1e-7)) / batch_size

def softmax_loss(X, true_y):
  pred_y = softmax(X)
  return cross_entropy_error(pred_y, true_y)


#### Util Classes

##### ReLU

In [18]:
class ReLU():

  def __init__(self):
    self.out = None

  def forward(self,x):
    self.mask = (x < 0)
    out = x.copy()
    out[x<0] = 0
    return out

  def backward(self,dout):
    dout[self.mask] = 0
    dx = dout
    return dx

##### Sigmoid

In [61]:
class Sigmoid():

  def __init__(self):
    self.out = None

  def forward(self,x):
    out = 1 / ( 1 + np.exp(-x))
    self.out = out
    return out

  def backward(self,dout):
    dx = dout * (1.0 - self.out) * self.out
    return dx

##### Layer

In [62]:
class Layer():

  def __init__(self, W, b):
    self.W = W
    self.b = b

    self.x = None
    self.origin_x_shape = None

    self.dL_dW = None
    self.dL_db = None

  def forward(self,x):
    self.origin_x_shape = x.shape

    x = x.reshape(x.shape[0], -1)
    self.x = x

    out = np.dot(self.x, self.W) + self.b
    return out
  def backward(self,dout):
    dx = np.dot(dout, self.W.T)
    self.dL_dW = np.dot(self.x.T,dout)
    self.dL_db = np.sum(dout, axis=0)
    dx = dx.reshape(*self.origin_x_shape)
    return dx

#### Softmax

In [63]:
class SoftMax():
  def __init__(self):
    self.loss = None
    self.y = None
    self.t = None

  def forward(self,x,t):
    self.t = t
    self.y = softmax(x)
    self.loss = cross_entropy_error(self.y, self.t)

    return self.loss

  def backward(self,dout=1):
    batch_size = self.t.shape[0]

    if self.t.size == self.y.size:
      dx = (self.y - self.t) / batch_size
    else:
      dx = self.y.copu()
      dx[np.arange(batch_size), self.t] -= 1
      dx = dx / batch_size

    return dx

In [64]:
class MyModel():

  def __init__(self, input_size, hidden_size_list, output_size, activation='relu'):
    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size_list = hidden_size_list
    self.hidden_layer_num = len(hidden_size_list)
    self.params = {}

    self.__init_weights(activation)

    activation_layer = {'sigmoid':Sigmoid,'relu':ReLU}
    self.layers = OrderedDict()
    for idx in range(1, self.hidden_layer_num + 1):
      self.layers['Layer' + str(idx)] = Layer(self.params['W' + str(idx)], self.params['b'+str(idx)])
      self.layers['Actiavation_function' + str(idx)] = activation_layer[activation]()

    idx = self.hidden_layer_num + 1

    self.layers['Layer' + str(idx)] = Layer(self.params['W' + str(idx)], self.params['b' + str(idx)])
    self.last_layer = SoftMax()

  def __init_weights(self, activation):
    weight_std = None
    all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
    for idx in range(1, len(all_size_list)):
      if activation.lower() == 'relu':
        weight_std = np.sqrt(2.0 / self.input_size)
      elif activation.lower() == 'sigmoid':
        weight_std = np.sqrt(1.0 / self.input_size)
      self.params['W' + str(idx)] = weight_std * np.random.randn(all_size_list[idx-1], all_size_list[idx])
      self.params['b' + str(idx)] = np.random.randn(all_size_list[idx])

  def predict(self, x):
    for layer in self.layers.values():
      x = layer.forward(x)

    return x

  def loss(self, x, true_y):
    pred_y = self.predict(x)

    return self.last_layer.forward(pred_y, true_y)

  def accuracy(self, x, true_y):
    pred_y = self.predict(x)
    pred_y = np.argmax(pred_y, axis=1)

    if true_y.ndim != 1:
      true_y = np.argmax(true_y, axis=1)
    
    accuracy = np.sum(pred_y == true_y) / float(x.shape[0])
    return accuracy

  def gradient(self, x, t):
    self.loss(x,t)

    dout = 1
    dout = self.last_layer.backward(dout)

    layers = list(self.layers.values())
    layers.reverse()
    for layer in layers:
      dout = layer.backward(dout)
    
    grads = {}
    for idx in range(1, self.hidden_layer_num + 2):
      grads['W' + str(idx)] = self.layers['Layer' + str(idx)].dL_dW
      grads['b' + str(idx)] = self.layers['Layer' + str(idx)].dL_db
    return grads

#relu

In [26]:
model = MyModel(28*28, [200,200,200,200,200],10,activation='relu')

train_loss_list = []
train_acc_list = []
test_acc_list = []

for epoch in range(epochs):
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = X_train[batch_mask]
  y_batch = y_train[batch_mask]

  grad = model.gradient(x_batch, y_batch)

  for key in model.params.keys():
    model.params[key] -= learning_rate * grad[key]
  
  loss = model.loss(x_batch, y_batch)
  train_loss_list.append(loss)

  if epoch % 50 == 0:
    train_acc = model.accuracy(X_train, y_train)
    test_acc = model.accuracy(X_test, y_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print('Epoch: {}, Train Accuracy: {:.4f}, Test Accuracy: {:.4f}'.format(epoch+1, train_acc,test_acc))

Epoch: 1, Train Accuracy: 0.1156, Test Accuracy: 0.1145
Epoch: 51, Train Accuracy: 0.6688, Test Accuracy: 0.6766
Epoch: 101, Train Accuracy: 0.7637, Test Accuracy: 0.7643
Epoch: 151, Train Accuracy: 0.8012, Test Accuracy: 0.8020
Epoch: 201, Train Accuracy: 0.8128, Test Accuracy: 0.8094
Epoch: 251, Train Accuracy: 0.8347, Test Accuracy: 0.8324
Epoch: 301, Train Accuracy: 0.8437, Test Accuracy: 0.8388
Epoch: 351, Train Accuracy: 0.8622, Test Accuracy: 0.8548
Epoch: 401, Train Accuracy: 0.8644, Test Accuracy: 0.8589
Epoch: 451, Train Accuracy: 0.8752, Test Accuracy: 0.8691
Epoch: 501, Train Accuracy: 0.8834, Test Accuracy: 0.8780
Epoch: 551, Train Accuracy: 0.8877, Test Accuracy: 0.8840
Epoch: 601, Train Accuracy: 0.8921, Test Accuracy: 0.8873
Epoch: 651, Train Accuracy: 0.8942, Test Accuracy: 0.8866
Epoch: 701, Train Accuracy: 0.8951, Test Accuracy: 0.8893
Epoch: 751, Train Accuracy: 0.9014, Test Accuracy: 0.8919
Epoch: 801, Train Accuracy: 0.9044, Test Accuracy: 0.8927
Epoch: 851, Train

In [25]:
model = MyModel(28*28, [100,100,100,100,100],10,activation='relu')

train_loss_list = []
train_acc_list = []
test_acc_list = []

for epoch in range(epochs):
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = X_train[batch_mask]
  y_batch = y_train[batch_mask]

  grad = model.gradient(x_batch, y_batch)

  for key in model.params.keys():
    model.params[key] -= learning_rate * grad[key]
  
  loss = model.loss(x_batch, y_batch)
  train_loss_list.append(loss)

  if epoch % 50 == 0:
    train_acc = model.accuracy(X_train, y_train)
    test_acc = model.accuracy(X_test, y_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print('Epoch: {}, Train Accuracy: {:.4f}, Test Accuracy: {:.4f}'.format(epoch+1, train_acc,test_acc))

Epoch: 1, Train Accuracy: 0.0938, Test Accuracy: 0.0936
Epoch: 51, Train Accuracy: 0.5484, Test Accuracy: 0.5500
Epoch: 101, Train Accuracy: 0.6764, Test Accuracy: 0.6819
Epoch: 151, Train Accuracy: 0.7320, Test Accuracy: 0.7416
Epoch: 201, Train Accuracy: 0.7822, Test Accuracy: 0.7912
Epoch: 251, Train Accuracy: 0.8063, Test Accuracy: 0.8134
Epoch: 301, Train Accuracy: 0.8203, Test Accuracy: 0.8274
Epoch: 351, Train Accuracy: 0.8283, Test Accuracy: 0.8338
Epoch: 401, Train Accuracy: 0.8428, Test Accuracy: 0.8511
Epoch: 451, Train Accuracy: 0.8517, Test Accuracy: 0.8548
Epoch: 501, Train Accuracy: 0.8551, Test Accuracy: 0.8567
Epoch: 551, Train Accuracy: 0.8515, Test Accuracy: 0.8540
Epoch: 601, Train Accuracy: 0.8683, Test Accuracy: 0.8699
Epoch: 651, Train Accuracy: 0.8643, Test Accuracy: 0.8668
Epoch: 701, Train Accuracy: 0.8703, Test Accuracy: 0.8722
Epoch: 751, Train Accuracy: 0.8803, Test Accuracy: 0.8815
Epoch: 801, Train Accuracy: 0.8759, Test Accuracy: 0.8766
Epoch: 851, Train

In [27]:
model = MyModel(28*28, [200,200,200,200],10,activation='relu')

train_loss_list = []
train_acc_list = []
test_acc_list = []

for epoch in range(epochs):
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = X_train[batch_mask]
  y_batch = y_train[batch_mask]

  grad = model.gradient(x_batch, y_batch)

  for key in model.params.keys():
    model.params[key] -= learning_rate * grad[key]
  
  loss = model.loss(x_batch, y_batch)
  train_loss_list.append(loss)

  if epoch % 50 == 0:
    train_acc = model.accuracy(X_train, y_train)
    test_acc = model.accuracy(X_test, y_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print('Epoch: {}, Train Accuracy: {:.4f}, Test Accuracy: {:.4f}'.format(epoch+1, train_acc,test_acc))

Epoch: 1, Train Accuracy: 0.0974, Test Accuracy: 0.0960
Epoch: 51, Train Accuracy: 0.6857, Test Accuracy: 0.6823
Epoch: 101, Train Accuracy: 0.7732, Test Accuracy: 0.7786
Epoch: 151, Train Accuracy: 0.8016, Test Accuracy: 0.8025
Epoch: 201, Train Accuracy: 0.8246, Test Accuracy: 0.8309
Epoch: 251, Train Accuracy: 0.8283, Test Accuracy: 0.8308
Epoch: 301, Train Accuracy: 0.8544, Test Accuracy: 0.8558
Epoch: 351, Train Accuracy: 0.8634, Test Accuracy: 0.8602
Epoch: 401, Train Accuracy: 0.8662, Test Accuracy: 0.8613
Epoch: 451, Train Accuracy: 0.8787, Test Accuracy: 0.8760
Epoch: 501, Train Accuracy: 0.8827, Test Accuracy: 0.8787
Epoch: 551, Train Accuracy: 0.8866, Test Accuracy: 0.8823
Epoch: 601, Train Accuracy: 0.8923, Test Accuracy: 0.8862
Epoch: 651, Train Accuracy: 0.8976, Test Accuracy: 0.8906
Epoch: 701, Train Accuracy: 0.8991, Test Accuracy: 0.8914
Epoch: 751, Train Accuracy: 0.9031, Test Accuracy: 0.8977
Epoch: 801, Train Accuracy: 0.9080, Test Accuracy: 0.8993
Epoch: 851, Train

In [28]:
model = MyModel(28*28, [100,100,100,100],10,activation='relu')

train_loss_list = []
train_acc_list = []
test_acc_list = []

for epoch in range(epochs):
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = X_train[batch_mask]
  y_batch = y_train[batch_mask]

  grad = model.gradient(x_batch, y_batch)

  for key in model.params.keys():
    model.params[key] -= learning_rate * grad[key]
  
  loss = model.loss(x_batch, y_batch)
  train_loss_list.append(loss)

  if epoch % 50 == 0:
    train_acc = model.accuracy(X_train, y_train)
    test_acc = model.accuracy(X_test, y_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print('Epoch: {}, Train Accuracy: {:.4f}, Test Accuracy: {:.4f}'.format(epoch+1, train_acc,test_acc))

Epoch: 1, Train Accuracy: 0.1147, Test Accuracy: 0.1165
Epoch: 51, Train Accuracy: 0.5857, Test Accuracy: 0.5926
Epoch: 101, Train Accuracy: 0.7022, Test Accuracy: 0.7048
Epoch: 151, Train Accuracy: 0.7520, Test Accuracy: 0.7508
Epoch: 201, Train Accuracy: 0.7722, Test Accuracy: 0.7714
Epoch: 251, Train Accuracy: 0.8057, Test Accuracy: 0.8021
Epoch: 301, Train Accuracy: 0.8237, Test Accuracy: 0.8196
Epoch: 351, Train Accuracy: 0.8360, Test Accuracy: 0.8403
Epoch: 401, Train Accuracy: 0.8472, Test Accuracy: 0.8437
Epoch: 451, Train Accuracy: 0.8542, Test Accuracy: 0.8544
Epoch: 501, Train Accuracy: 0.8603, Test Accuracy: 0.8603
Epoch: 551, Train Accuracy: 0.8662, Test Accuracy: 0.8635
Epoch: 601, Train Accuracy: 0.8711, Test Accuracy: 0.8717
Epoch: 651, Train Accuracy: 0.8786, Test Accuracy: 0.8775
Epoch: 701, Train Accuracy: 0.8807, Test Accuracy: 0.8793
Epoch: 751, Train Accuracy: 0.8810, Test Accuracy: 0.8775
Epoch: 801, Train Accuracy: 0.8876, Test Accuracy: 0.8839
Epoch: 851, Train

#sigmoid

In [65]:
model = MyModel(28*28, [200,200,200,200,200],10,activation='sigmoid')

train_loss_list = []
train_acc_list = []
test_acc_list = []

for epoch in range(epochs):
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = X_train[batch_mask]
  y_batch = y_train[batch_mask]

  grad = model.gradient(x_batch, y_batch)

  for key in model.params.keys():
    model.params[key] -= learning_rate * grad[key]
  
  loss = model.loss(x_batch, y_batch)
  train_loss_list.append(loss)

  if epoch % 50 == 0:
    train_acc = model.accuracy(X_train, y_train)
    test_acc = model.accuracy(X_test, y_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print('Epoch: {}, Train Accuracy: {:.4f}, Test Accuracy: {:.4f}'.format(epoch+1, train_acc,test_acc))

  import sys


Epoch: 1, Train Accuracy: 0.0986, Test Accuracy: 0.0958
Epoch: 51, Train Accuracy: 0.0986, Test Accuracy: 0.0958
Epoch: 101, Train Accuracy: 0.0986, Test Accuracy: 0.0958
Epoch: 151, Train Accuracy: 0.0986, Test Accuracy: 0.0958
Epoch: 201, Train Accuracy: 0.0986, Test Accuracy: 0.0958
Epoch: 251, Train Accuracy: 0.0986, Test Accuracy: 0.0958
Epoch: 301, Train Accuracy: 0.0986, Test Accuracy: 0.0958
Epoch: 351, Train Accuracy: 0.0986, Test Accuracy: 0.0958
Epoch: 401, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 451, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 501, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 551, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 601, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 651, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 701, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 751, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 801, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 851, Train

In [66]:
model = MyModel(28*28, [100,100,100,100,100],10,activation='sigmoid')

train_loss_list = []
train_acc_list = []
test_acc_list = []

for epoch in range(epochs):
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = X_train[batch_mask]
  y_batch = y_train[batch_mask]

  grad = model.gradient(x_batch, y_batch)

  for key in model.params.keys():
    model.params[key] -= learning_rate * grad[key]
  
  loss = model.loss(x_batch, y_batch)
  train_loss_list.append(loss)

  if epoch % 50 == 0:
    train_acc = model.accuracy(X_train, y_train)
    test_acc = model.accuracy(X_test, y_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print('Epoch: {}, Train Accuracy: {:.4f}, Test Accuracy: {:.4f}'.format(epoch+1, train_acc,test_acc))

  import sys


Epoch: 1, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 51, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 101, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 151, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 201, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 251, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 301, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 351, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 401, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 451, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 501, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 551, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 601, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 651, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 701, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 751, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 801, Train Accuracy: 0.1044, Test Accuracy: 0.1028
Epoch: 851, Train

In [67]:
model = MyModel(28*28, [200,200,200,200],10,activation='sigmoid')

train_loss_list = []
train_acc_list = []
test_acc_list = []

for epoch in range(epochs):
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = X_train[batch_mask]
  y_batch = y_train[batch_mask]

  grad = model.gradient(x_batch, y_batch)

  for key in model.params.keys():
    model.params[key] -= learning_rate * grad[key]
  
  loss = model.loss(x_batch, y_batch)
  train_loss_list.append(loss)

  if epoch % 50 == 0:
    train_acc = model.accuracy(X_train, y_train)
    test_acc = model.accuracy(X_test, y_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print('Epoch: {}, Train Accuracy: {:.4f}, Test Accuracy: {:.4f}'.format(epoch+1, train_acc,test_acc))

  import sys


Epoch: 1, Train Accuracy: 0.0992, Test Accuracy: 0.1009
Epoch: 51, Train Accuracy: 0.0992, Test Accuracy: 0.1009
Epoch: 101, Train Accuracy: 0.0992, Test Accuracy: 0.1009
Epoch: 151, Train Accuracy: 0.0992, Test Accuracy: 0.1009
Epoch: 201, Train Accuracy: 0.0992, Test Accuracy: 0.1009
Epoch: 251, Train Accuracy: 0.0992, Test Accuracy: 0.1009
Epoch: 301, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 351, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 401, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 451, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 501, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 551, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 601, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 651, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 701, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 751, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 801, Train Accuracy: 0.1124, Test Accuracy: 0.1135
Epoch: 851, Train

In [68]:
model = MyModel(28*28, [100,100,100,100],10,activation='sigmoid')

train_loss_list = []
train_acc_list = []
test_acc_list = []

for epoch in range(epochs):
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = X_train[batch_mask]
  y_batch = y_train[batch_mask]

  grad = model.gradient(x_batch, y_batch)

  for key in model.params.keys():
    model.params[key] -= learning_rate * grad[key]
  
  loss = model.loss(x_batch, y_batch)
  train_loss_list.append(loss)

  if epoch % 50 == 0:
    train_acc = model.accuracy(X_train, y_train)
    test_acc = model.accuracy(X_test, y_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print('Epoch: {}, Train Accuracy: {:.4f}, Test Accuracy: {:.4f}'.format(epoch+1, train_acc,test_acc))

  import sys


Epoch: 1, Train Accuracy: 0.0903, Test Accuracy: 0.0892
Epoch: 51, Train Accuracy: 0.0903, Test Accuracy: 0.0892
Epoch: 101, Train Accuracy: 0.0903, Test Accuracy: 0.0892
Epoch: 151, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 201, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 251, Train Accuracy: 0.0979, Test Accuracy: 0.0968
Epoch: 301, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 351, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 401, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 451, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 501, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 551, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 601, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 651, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 701, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 751, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 801, Train Accuracy: 0.1022, Test Accuracy: 0.1010
Epoch: 851, Train