In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist 

In [26]:
# image normalization 
def normalize(train_data, test_data):
  """Cast both image arrays to float32 and divide them by 255.0.

  For 8-bit pixel data this maps the 0..255 range onto 0..1.

  Args:
    train_data: array of training images (anything with ``.astype``).
    test_data: array of test images (anything with ``.astype``).

  Returns:
    A ``(train_data, test_data)`` tuple of new float32 arrays; the inputs
    are not modified.
  """
  to_unit_float = lambda pixels: pixels.astype(np.float32) / 255.0
  return to_unit_float(train_data), to_unit_float(test_data)

# load mnist with preprocessing
def load_mnist():
  """Load MNIST via ``mnist.load_data()`` and preprocess it for the network.

  Images get a trailing channel axis and are rescaled by ``normalize``;
  labels are one-hot encoded over 10 classes.

  Returns:
    ``(train_data, train_labels, test_data, test_labels)``.
  """
  (train_images, train_digits), (test_images, test_digits) = mnist.load_data()

  # TensorFlow expects inputs shaped (batch_size, height, width, channel),
  # so append a channel axis: (N, 28, 28) -> (N, 28, 28, 1).
  train_images = train_images[..., np.newaxis]
  test_images = test_images[..., np.newaxis]

  # Pixel values 0..255 -> 0..1.
  train_images, test_images = normalize(train_images, test_images)

  # One-hot encode the labels: (N,) -> (N, 10), where 10 is the class count.
  num_classes = 10
  train_onehot = to_categorical(train_digits, num_classes)
  test_onehot = to_categorical(test_digits, num_classes)

  return train_images, train_onehot, test_images, test_onehot

In [27]:
# Create network

def flatten():
  """Return a new Keras ``Flatten`` layer."""
  flatten_layer = tf.keras.layers.Flatten()
  return flatten_layer

# To make Fully connected layer
def dense(channel, weight_init):
  """Return a new fully connected (``Dense``) Keras layer with a bias term.

  Args:
    channel: number of output units of the layer.
    weight_init: initializer used for the layer's kernel (weights).
  """
  layer_options = dict(
      units=channel,                   # number of output channels
      use_bias=True,                   # include a bias term
      kernel_initializer=weight_init,  # how the weights are initialised
  )
  return tf.keras.layers.Dense(**layer_options)

def relu():
  """Return a new Keras ``Activation`` layer that applies ReLU."""
  relu_fn = tf.keras.activations.relu
  return tf.keras.layers.Activation(relu_fn)

class create_model(tf.keras.Model):
  """Fully connected classifier: Flatten -> (Dense(256) -> ReLU) x 2 -> Dense(label_dim).

  When a model is written as a class it must subclass ``tf.keras.Model``.

  Weights are initialised with ``tf.keras.initializers.RandomNormal()`` using
  its default arguments. Keras documents those defaults as mean 0.0 and
  stddev 0.05, i.e. this is NOT a unit-variance N(0, 1) initialisation.

  The last layer has no activation, so ``call`` returns raw logits.

  Args:
    label_dim: number of output units (10 in this notebook, one per class).
  """

  def __init__(self, label_dim):
    super().__init__()
    weight_init = tf.keras.initializers.RandomNormal()

    # Sequential keeps an ordered list of layers; add() appends to it.
    self.model = tf.keras.Sequential()

    # (N, 28, 28, 1) -> (N, 784). Needed because the network is fully
    # connected; a CNN would consume the image tensor directly.
    self.model.add(flatten())

    # (N, 784) -> (N, 256) -> (N, 256)
    for _ in range(2):
      self.model.add(dense(256, weight_init))
      self.model.add(relu())

    # (N, 256) -> (N, label_dim)
    self.model.add(dense(label_dim, weight_init))

  def call(self, x, training=None, mask=None):
    """Run the forward pass and return logits of shape (N, label_dim).

    ``training`` is forwarded explicitly to the wrapped Sequential so that any
    training-dependent layer added later sees the right mode, instead of
    relying on implicit propagation. ``mask`` is accepted for Keras API
    compatibility and is not used.
    """
    return self.model(x, training=training)

In [28]:
#  ## Alternative if you prefer not to define the model as a class

#  def create_model(label_dim) :
#    weight_init = tf.keras.initializers.RandomNormal()

#    model = tf.keras.Sequential()
#    model.add(flatten())

#    for i in range(2) :
#      model.add(dense(256, weight_init))
#      model.add(relu())

#    model.add(dense(label_dim, weight_init))

#    return model

In [29]:
# Define loss

def loss_fn(model, images, labels):
  """Mean softmax cross-entropy of the model's logits over a batch.

  The forward pass is run with ``training=True``.

  Args:
    model: callable returning logits for ``images``.
    images: batch of inputs passed to ``model``.
    labels: targets passed as ``labels`` to
      ``tf.nn.softmax_cross_entropy_with_logits``.

  Returns:
    The batch-averaged loss.
  """
  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
      labels=labels,
      logits=model(images, training=True),
  )
  return tf.reduce_mean(per_example_loss)

def accuracy_fn(model, images, labels):
  """Fraction of the batch whose arg-max logit matches the arg-max label.

  The forward pass is run with ``training=False``. Both arg-maxes are taken
  over the last axis.

  Returns:
    The accuracy as a float32 mean.
  """
  predicted_class = tf.argmax(model(images, training=False), -1)
  true_class = tf.argmax(labels, -1)
  # Booleans become 1.0 / 0.0 so that their mean is the hit rate.
  hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
  return tf.reduce_mean(hits)

def grad(model, images, labels):
  """Gradients of ``loss_fn`` with respect to ``model.variables``.

  The loss is evaluated under a ``tf.GradientTape``. The returned gradients
  are in the same order as ``model.variables``, so callers can zip the two
  together.
  """
  with tf.GradientTape() as recorder:
    batch_loss = loss_fn(model, images, labels)
  gradients = recorder.gradient(batch_loss, model.variables)
  return gradients

In [34]:
## Hyperparameters and input pipelines

# dataset
train_x, train_y, test_x, test_y = load_mnist()

# hyperparameters
learning_rate = 0.001
batch_size = 128

training_epochs = 1
training_iterations = len(train_x) // batch_size  # number of FULL batches per epoch

label_dim = 10


## graph input using the tf.data Dataset API
#
# shuffle  : a buffer_size at least as large as the dataset gives a uniform shuffle.
# batch    : hand the network batch_size examples at a time. drop_remainder=True
#            drops the final partial batch so that one pass over the data yields
#            exactly training_iterations batches.
# repeat   : loop over the data indefinitely.
# prefetch : kept as the LAST stage so that the next ready-made batch is prepared
#            while the network trains on the current one (placed before batch()
#            it would only buffer individual examples).
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_x, train_y))
    .shuffle(buffer_size = 100000)
    .batch(batch_size, drop_remainder = True)
    .repeat()
    .prefetch(buffer_size = 1)
)

# The whole test set is served as a single batch, so shuffling it would not
# change any metric; it is therefore not shuffled.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_x, test_y))
    .batch(len(test_x))
    .repeat()
    .prefetch(buffer_size = 1)
)


In [35]:
## Model, optimizer and data iterators

# Python iterators over the two tf.data pipelines.
train_iterator, test_iterator = (
    iter(pipeline) for pipeline in (train_dataset, test_dataset)
)

# The network with one output unit per class.
network = create_model(label_dim)

# Adam optimizer used by the training loop.
optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)

In [39]:
# Checkpoint object tracking the network's weights. It is meant for reloading
# the updated weights when interrupted training is resumed or when running on
# test data. Note that nothing in this cell saves it.
checkpoint = tf.train.Checkpoint(dnn=network)

for epoch in range(training_epochs):
  # range(training_iterations), not range(1, training_iterations): the latter
  # silently skipped one batch per epoch.
  for idx in range(training_iterations):
    train_input, train_label = next(train_iterator)
    test_input, test_label = next(test_iterator)

    # One optimisation step. grad() returns gradients ordered like
    # network.variables, hence the zip with that same list.
    grads = grad(network, train_input, train_label)
    optimizer.apply_gradients(grads_and_vars = zip(grads, network.variables))

    # Metrics on the batch just trained on, measured after the update.
    train_loss = loss_fn(network, train_input, train_label)
    train_accuracy = accuracy_fn(network, train_input, train_label)

    # Metrics on the test batch. test_loss is computed but not printed below.
    test_loss = loss_fn(network, test_input, test_label)
    test_accuracy = accuracy_fn(network, test_input, test_label)

    print("Epoch : {} train_loss : {:.4f}, train_accuracy : {:.4f}, test_accuracy : {:.4f}".format(epoch, train_loss, train_accuracy, test_accuracy))


Epoch : 0 train_loss : 0.1574, train_accuracy : 0.9609, test_accuracy : 0.9486
Epoch : 0 train_loss : 0.1776, train_accuracy : 0.9453, test_accuracy : 0.9447
Epoch : 0 train_loss : 0.1456, train_accuracy : 0.9297, test_accuracy : 0.9414
Epoch : 0 train_loss : 0.1028, train_accuracy : 0.9688, test_accuracy : 0.9404
Epoch : 0 train_loss : 0.0780, train_accuracy : 0.9844, test_accuracy : 0.9439
Epoch : 0 train_loss : 0.2042, train_accuracy : 0.9375, test_accuracy : 0.9493
Epoch : 0 train_loss : 0.1182, train_accuracy : 0.9688, test_accuracy : 0.9547
Epoch : 0 train_loss : 0.1032, train_accuracy : 0.9609, test_accuracy : 0.9562
Epoch : 0 train_loss : 0.2080, train_accuracy : 0.9297, test_accuracy : 0.9575
Epoch : 0 train_loss : 0.1731, train_accuracy : 0.9531, test_accuracy : 0.9579
Epoch : 0 train_loss : 0.1663, train_accuracy : 0.9609, test_accuracy : 0.9562
Epoch : 0 train_loss : 0.1201, train_accuracy : 0.9609, test_accuracy : 0.9546
Epoch : 0 train_loss : 0.2129, train_accuracy : 0.92

KeyboardInterrupt: ignored

In [40]:
# Inspect the shape of the most recently fetched training batch.
train_input.shape

TensorShape([128, 28, 28, 1])

In [41]:
# Inspect the number of iterations per epoch: len(train_x) // batch_size.
training_iterations

468