# MNIST: Custom training: walkthrough
- https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough 



# 0. 환경 셋업

In [1]:
import os
import matplotlib.pyplot as plt

import tensorflow as tf
tf.random.set_seed(42)

In [2]:
print("TensorFlow version: {}".format(tf.__version__))
print("Eager execution: {}".format(tf.executing_eagerly()))

TensorFlow version: 2.1.3
Eager execution: True


In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  # 텐서플로가 첫 번째 GPU만 사용하도록 제한
  try:
    tf.config.experimental.set_visible_devices(gpus[6], 'GPU')
  except RuntimeError as e:
    # 프로그램 시작시에 접근 가능한 장치가 설정되어야만 합니다
    print(e)

# 1. 데이터 준비

In [4]:
(mnist_images, mnist_labels), (x_test, y_test) = tf.keras.datasets.mnist.load_data(
    path="mnist-%d.npz" % 10
)



In [5]:
print("type: ", type(mnist_images))
print("shape: ", mnist_images.shape)
print("mnist_labels: ", mnist_labels.shape)

print("x_test: ", x_test.shape)
print("y_test: ", y_test.shape)

type:  <class 'numpy.ndarray'>
shape:  (60000, 28, 28)
mnist_labels:  (60000,)
x_test:  (10000, 28, 28)
y_test:  (10000,)


In [6]:
def create_train_dataset(mnist_images, mnist_labels, batch_size, buffer_size):
    dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(mnist_images[..., tf.newaxis] / 255.0, tf.float32), 
         tf.cast(mnist_labels, tf.int64))
    )
    dataset = dataset.repeat(1).shuffle(buffer_size).batch(batch_size)
    print("# of batches : {0}".format(tf.data.experimental.cardinality(dataset).numpy()))

    return dataset
    

def create_test_dataset(mnist_images, mnist_labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(mnist_images[..., tf.newaxis] / 255.0, tf.float32), 
         tf.cast(mnist_labels, tf.int64))
    )
    dataset = dataset.repeat(1).batch(batch_size)
    print("# of batches : {0}".format(tf.data.experimental.cardinality(dataset).numpy()))

    return dataset



In [7]:
batch_size = 256
buffer_size = mnist_images.shape[0]
print("buffer_size: ", buffer_size)
dataset = create_train_dataset(mnist_images, mnist_labels,batch_size, buffer_size)
    
# dataset = dataset.repeat(1).shuffle(buffer_size).batch(batch_size)
# print(tf.data.experimental.cardinality(dataset))
# print(tf.data.experimental.cardinality(dataset).numpy())
dataset

buffer_size:  60000
# of batches : 235


<BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>

In [8]:
batch_size = x_test.shape[0]
test_ds = create_test_dataset(x_test, y_test,batch_size)
test_ds

# of batches : 1


<BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>

In [9]:
# features, labels = next(iter(dataset))
# print(features)

# 2. 모델 정의

In [10]:
mnist_model = tf.keras.Sequential(
    [
        tf.keras.layers.Conv2D(32, [3, 3], activation="relu"),
        tf.keras.layers.Conv2D(64, [3, 3], activation="relu"),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation="softmax"),
    ]
)

# 3. 모델 생성

## Loss 오브젝트 정의

In [11]:
loss = tf.losses.SparseCategoricalCrossentropy()

In [12]:
opt = tf.optimizers.Adam(0.000125 * 8)

In [13]:
checkpoint = tf.train.Checkpoint(model=mnist_model, optimizer=opt)

## Gradient 생성 함수 정의

In [14]:
@tf.function
def training_step(images, labels, first_batch):
    with tf.GradientTape() as tape:
        probs = mnist_model(images, training=True)
        print("probs: ", probs)
        print("labels: ", labels)        
        
        loss_value = loss(labels, probs)
        
    grads = tape.gradient(loss_value, mnist_model.trainable_variables)
    opt.apply_gradients(zip(grads, mnist_model.trainable_variables))
    
    return loss_value

In [15]:
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

@tf.function
def test_step(images, labels):
    # training=False is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = mnist_model(images, training=False)
#   print("labels: ", labels)
#   print("predictions: ", predictions)    
    
    t_loss = loss(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)
    return t_loss

# 4. 모델 훈련

In [16]:
EPOCHS = 5
print_interval = 100

for epoch in range(EPOCHS):
    for batch, (images, labels) in enumerate(dataset):
        loss_value = training_step(images, labels, batch == 0)

        if batch % print_interval == 0:
            print("Epoch: %d\t, Step #%d\tLoss: %.6f" %  (epoch, batch, loss_value))
            
    # Reset the metrics at the start of the next epoch
    test_loss.reset_states()
    test_accuracy.reset_states()


    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

        print(
        #ㅁ    f'Epoch {epoch + 1}, '
        f'Test Loss: {test_loss.result()}, '
        f'Test Accuracy: {test_accuracy.result() * 100}'
        )

            

[2021-10-03 07:56:29.180 ip-172-16-67-78:124063 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2021-10-03 07:56:29.479 ip-172-16-67-78:124063 INFO profiler_config_parser.py:111] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.
probs:  Tensor("sequential/dense_1/Softmax:0", shape=(256, 10), dtype=float32)
labels:  Tensor("labels:0", shape=(256,), dtype=int64)
probs:  Tensor("sequential/dense_1/Softmax:0", shape=(256, 10), dtype=float32)
labels:  Tensor("labels:0", shape=(256,), dtype=int64)
Epoch: 0	, Step #0	Loss: 2.312272
probs:  Tensor("sequential/dense_1/Softmax:0", shape=(256, 10), dtype=float32)
labels:  Tensor("labels:0", shape=(256,), dtype=int64)
Epoch: 0	, Step #100	Loss: 0.235716
Epoch: 0	, Step #200	Loss: 0.083622
probs:  Tensor("sequential/dense_1/Softmax:0", shape=(96, 10), dtype=float32)
labels:  Tensor("labels:0", shape=(96,), dtype=int64)
Test Loss: 0.062250152230262756, Test Accuracy: 97.93999481201172
Epoch: 1	, Step #0	

In [17]:
# #for epoch in range(EPOCHS):
# n_batch = 1000
# print_interval = 1000
# for batch, (images, labels) in enumerate(dataset.take(n_batch)):
#     loss_value = training_step(images, labels, batch == 0)

#     if batch % print_interval == 0:
#         print("Step #%d\tLoss: %.6f" %  (batch, loss_value))
        

In [18]:
import os
log_dir = 'log'
os.makedirs(log_dir, exist_ok=True)
checkpoint_dir = log_dir

In [19]:
mnist_model.save(os.path.join(checkpoint_dir, "1"))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


INFO:tensorflow:Assets written to: log/1/assets


INFO:tensorflow:Assets written to: log/1/assets


In [20]:
! rm -rf log_dir

# 5. 추론

In [21]:
# Reset the metrics at the start of the next epoch
test_loss.reset_states()
test_accuracy.reset_states()


for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

    print(
    #ㅁ    f'Epoch {epoch + 1}, '
    f'Test Loss: {test_loss.result()}, '
    f'Test Accuracy: {test_accuracy.result() * 100}'
    )


Test Loss: 0.03226584568619728, Test Accuracy: 98.94000244140625
