In [1]:
import tensorflow as tf
import numpy as np

tf.random.set_seed(0)

#### Prepare MNIST data.

In [2]:
from tensorflow.keras.datasets import mnist

(x_train, y_train),(x_test, y_test) = mnist.load_data()

- Convert from unit8 to floate32 and
- normalize images value from[0,255] to [0,1]

In [3]:
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)/255
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32)/255

#### add new axis for channel

In [4]:
x_train = tf.reshape(x_train,[-1,28,28,1])
x_test = tf.reshape(x_test,[-1,28,28,1])

#### lenet

In [5]:
from tensorflow.keras import layers, Sequential

lenet = Sequential([
        #input 28 * 28 * 1 #output = 24*24*6
        layers.Conv2D(6,(5,5), padding="VALID"),
        layers.Activation("relu"),
        #polling layer
        #input 24*24*6 #output 12*12*6
        layers.MaxPooling2D(pool_size = (2,2), strides=2),
        #convlolutional Layer
        #input 12*12*6 #output 8*8*16
        layers.Conv2D(16,(5,5), padding="VALID"),
        layers.Activation("relu"),
        #polling Layer
        #input 8*8*16 #output 4*4*16
        layers.MaxPooling2D(pool_size = (2,2), strides=2),
        #flatten layer
        # input 4*4*16 output =256
        layers.Flatten(),
        layers.Dense(120, activation="relu"),
        layers.Dense(84, activation="relu"),
        layers.Dense(10)])

lenet.build(input_shape=(None,28,28,1))
lenet.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              multiple                  156       
_________________________________________________________________
activation (Activation)      multiple                  0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) multiple                  0         
_________________________________________________________________
conv2d_1 (Conv2D)            multiple                  2416      
_________________________________________________________________
activation_1 (Activation)    multiple                  0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 multiple                  0         
_________________________________________________________________
flatten (Flatten)            multiple                  0

#### Cross-Entropy loss function.

In [6]:
def cross_entropy(y_pred, y_true):
    # Convert labels to int 64 for tf cross-entropy function.
    y_true = tf.cast(y_true, tf.int64)
    # Apply softmax to logits and compute cross-entropy.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, 
                                                          logits=y_pred)
    # Average loss across the batch.
    return tf.reduce_mean(loss)

#### 학습 및 평가

In [7]:
# Parameters for Model Training
learning_rate = 0.1
batch_size = 256
epochs = 10

# Stochastic gradient descent optimizer.
optimizer = tf.optimizers.SGD(learning_rate)

# Use tf.data API to batch data.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
training_batch = train_data.batch(batch_size).repeat(epochs)

# Accuracy metric.
def accuracy(y_pred, y_true):
    # Predicted class is the index of highest score in prediction vector (i.e. argmax).
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


# Training parameters.
display_step = 500

# Run training for the given number of steps.
for step, (batch_x, batch_y) in enumerate(training_batch, 1):
    # Wrap computation inside a GradientTape for automatic differentiation.
    with tf.GradientTape() as g:
        pred = lenet(batch_x)
        loss = cross_entropy(pred, batch_y)

    # Compute gradients.
    trainable_variables = lenet.trainable_variables
    gradients = g.gradient(loss, trainable_variables)
    
    # Update W and b following gradients.
    optimizer.apply_gradients(zip(gradients, trainable_variables))
    
    if step % display_step == 0:
        pred = lenet(x_test)
        loss = cross_entropy(pred, y_test)
        acc = accuracy(pred, y_test)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

step: 500, loss: 0.090076, accuracy: 0.972400
step: 1000, loss: 0.059174, accuracy: 0.981200
step: 1500, loss: 0.050391, accuracy: 0.984000
step: 2000, loss: 0.047117, accuracy: 0.984600
