# 平均損失函數

In [1]:
import numpy as np
import tensorflow as tf
tf.compat.v1.reset_default_graph()
class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()

        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):

        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]
    
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

num_epochs = 5
batch_size = 5
learning_rate = 0.001

model = MLP()
data_loader = MNISTLoader()

num_batches = int(data_loader.num_train_data // batch_size * num_epochs)

X, y = data_loader.get_batch(batch_size)
with tf.GradientTape() as tape:
    y_pred = model(X)
    loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
    loss = tf.reduce_mean(loss)
    print(loss)


tf.Tensor(2.3536828, shape=(), dtype=float32)


# 優化神經網路

In [2]:
import numpy as np
class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()

        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):

        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]
    
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

num_epochs = 5
batch_size = 50
learning_rate = 0.001

model = MLP()
data_loader = MNISTLoader()
#optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
#optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))


batch 0: loss 2.396677
batch 1: loss 2.112873
batch 2: loss 2.183515
batch 3: loss 1.943295
batch 4: loss 1.820923
batch 5: loss 1.697846
batch 6: loss 1.571865
batch 7: loss 1.597428
batch 8: loss 1.628342
batch 9: loss 1.309693
batch 10: loss 1.454890
batch 11: loss 1.258303
batch 12: loss 1.285961
batch 13: loss 1.254707
batch 14: loss 0.994358
batch 15: loss 0.893446
batch 16: loss 1.137083
batch 17: loss 1.004136
batch 18: loss 1.031638
batch 19: loss 0.982998
batch 20: loss 1.008984
batch 21: loss 0.963277
batch 22: loss 1.035021
batch 23: loss 0.797306
batch 24: loss 0.860072
batch 25: loss 0.840207
batch 26: loss 0.702268
batch 27: loss 0.723299
batch 28: loss 0.964678
batch 29: loss 0.712005
batch 30: loss 0.681655
batch 31: loss 0.786665
batch 32: loss 0.619811
batch 33: loss 0.650310
batch 34: loss 0.714649
batch 35: loss 0.610785
batch 36: loss 0.833940
batch 37: loss 0.628161
batch 38: loss 0.661477
batch 39: loss 0.724105
batch 40: loss 0.604361
batch 41: loss 0.602755
ba

# 優化神經網路(keras版本)

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
tf.compat.v1.reset_default_graph()

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape([x_train.shape[0], -1])
x_test = x_test.reshape([x_test.shape[0], -1])
print(x_train.shape, ' ', y_train.shape)
print(x_test.shape, ' ', y_test.shape)

(60000, 784)   (60000,)
(10000, 784)   (10000,)


In [4]:
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(784,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])


#keras.optimizers.Adagrad(learning_rate=0.01)
#keras.optimizers.Adam(learning_rate=0.01)
#keras.optimizers.RMSprop(learning_rate=0.01)

# provide labels as one_hot representation => tf.keras.losses.CategoricalCrossentropy
# provide labels as integers => tf.keras.losses.SparseCategoricalCrossentropy 
model.compile(optimizer=keras.optimizers.Adam(),
             loss=keras.losses.SparseCategoricalCrossentropy(),
             metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
#history = model.fit(x_train, y_train, batch_size=256, epochs=100, validation_split=0.3, verbose=2)
history = model.fit(x_train, y_train, batch_size=256, epochs=100, validation_data=(x_test, y_test), verbose=2)


Epoch 1/100
235/235 - 2s - 8ms/step - accuracy: 0.7573 - loss: 2.6060 - val_accuracy: 0.8610 - val_loss: 0.6462
Epoch 2/100
235/235 - 1s - 3ms/step - accuracy: 0.8838 - loss: 0.4757 - val_accuracy: 0.8985 - val_loss: 0.4269
Epoch 3/100
235/235 - 1s - 2ms/step - accuracy: 0.9154 - loss: 0.3157 - val_accuracy: 0.9106 - val_loss: 0.3523
Epoch 4/100
235/235 - 1s - 2ms/step - accuracy: 0.9319 - loss: 0.2427 - val_accuracy: 0.9198 - val_loss: 0.3006
Epoch 5/100
235/235 - 1s - 2ms/step - accuracy: 0.9406 - loss: 0.2037 - val_accuracy: 0.9303 - val_loss: 0.2669
Epoch 6/100
235/235 - 1s - 2ms/step - accuracy: 0.9498 - loss: 0.1729 - val_accuracy: 0.9348 - val_loss: 0.2560
Epoch 7/100
235/235 - 1s - 2ms/step - accuracy: 0.9553 - loss: 0.1508 - val_accuracy: 0.9360 - val_loss: 0.2452
Epoch 8/100
235/235 - 1s - 2ms/step - accuracy: 0.9604 - loss: 0.1328 - val_accuracy: 0.9407 - val_loss: 0.2315
Epoch 9/100
235/235 - 1s - 3ms/step - accuracy: 0.9630 - loss: 0.1199 - val_accuracy: 0.9406 - val_loss: