#### 说明
虽然在本例中“交叉熵+dropout”和“交叉熵+正则化”的效果不如“仅使用交叉熵”，但这并不说明这些优化方法无效；关键在于如何使用它们，不同的案例效果可能不同。

#### 目录
1. 仅使用交叉熵
2. 交叉熵+dropout
3. 交叉熵+正则化
4. 交叉熵+Adam优化器

#### 仅使用交叉熵

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import keras
from keras.datasets import mnist
from keras.utils import np_utils
from keras.layers import Dense, Dropout
from keras.optimizers import SGD, Adam

Using TensorFlow backend.


In [10]:
# Method 1: cross-entropy loss only (plain SGD, no dropout, no regularizer).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every sample into a single row and divide the values by 255.0.
x_train = x_train.reshape(x_train.shape[0], -1)/255.0
x_test = x_test.reshape(x_test.shape[0], -1)/255.0
# One-hot encode the labels into 10 classes.
y_train = np_utils.to_categorical(y_train, num_classes=10)
y_test = np_utils.to_categorical(y_test, num_classes=10)

# Build the model: 784 -> 200 -> 100 -> 10
model = keras.Sequential()
# Only the first layer declares its input size; the following layers take
# their input size from the previous layer's output, so `input_dim` is not
# repeated on them (it would be redundant in a Sequential model).
model.add(Dense(units=200, input_dim=784, activation='tanh', name='layer1'))
model.add(Dense(units=100, activation='tanh', name='layer2'))
model.add(Dense(units=10, activation='softmax', name='output'))

# SGD with learning rate 0.3, categorical cross-entropy loss, accuracy metric.
sgd = SGD(lr=0.3)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# Train for 10 epochs on the training split.
model.fit(x_train, y_train, epochs=10)

# Evaluate on the held-out test split and report loss and accuracy.
loss, accuracy = model.evaluate(x_test, y_test)
print('loss', loss)
print('accuracy', accuracy)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/10
 1088/60000 [..............................] - ETA: 7:35 - loss: 1.0751 - acc: 0.6480 



Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
loss 0.0691565755004398
accuracy 0.9809


#### 交叉熵+dropout

In [0]:
import keras
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dropout, Dense
from keras.optimizers import SGD

In [7]:
# Method 2: cross-entropy loss + dropout.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every sample into a single row and divide the values by 255.0.
x_train = x_train.reshape(len(x_train), -1) / 255.0
x_test = x_test.reshape(len(x_test), -1) / 255.0

# One-hot encode the labels (num_classes is left at to_categorical's default).
y_train, y_test = np_utils.to_categorical(y_train), np_utils.to_categorical(y_test)

# Architecture: 784 -> 200 -> 100 -> 10, with Dropout(0.15) after each tanh layer.
model = keras.Sequential([
    Dense(units=200, input_dim=784, activation='tanh', name='layer1'),
    Dropout(0.15),
    Dense(units=100, activation='tanh', name='layer2'),
    Dropout(0.15),
    Dense(units=10, activation='softmax', name='output0'),
])

# SGD with learning rate 0.3, categorical cross-entropy loss, accuracy metric.
sgd = SGD(lr=0.3)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs, then report loss and accuracy on the test split.
model.fit(x_train, y_train, epochs=10)
loss, acc = model.evaluate(x_test, y_test)
print('test loss', loss, 'test accuracy', acc)





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
test loss 0.07573686599871143 test accuracy 0.9783


#### 交叉熵+正则化

In [1]:
from keras.models import Sequential
from keras.datasets import mnist
from keras.utils import np_utils
from keras.layers import Dense
from keras.optimizers import SGD
from keras.regularizers import l2

Using TensorFlow backend.


In [6]:
# Method 3: cross-entropy loss + L2 kernel regularization on the hidden layers.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every sample into a single row and divide the values by 255.0.
num_train, num_test = x_train.shape[0], x_test.shape[0]
x_train = x_train.reshape(num_train, -1) / 255.0
x_test = x_test.reshape(num_test, -1) / 255.0

# One-hot encode the labels (num_classes is left at to_categorical's default).
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)

# Layer sizes: 784 -> 200 -> 100 -> 10.
# Both tanh layers carry an l2(0.0003) kernel regularizer; the softmax
# output layer has none.
model = Sequential([
    Dense(
        units=200,
        input_dim=784,
        activation='tanh',
        kernel_regularizer=l2(0.0003),
        name='layer_1',
    ),
    Dense(
        units=100,
        activation='tanh',
        kernel_regularizer=l2(0.0003),
        name='layer_2',
    ),
    Dense(units=10, activation='softmax', name='output_0'),
])

# SGD with learning rate 0.3, categorical cross-entropy loss, accuracy metric.
sgd = SGD(lr=0.3)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs on the training split.
model.fit(x_train, y_train, epochs=10)

# Report loss and accuracy on the test split.
loss, acc = model.evaluate(x_test, y_test)
print('test loss is', loss, 'test accuracy is', acc)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
test loss is 0.18946782796382905 test accuracy is 0.9668


#### 交叉熵+Adam优化器

In [0]:
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [8]:
# Method 4: cross-entropy loss trained with the Adam optimizer.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every sample into a single row and divide the values by 255.0.
x_train = x_train.reshape(len(x_train), -1) / 255.0
x_test = x_test.reshape(len(x_test), -1) / 255.0

# One-hot encode the labels (num_classes is left at to_categorical's default).
y_train, y_test = np_utils.to_categorical(y_train), np_utils.to_categorical(y_test)

# Architecture: 784 -> 200 -> 100 -> 10 (two tanh layers, softmax output).
model = Sequential([
    Dense(units=200, input_dim=784, activation='tanh', name='layer_0'),
    Dense(units=100, activation='tanh', name='layer_1'),
    Dense(units=10, activation='softmax', name='output_0'),
])

# Adam with learning rate 0.001, categorical cross-entropy loss, accuracy metric.
adam = Adam(lr=0.001)
model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs on the training split.
model.fit(x_train, y_train, epochs=10)

# Report loss and accuracy on the test split.
loss, acc = model.evaluate(x_test, y_test)
print('test loss is', loss, 'test accuracy is', acc)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
test loss is 0.08933973452044301 test accuracy is 0.9749


看上去似乎存在一点过拟合，因为训练集的 acc 比测试集的相对大一点。