# 利用keras做手寫數字(MNIST)辨識
* 首先預設第一版的參數,之後分別做了以下的比較:
### (1)增加神經元數量(model 2)
### (2)batch size減少(model 3)
### (3)層數增加(model 4)
### (4)替換Activation function(model 5)
### (5)做正規化(model 6)
### 最後將以上測出的結果做統整,做優化測試(model_optimization)

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist

Using TensorFlow backend.


In [2]:
# Load the MNIST train/test splits and flatten every image into one feature
# vector per sample. The sample count is taken from the array itself and the
# feature size is inferred (-1), instead of hard-coding 60000/10000/784.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

In [3]:
from keras.utils import np_utils
# Convert both label arrays to 10-class categorical (one-hot) form.
y_train, y_test = (
    np_utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)

In [4]:
from keras.models import Sequential
from keras.layers import Dense,Activation
from keras.optimizers import SGD

# 測試第一版的模型

In [6]:
# Baseline model in its most basic form:
# 784 inputs -> Dense(10, sigmoid) -> Dense(50, sigmoid) -> Dense(10, softmax).
model = Sequential([
    Dense(10, input_dim=784),
    Activation('sigmoid'),
    Dense(50),
    Activation('sigmoid'),
    Dense(10),
    Activation('softmax'),
])
model.compile(loss='mse', optimizer=SGD(lr=0.05), metrics=['accuracy'])
model.summary()
# The test split is passed as validation data for this run.
model.fit(
    x_train,
    y_train,
    batch_size=100,
    epochs=20,
    verbose=1,
    validation_data=(x_test, y_test),
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 10)                7850      
_________________________________________________________________
activation_4 (Activation)    (None, 10)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 50)                550       
_________________________________________________________________
activation_5 (Activation)    (None, 50)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                510       
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
Total params: 8,910
Trainable params: 8,910
Non-trainable params: 0
_________________________________________________________________
Train on

<keras.callbacks.History at 0x7f1bfb3cbdd8>

# (model2)將神經元*10倍,測試成效: testing accuracy可到達0.8857
* 以下model皆與此做比較


In [7]:
# model2: same layout as the baseline but with wider hidden layers (100 and 200
# units). The later experiments are compared against this model.
model2 = Sequential()
model2.add(Dense(100,input_dim = 784))
model2.add(Activation('sigmoid'))
model2.add(Dense(200))
model2.add(Activation('sigmoid'))
model2.add(Dense(10))
model2.add(Activation('softmax'))
model2.compile(loss='mse',optimizer=SGD(lr = 0.05),metrics=['accuracy'])
model2.summary()
model2.fit(x_train,y_train,batch_size=100,epochs=20,verbose = 0)

# evaluate() runs on the held-out test split and returns [loss, accuracy],
# so the label must say "Testing", not "Training accuracy".
score = model2.evaluate(x_test,y_test)
print("---------Testing [loss, accuracy] = ",score)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 100)               78500     
_________________________________________________________________
activation_7 (Activation)    (None, 100)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 200)               20200     
_________________________________________________________________
activation_8 (Activation)    (None, 200)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 10)                2010      
_________________________________________________________________
activation_9 (Activation)    (None, 10)                0         
Total params: 100,710
Trainable params: 100,710
Non-trainable params: 0
_________________________________________________________________


# (model3)將batch調整成原本的十分之一,測試成效: testing accuracy可到達0.9403




In [8]:
# model3: identical to model2 except the batch size is cut from 100 to 10.
model3 = Sequential()
model3.add(Dense(100,input_dim = 784))
model3.add(Activation('sigmoid'))
model3.add(Dense(200))
model3.add(Activation('sigmoid'))
model3.add(Dense(10))
model3.add(Activation('softmax'))
model3.compile(loss='mse',optimizer=SGD(lr = 0.05),metrics=['accuracy'])
model3.summary()
model3.fit(x_train,y_train,batch_size=10,epochs=20,verbose = 0)

# evaluate() runs on the held-out test split and returns [loss, accuracy],
# so the label must say "Testing", not "Training accuracy".
score = model3.evaluate(x_test,y_test)
print("---------Testing [loss, accuracy] = ",score)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 100)               78500     
_________________________________________________________________
activation_10 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 200)               20200     
_________________________________________________________________
activation_11 (Activation)   (None, 200)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)                2010      
_________________________________________________________________
activation_12 (Activation)   (None, 10)                0         
Total params: 100,710
Trainable params: 100,710
Non-trainable params: 0
_________________________________________________________________


# (model4)相近數量的神經元,層數變多:效果不一定變好


In [9]:
# model4: a similar total number of hidden units to model2, spread over three
# hidden layers (100, 150, 60) instead of two.
model4 = Sequential()
model4.add(Dense(100,input_dim = 784))
model4.add(Activation('sigmoid'))
model4.add(Dense(150))
model4.add(Activation('sigmoid'))
model4.add(Dense(60))
model4.add(Activation('sigmoid'))
model4.add(Dense(10))
model4.add(Activation('softmax'))
model4.compile(loss='mse',optimizer=SGD(lr = 0.05),metrics=['accuracy'])
model4.summary()
model4.fit(x_train,y_train,batch_size=100,epochs=20,verbose = 0)

# evaluate() runs on the held-out test split and returns [loss, accuracy],
# so the label must say "Testing", not "Training accuracy".
score = model4.evaluate(x_test,y_test)
print("---------Testing [loss, accuracy] = ",score)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 100)               78500     
_________________________________________________________________
activation_13 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 150)               15150     
_________________________________________________________________
activation_14 (Activation)   (None, 150)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 60)                9060      
_________________________________________________________________
activation_15 (Activation)   (None, 60)                0         
_________________________________________________________________
dense_16 (Dense)             (None, 10)                610       
__________

# (model5)前端的Activation function改成RELU


In [10]:
# model5: same layout as model2, with the hidden-layer activations switched
# from sigmoid to ReLU (the output layer stays softmax).
model5 = Sequential()
model5.add(Dense(100,input_dim = 784))
model5.add(Activation('relu'))
model5.add(Dense(200))
model5.add(Activation('relu'))
model5.add(Dense(10))
model5.add(Activation('softmax'))
model5.compile(loss='mse',optimizer=SGD(lr = 0.05),metrics=['accuracy'])
model5.summary()
model5.fit(x_train,y_train,batch_size=100,epochs=20,verbose = 0)

# evaluate() runs on the held-out test split and returns [loss, accuracy],
# so the label must say "Testing", not "Training accuracy".
score = model5.evaluate(x_test,y_test)
print("---------Testing [loss, accuracy] = ",score)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 100)               78500     
_________________________________________________________________
activation_17 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 200)               20200     
_________________________________________________________________
activation_18 (Activation)   (None, 200)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 10)                2010      
_________________________________________________________________
activation_19 (Activation)   (None, 10)                0         
Total params: 100,710
Trainable params: 100,710
Non-trainable params: 0
_________________________________________________________________


# 將資料正規化


In [11]:
# Normalize x for the improved models.
# Both splits are divided by the same scale, taken from the training set, so
# the test inputs get exactly the same transformation as the training inputs
# (previously the test set was scaled by its own maximum).
pixel_scale = x_train.max()
x_train_norm = x_train / pixel_scale
x_test_norm = x_test / pixel_scale

# (model6)將資料正規化

In [12]:
# model6: same layout as model2, trained and evaluated on the normalized
# inputs (x_train_norm / x_test_norm) instead of the raw pixel values.
model6 = Sequential()
model6.add(Dense(100,input_dim = 784))
model6.add(Activation('sigmoid'))
model6.add(Dense(200))
model6.add(Activation('sigmoid'))
model6.add(Dense(10))
model6.add(Activation('softmax'))
model6.compile(loss='mse',optimizer=SGD(lr = 0.05),metrics=['accuracy'])
model6.summary()
model6.fit(x_train_norm,y_train,batch_size=100,epochs=20,verbose = 0)

# evaluate() runs on the held-out test split and returns [loss, accuracy],
# so the label must say "Testing", not "Training accuracy".
score = model6.evaluate(x_test_norm,y_test)
print("---------Testing [loss, accuracy] = ",score)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_20 (Dense)             (None, 100)               78500     
_________________________________________________________________
activation_20 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 200)               20200     
_________________________________________________________________
activation_21 (Activation)   (None, 200)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 10)                2010      
_________________________________________________________________
activation_22 (Activation)   (None, 10)                0         
Total params: 100,710
Trainable params: 100,710
Non-trainable params: 0
_________________________________________________________________


### 測試前,已知learning rate調小,最後資料在fit的時候能夠與training data有較微小的修正,不過調過小的話會造成計算時間冗長
* 測試後發現可能讓準確率提高的因素
### (1)增加神經元數量(o)
### (2)batch size減少(o)
### (3)層數增加(不一定)
### (4)替換Activation function(不一定)
### (5)做正規化(不一定)

In [14]:
# Final model combining the earlier findings: the wider model2 layout, a small
# batch size (10), a lower learning rate (0.02) and more epochs (60).
model_optimize = Sequential()
model_optimize.add(Dense(100,input_dim = 784))
model_optimize.add(Activation('sigmoid'))
model_optimize.add(Dense(200))
model_optimize.add(Activation('sigmoid'))
model_optimize.add(Dense(10))
model_optimize.add(Activation('softmax'))
model_optimize.compile(loss='mse',optimizer=SGD(lr = 0.02),metrics=['accuracy'])
model_optimize.summary()
model_optimize.fit(x_train,y_train,batch_size=10,epochs=60,verbose = 1,validation_data = (x_test,y_test))
# Fixed: a stray full-width ')' after the model name made this line a
# SyntaxError. evaluate() runs on the test split and returns [loss, accuracy],
# so the label says "Testing" rather than "Training accuracy".
score = model_optimize.evaluate(x_test,y_test)
print("---------Testing [loss, accuracy] = ",score)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_23 (Dense)             (None, 100)               78500     
_________________________________________________________________
activation_23 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_24 (Dense)             (None, 200)               20200     
_________________________________________________________________
activation_24 (Activation)   (None, 200)               0         
_________________________________________________________________
dense_25 (Dense)             (None, 10)                2010      
_________________________________________________________________
activation_25 (Activation)   (None, 10)                0         
Total params: 100,710
Trainable params: 100,710
Non-trainable params: 0
_________________________________________________________________
Trai

Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


### 最後準確率可以高達95.5％

In [21]:
# Persist the final model: architecture as JSON, weights as HDF5.
model_MNIST = model_optimize.to_json()
# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open('model_MNIST95.json', 'w') as json_file:
    json_file.write(model_MNIST)
model_optimize.save_weights('model_MNIST95.h5')