In [1]:
%matplotlib inline

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten

from keras.utils import np_utils
from scipy import io
import numpy as np
from sklearn.cross_validation import train_test_split
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint

import seaborn as sns
sns.set_style("white")

Using TensorFlow backend.


In [2]:
data = io.loadmat("/media/zuziel/Windows/Users/Zuza/Documents/Programowanie/keras/notMNIST_small.mat")

In [3]:
X = data['images']
y = data['labels']
resolution = 28
classes = 10

X = np.transpose(X, (2, 0, 1))
X = X.reshape(X.shape[0], 1, 28, 28)

y = y.astype('int32')
X = X.astype('float32') / 255.

# 3 -> [0., 0., 0., 2., 0., 0., 0., 0., 0., 0.]
Y = np_utils.to_categorical(y, 10)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

Sieć podobna do poprzedniej jednak ze zmienioną liczbą warstw - zamiast jednej warstwy typu dense są dwie o tym samym rozmiarze (gdzieś twierdzono, że kolejne warstwy powinny mieć ten sam rozmiar, bo wtedy działają lepiej)

In [4]:
model = Sequential()
model.add(Convolution2D(32, 5, 5, border_mode='valid', input_shape=(1, 28, 28), activation='relu', dim_ordering="th"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(classes, activation='softmax'))
# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [6]:
from keras_ascii_sequential import sequential_model_to_ascii_printout
sequential_model_to_ascii_printout(model)

      OPERATION           DATA DIMENSIONS   WEIGHTS(N)   WEIGHTS(%)

          Input   #####   (1, 28, 28)
  Convolution2D    \|/  -------------------       832     0.1%
           relu   #####   (32, 24, 24)
   MaxPooling2D   YYYYY -------------------         0     0.0%
                  #####   (16, 12, 24)
        Dropout    | || -------------------         0     0.0%
                  #####   (16, 12, 24)
        Flatten   ||||| -------------------         0     0.0%
                  #####   (4608,)
          Dense   XXXXX -------------------    589952    96.9%
           relu   #####   (128,)
          Dense   XXXXX -------------------     16512     2.7%
           relu   #####   (128,)
          Dense   XXXXX -------------------      1290     0.2%
        softmax   #####   (10,)


In [7]:
# checkpoint
filepath="net1_weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [None]:
history = model.fit(X_train, Y_train,
                    nb_epoch=10,
                    batch_size=32,
                    validation_data=(X_test, Y_test),
                    callbacks=callbacks_list)

Train on 14979 samples, validate on 3745 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

In [9]:
print(history.history)

{'loss': [0.49785161953861451, 0.26321060708514743, 0.21091312236257923, 0.15570066654166631, 0.12899473071410697, 0.10778850068612808, 0.084416994348665647, 0.065220142632949923, 0.058808257702768013, 0.050119443321280509], 'val_loss': [0.30781285797801611, 0.28597269247625473, 0.24861959889829557, 0.2643890899077595, 0.25067322558172556, 0.25237081033763326, 0.26862382432209636, 0.2964454427669857, 0.3042222069902159, 0.3110885152292347], 'val_acc': [0.90253671562082782, 0.91855807743658213, 0.92176234979973293, 0.92870493991989322, 0.93164218958611478, 0.93057409879839781, 0.93351134846461947, 0.93538050734312417, 0.93217623497997326, 0.93324432576769023], 'acc': [0.85820148207490488, 0.92135656586284831, 0.9355764737298885, 0.9513318646104546, 0.95854195874621839, 0.96428332999532684, 0.97289538687495825, 0.97830295747777585, 0.98070632218439147, 0.98391080846518464]}


Historia wskazuje, że dodanie kolejnej warstwy nie zmieniło znacząco wyników.

In [10]:
score = model.evaluate(X,Y)
print(score)



In [11]:
print(model.evaluate(X_test,Y_test))



In [None]:
history = model.fit(X_train, Y_train,
                    nb_epoch=10,
                    batch_size=32,
                    validation_data=(X_test, Y_test),
                    callbacks=callbacks_list)

Train on 14979 samples, validate on 3745 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

In [13]:
print(history.history)

{'loss': [0.043815781152765985, 0.046554608345159509, 0.033267787058192458, 0.031807630089128715, 0.033556850971959228, 0.029903140274472072, 0.026922052762379252, 0.028549571699321676, 0.026234998018623768, 0.021976025553635009], 'val_loss': [0.36857800421855835, 0.36338026180903171, 0.34219374024671051, 0.35794903107686277, 0.41572992423902427, 0.37672527571555692, 0.39441454487508326, 0.38327367053732764, 0.44530530089692921, 0.40167688434130361], 'val_acc': [0.92710280373831777, 0.92630173564753004, 0.93377837116154871, 0.92843791722296398, 0.92736982643524701, 0.93084112149532705, 0.93351134846461947, 0.9324432576769025, 0.93564753004005341, 0.93084112149532705], 'acc': [0.98491221042793242, 0.98517925095133185, 0.98951865945657258, 0.98965217971827224, 0.98945189932572264, 0.99105414246611923, 0.99185526403631752, 0.99132118298951866, 0.99232258495226655, 0.9938580679618132]}


W porównaniu okazuje się nawet, że bardziej rozbudowana sieć uzyskała nieco gorsze wyniki po identycznej liczbie iteracji.

In [15]:
best_model =  Sequential()
best_model.add(Convolution2D(32, 5, 5, border_mode='valid', input_shape=(1, 28, 28), activation='relu', dim_ordering="th"))
best_model.add(MaxPooling2D(pool_size=(2, 2)))
best_model.add(Dropout(0.2))
best_model.add(Flatten())
best_model.add(Dense(128, activation='relu'))
best_model.add(Dense(128, activation='relu'))
best_model.add(Dense(classes, activation='softmax'))
# Compile model
best_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
best_model.load_weights("net1_weights-improvement-07-0.94.hdf5")

In [16]:
print(best_model.evaluate(X,Y))
print(best_model.evaluate(X_train,Y_train))
print(best_model.evaluate(X_test,Y_test))



# Poszukiwanie najczęściej mylonych liter

In [17]:
predictions = best_model.predict(X)

In [18]:
errors = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
for i in range(0,X.shape[0]):
    errors[np.argmax(Y[i])][np.argmax(predictions[i])] += 1
print(errors)

[[1839, 3, 0, 10, 1, 0, 1, 15, 3, 0], [5, 1816, 1, 27, 4, 1, 9, 7, 3, 0], [0, 1, 1821, 4, 5, 5, 33, 0, 4, 0], [1, 4, 1, 1854, 1, 1, 2, 3, 3, 3], [1, 10, 7, 0, 1825, 5, 12, 5, 8, 0], [1, 1, 1, 4, 3, 1844, 3, 5, 4, 6], [3, 3, 7, 8, 2, 4, 1838, 2, 3, 2], [4, 2, 1, 3, 0, 0, 4, 1852, 5, 1], [2, 3, 2, 8, 0, 2, 3, 6, 1818, 28], [4, 1, 1, 14, 0, 1, 0, 2, 19, 1830]]


Wynika z tego, że najczęściej mylone są:
 1. C z G (33 razy)
 2. I z J (28 razy)
 3. B z D (27 razy)
 
 4. J z I (19 razy)
 5. A z H (15 razy)
 6. J z D (14 razy)
 7. E z G (12 razy)
 8. A z D (10 razy)
  . E z B (10 razy)

Powyższe wartości wydają się być mniej zrównoważone niż w przypadku poprzedniej sieci.