In [1]:
# import all the libraries we need
from datetime import datetime

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

import keras
from keras.models import Sequential
from keras.layers import Dense


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
# For visualizing a row of the dataset, transforming it into 8x8 image form
def visualize_img(img_vec, title=""):
    plt.imshow(img_vec.values.reshape((8,8)), cmap="hot")
    plt.title(title)
    plt.show()

In [7]:
# To add a timestamp to a string of text
def add_timestamp(text):
    return text + str(datetime.now().year) \
           + str(datetime.now().month) \
           + str(datetime.now().day) \
           + str(datetime.now().second)

In [8]:
# load data from text files
x = pd.read_csv('NumNet/data/optdigits_train.txt', usecols=[x for x in range(64)], header=None)
y = pd.read_csv('NumNet/data/optdigits_train.txt', usecols=[64], header=None)
test = pd.read_csv('NumNet/data/optdigits_test.txt', usecols=[x for x in range(64)], header=None)
test_label = pd.read_csv('NumNet/data/optdigits_test.txt', usecols=[64], header=None)

In [9]:
# Convert the y data to [0 0 1 0 ... 0] form
y = keras.utils.to_categorical(y, num_classes=10)
test_label = keras.utils.to_categorical(test_label, num_classes=10)

In [10]:
# get a feel for what data in in the training set
print(x.describe())

           0            1            2            3            4   \
count  3823.0  3823.000000  3823.000000  3823.000000  3823.000000   
mean      0.0     0.301334     5.481821    11.805912    11.451478   
std       0.0     0.866986     4.631601     4.259811     4.537556   
min       0.0     0.000000     0.000000     0.000000     0.000000   
25%       0.0     0.000000     1.000000    10.000000     9.000000   
50%       0.0     0.000000     5.000000    13.000000    13.000000   
75%       0.0     0.000000     9.000000    15.000000    15.000000   
max       0.0     8.000000    16.000000    16.000000    16.000000   

                5            6            7            8            9   \
count  3823.000000  3823.000000  3823.000000  3823.000000  3823.000000   
mean      5.505362     1.387392     0.142297     0.002093     1.960502   
std       5.613060     3.371444     1.051598     0.088572     3.052353   
min       0.000000     0.000000     0.000000     0.000000     0.000000   
25%     

In [11]:
# to visualize dataset
random_indices = np.random.randint(0, x.shape[0], 3)  
for idx in random_indices:
    visualize_img(x.iloc[idx, :], title=str(idx))

In [12]:
# gather means and std's
x_means = x.mean(axis=0)
x_stds = x.std(axis=0)
test_means = test.mean(axis=0)
test_stds = test.std(axis=0)

In [13]:
# Make training and testing set have Zero mean
# and 1 standard deviation
x = x.subtract(x_means)
x = x.divide(x_stds).fillna(0)
test = test.subtract(test_means)
test = test.divide(test_stds).fillna(0)

In [15]:
# build the model
model = Sequential()
model.add(Dense(10, activation='relu', input_dim=64))
model.add(Dense(10, activation='softmax'))
# I went with the simplest model i could. I wanted to try to keep max
# accuracy above 94% and have a fast training time

In [16]:
model.compile(loss='categorical_crossentropy',      # A way to compare outputs of categorical problems where each is in a range [0,1]
              optimizer='adam',                     # a good optimizer
              metrics=['accuracy'])                 # use the model accuracy for training

In [18]:
# Train the model for a fixed amount of epochs
model.fit(x.as_matrix(), y,
          epochs=20)

  


Epoch 1/20


  32/3823 [..............................] - ETA: 24s - loss: 2.4470 - acc: 0.2188







Epoch 2/20
  32/3823 [..............................] - ETA: 0s - loss: 1.4833 - acc: 0.5312







Epoch 3/20
  32/3823 [..............................] - ETA: 0s - loss: 1.0500 - acc: 0.7188







Epoch 4/20
  32/3823 [..............................] - ETA: 0s - loss: 0.5660 - acc: 0.9062







Epoch 5/20
  32/3823 [..............................] - ETA: 0s - loss: 0.4318 - acc: 0.9688







Epoch 6/20
  32/3823 [..............................] - ETA: 0s - loss: 0.2875 - acc: 0.9688







Epoch 7/20
  32/3823 [..............................] - ETA: 0s - loss: 0.1602 - acc: 1.0000







Epoch 8/20
  32/3823 [..............................] - ETA: 0s - loss: 0.4452 - acc: 0.9062







Epoch 9/20
  32/3823 [..............................] - ETA: 0s - loss: 0.2598 - acc: 0.9375







Epoch 10/20
  32/3823 [..............................] - ETA: 0s - loss: 0.1674 - acc: 0.9375







Epoch 11/20
  32/3823 [..............................] - ETA: 0s - loss: 0.0909 - acc: 1.0000







Epoch 12/20
  32/3823 [..............................] - ETA: 0s - loss: 0.1676 - acc: 0.9375







Epoch 13/20
  32/3823 [..............................] - ETA: 0s - loss: 0.3202 - acc: 0.9375







Epoch 14/20


  32/3823 [..............................] - ETA: 0s - loss: 0.1195 - acc: 1.0000







Epoch 15/20
  32/3823 [..............................] - ETA: 0s - loss: 0.1485 - acc: 1.0000







Epoch 16/20
  32/3823 [..............................] - ETA: 0s - loss: 0.2324 - acc: 0.8750







Epoch 17/20
  32/3823 [..............................] - ETA: 0s - loss: 0.1348 - acc: 0.9688







Epoch 18/20
  32/3823 [..............................] - ETA: 0s - loss: 0.0446 - acc: 1.0000







Epoch 19/20
  32/3823 [..............................] - ETA: 0s - loss: 0.2717 - acc: 0.9375







Epoch 20/20
  32/3823 [..............................] - ETA: 0s - loss: 0.0841 - acc: 0.9688









<keras.callbacks.History at 0x278ee75ce80>

In [19]:
# Check the accuracy of the model on the test set
score = model.evaluate(test.as_matrix(), test_label)
print('Loss: {0} Accuracy {1}'.format(score[0], score[1]))

  32/1797 [..............................] - ETA: 1s



Loss: 0.18557541992237536 Accuracy 0.9415692821368948


  


In [20]:
# see what the network weights look like
for layer in model.layers:
    weights = layer.get_weights()  # list of numpy arrays
    print(weights)

[array([[-8.39899182e-02, -2.71123081e-01,  1.04885995e-02,
        -2.47850358e-01, -7.15716481e-02, -2.47423410e-01,
        -2.28406191e-01,  1.89225078e-02, -5.38599938e-02,
        -2.55346388e-01],
       [-2.84942716e-01, -2.64771014e-01, -3.67605910e-02,
        -2.85908598e-02,  1.22491822e-01, -2.04150919e-02,
         7.42179602e-02,  9.23032612e-02, -9.77505967e-02,
        -4.16309871e-02],
       [-1.47685096e-01, -1.40856877e-01, -1.88993007e-01,
        -5.77501357e-02,  1.89730331e-01,  9.03048068e-02,
         1.77553385e-01, -3.55079845e-02, -2.75454909e-01,
        -9.56388712e-02],
       [-1.40524015e-01, -1.42211124e-01,  2.73103714e-01,
        -9.95252952e-02,  2.40732029e-01,  1.97173521e-01,
         2.88619608e-01,  1.35091776e-02,  2.73862178e-03,
         3.54225039e-02],
       [-3.29132855e-01,  2.60330196e-02,  2.34922662e-01,
         2.18450720e-03,  3.65714356e-02,  2.71878481e-01,
         6.11238420e-01,  9.82859358e-03, -9.65029299e-02,
        -1

In [22]:
# save the model for later use
filename = 'NumNet/models/tictacModel'
model.save(add_timestamp(filename))