In [1]:
# Load the digits dataset through sklearn.datasets and print the description
# text (DESCR) that ships with the returned dataset object.
from sklearn import datasets
digits = datasets.load_digits()
print(digits.DESCR)

.. _digits_dataset:

Optical recognition of handwritten digits dataset
--------------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 5620
    :Number of Attributes: 64
    :Attribute Information: 8x8 image of integer pixels in the range 0..16.
    :Missing Attribute Values: None
    :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)
    :Date: July; 1998

This is a copy of the test set of the UCI ML hand-written digits datasets
http://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits

The data set contains images of hand-written digits: 10 classes where
each class refers to a digit.

Preprocessing programs made available by NIST were used to extract
normalized bitmaps of handwritten digits from a preprinted form. From a
total of 43 people, 30 contributed to the training set and different 13
to the test set. 32x32 bitmaps are divided into nonoverlapping blocks of
4x4 and the number of on pixels are counted in each bloc

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split

# Report the dimensions of the feature matrix before splitting.
print('shape is', digits.data.shape)

# Hold out a quarter of the samples for testing; the fixed random_state keeps
# the split identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.25,
    random_state=0
)

shape is (1797, 64)


In [3]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier with default hyper-parameters. fit() returns the
# estimator itself, so construction and training can be chained.
logisticRegr = LogisticRegression().fit(x_train, y_train)

# Score on the training split; as the last expression it is the cell's output.
logisticRegr.score(x_train, y_train)




0.9962880475129918

In [4]:
# Score the logistic-regression baseline fitted above on the held-out test split.
logisticRegr.score(x_test, y_test)

0.9533333333333334

In [5]:
# One-hot encode the integer class labels for the TensorFlow model.
from sklearn import preprocessing

# The encoder works on 2-D input, so view each 1-D label vector as one column.
y_train_column = y_train.reshape(-1, 1)
y_test_column = y_test.reshape(-1, 1)

# Learn the set of label values from the training labels only ...
enc = preprocessing.OneHotEncoder()
enc.fit(y_train_column)

# ... then encode both splits and densify the sparse results.
onehotlabels = enc.transform(y_train_column).toarray()
onehotlabels_test = enc.transform(y_test_column).toarray()

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [6]:
def next_batch(num, data, labels):
    """Draw a random mini-batch of matching samples and labels.

    A shuffled ordering of all positions in `data` is generated with the
    global NumPy random state and its first `num` positions are used, so no
    sample is repeated within a batch. If `num` exceeds `len(data)` the batch
    simply contains every sample once.

    Args:
        num: maximum number of samples to return.
        data: indexable sequence of samples.
        labels: indexable sequence of labels aligned with `data`.

    Returns:
        A tuple `(batch_data, batch_labels)` of NumPy arrays whose rows
        correspond to each other.
    """
    order = np.arange(0, len(data))
    np.random.shuffle(order)

    batch_data = []
    batch_labels = []
    for position in order[:num]:
        batch_data.append(data[position])
        batch_labels.append(labels[position])

    return np.asarray(batch_data), np.asarray(batch_labels)

In [9]:
import tensorflow as tf
import numpy as np
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)

# Fully connected neural network with 5 layers for the 8x8 digits data
#
# · · · · · · · · · ·          (input data, flattened pixels)       X [batch, 64]    # 64 = 8*8
# \x/x\x/x\x/x\x/x\x/       -- fully connected layer (sigmoid)      W1 [64, 200]       B1[200]
#  · · · · · · · · ·                                                Y1 [batch, 200]
#   \x/x\x/x\x/x\x/         -- fully connected layer (sigmoid)      W2 [200, 100]      B2[100]
#    · · · · · · ·                                                  Y2 [batch, 100]
#     \x/x\x/x\x/           -- fully connected layer (sigmoid)      W3 [100, 60]       B3[60]
#      · · · · ·                                                    Y3 [batch, 60]
#       \x/x\x/             -- fully connected layer (sigmoid)      W4 [60, 30]        B4[30]
#        · · ·                                                      Y4 [batch, 30]
#         \x/               -- fully connected layer (softmax)      W5 [30, 10]        B5[10]
#          ·                                                        Y5 [batch, 10]


# Input X: flattened 8x8 grayscale images (64 values each); the first
# dimension (None) indexes the images in the mini-batch.
X = tf.placeholder(tf.float32, [None, 64])
# One-hot encoded correct answers are fed here.
Y_ = tf.placeholder(tf.float32, [None, 10])

# Hidden layer widths (the fifth and last layer has 10 softmax neurons).
L = 200
M = 100
N = 60
O = 30
# Weights initialised with small random values between -0.2 and +0.2
# (truncated normal, stddev 0.1); biases start at zero.
W1 = tf.Variable(tf.truncated_normal([64, L], stddev=0.1))  # 64 = 8 * 8
B1 = tf.Variable(tf.zeros([L]))
W2 = tf.Variable(tf.truncated_normal([L, M], stddev=0.1))
B2 = tf.Variable(tf.zeros([M]))
W3 = tf.Variable(tf.truncated_normal([M, N], stddev=0.1))
B3 = tf.Variable(tf.zeros([N]))
W4 = tf.Variable(tf.truncated_normal([N, O], stddev=0.1))
B4 = tf.Variable(tf.zeros([O]))
W5 = tf.Variable(tf.truncated_normal([O, 10], stddev=0.1))
B5 = tf.Variable(tf.zeros([10]))

# The model
XX = X  # change this if you need to reshape your data
Y1 = tf.nn.sigmoid(tf.matmul(XX, W1) + B1)
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + B2)
Y3 = tf.nn.sigmoid(tf.matmul(Y2, W3) + B3)
Y4 = tf.nn.sigmoid(tf.matmul(Y3, W4) + B4)
Ylogits = tf.matmul(Y4, W5) + B5
# Y holds the predicted class probabilities. It is a model OUTPUT and must
# never be fed; the ground-truth labels go into the Y_ placeholder.
Y = tf.nn.softmax(Ylogits)


# Loss: mean softmax cross-entropy between the logits and the true labels Y_.
# The fused op works on the logits to avoid the numerical problems of log(0).
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=Ylogits, labels=Y_))
learning_rate = 0.003
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Accuracy: fraction of samples whose highest-scoring class matches the label.
# Comparing against Y_ (not Y) is essential: argmax of the logits always equals
# argmax of their softmax, which would make the metric a constant 1.0.
correct_pred = tf.equal(tf.argmax(Ylogits, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# init
init = tf.global_variables_initializer()

# Training configuration
batch_size = 100
num_steps = 1000
display_step = 50

# A single session is created and deliberately left open so that later cells
# can keep evaluating tensors with the trained variables.
sess = tf.Session()

# Run the initializer
sess.run(init)

for step in range(1, num_steps + 1):
    batch_x, batch_y = next_batch(batch_size, x_train, onehotlabels)
    train_feed = {X: batch_x, Y_: batch_y}
    # Run optimization op (backprop)
    sess.run(train_op, feed_dict=train_feed)
    if step % display_step == 0 or step == 1:
        # Calculate batch loss and accuracy on the batch just trained on
        loss, acc = sess.run([loss_op, accuracy], feed_dict=train_feed)
        print("Step " + str(step) + ", Minibatch Loss= " +
              "{:.4f}".format(loss) + ", Training Accuracy= " +
              "{:.3f}".format(acc))

print("Optimization Finished!")

print("Testing Accuracy:",
      sess.run(accuracy, feed_dict={X: x_test,
                                    Y_: onehotlabels_test}))

Tensorflow version 1.10.0
Step 1, Minibatch Loss= 2.2929, Training Accuracy= 0.150
Step 50, Minibatch Loss= 1.7619, Training Accuracy= 0.400
Step 100, Minibatch Loss= 1.0570, Training Accuracy= 0.820
Step 150, Minibatch Loss= 0.6434, Training Accuracy= 0.830
Step 200, Minibatch Loss= 0.4553, Training Accuracy= 0.830
Step 250, Minibatch Loss= 0.3694, Training Accuracy= 0.890
Step 300, Minibatch Loss= 0.1734, Training Accuracy= 1.000
Step 350, Minibatch Loss= 0.0894, Training Accuracy= 1.000
Step 400, Minibatch Loss= 0.0570, Training Accuracy= 1.000
Step 450, Minibatch Loss= 0.0472, Training Accuracy= 1.000
Step 500, Minibatch Loss= 0.0371, Training Accuracy= 1.000
Step 550, Minibatch Loss= 0.0266, Training Accuracy= 1.000
Step 600, Minibatch Loss= 0.0224, Training Accuracy= 1.000
Step 650, Minibatch Loss= 0.0187, Training Accuracy= 1.000
Step 700, Minibatch Loss= 0.0163, Training Accuracy= 1.000
Step 750, Minibatch Loss= 0.0145, Training Accuracy= 1.000
Step 800, Minibatch Loss= 0.0126,

In [10]:
# Inspect the predicted class probabilities (softmax outputs) for the
# training samples, evaluated in the still-open training session.
predictions = sess.run(Y, feed_dict={X: x_train})