In [38]:
import tensorflow as tf
import input_data
import numpy as np
import cv2
import sys
import math
from scipy import ndimage
sys.path.append('../src/')

# compute the shift that moves the image's center of mass to the image center
def getBestShift(img):
    """Return the integer shift that moves the image's center of mass to its center.

    Parameters
    ----------
    img : 2-D numpy array
        Pixel intensities; each pixel's value is used as its weight.

    Returns
    -------
    (shiftx, shifty) : tuple of ints
        Offsets along the column (x) and row (y) axes, i.e. the rounded
        difference between the geometric center ``(cols/2, rows/2)`` and the
        center of mass. ``(0, 0)`` is returned for an image whose total
        intensity is zero.
    """
    rows,cols = img.shape
    if np.sum(img) == 0:
        # center_of_mass divides by the total intensity; for a blank image this
        # is 0/0 = NaN, which cannot be converted to a meaningful integer shift.
        return 0,0
    # use the public scipy.ndimage function rather than the deprecated
    # scipy.ndimage.measurements namespace
    cy,cx = ndimage.center_of_mass(img)
    shiftx = np.round(cols/2.0-cx).astype(int)
    shifty = np.round(rows/2.0-cy).astype(int)
    return shiftx,shifty

# shifts the image in the given directions
def shift(img,sx,sy):
    """Translate a 2-D image by ``sx`` pixels along x and ``sy`` pixels along y.

    The result has the same (rows, cols) size as ``img``.
    """
    height, width = img.shape
    # 2x3 affine matrix with an identity linear part: a pure translation
    translation = np.array([[1, 0, sx],
                            [0, 1, sy]], dtype=np.float32)
    # warpAffine takes the output size as (width, height)
    return cv2.warpAffine(img, translation, (width, height))

In [40]:
def _preprocess_digit(no):
    """Load ``d_<no>.jpg`` and turn it into a flattened 28x28 image in [0, 1].

    Steps: read as grayscale, invert and rescale to 28x28, binarize, crop the
    all-black border, scale the longer side to 20 pixels, pad with black to
    28x28, and translate so the center of mass sits at the image center. The
    processed image is also written to ``d_<no>stest.png``.

    Raises
    ------
    IOError
        If the image file cannot be read.
    ValueError
        If no non-black pixel is left after thresholding.
    """
    path = "d_"+ str(no) +".jpg"
    gray = cv2.imread(path, 0)
    if gray is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError("could not read image file: " + path)

    # invert the intensities and rescale
    # (presumably the photos show dark ink on a light background)
    gray = cv2.resize(255-gray, (28, 28))
    # filter noise by binarizing the image
    _, gray = cv2.threshold(gray, 128, 255,
                            cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # remove every row and column at the sides of the image which is
    # completely black, i.e. crop to the bounding box of the non-black pixels
    ink_rows = np.flatnonzero(gray.any(axis=1))
    ink_cols = np.flatnonzero(gray.any(axis=0))
    if ink_rows.size == 0:
        raise ValueError("no digit pixels left after thresholding: " + path)
    gray = np.ascontiguousarray(
        gray[ink_rows[0]:ink_rows[-1]+1, ink_cols[0]:ink_cols[-1]+1])
    rows,cols = gray.shape

    # resize the cropped digit so that its longer side is 20 pixels,
    # keeping the aspect ratio
    if rows > cols:
        factor = 20.0/rows
        rows = 20
        cols = int(round(cols*factor))
    else:
        factor = 20.0/cols
        cols = 20
        rows = int(round(rows*factor))
    gray = cv2.resize(gray, (cols, rows))

    # the network needs a 28x28 image, so add the missing black rows and
    # columns (np.pad with mode 'constant' fills with 0s)
    colsPadding = (int(math.ceil((28-cols)/2.0)),int(math.floor((28-cols)/2.0)))
    rowsPadding = (int(math.ceil((28-rows)/2.0)),int(math.floor((28-rows)/2.0)))
    gray = np.pad(gray,(rowsPadding,colsPadding),'constant')

    # center the digit by its center of mass
    shiftx,shifty = getBestShift(gray)
    gray = shift(gray,shiftx,shifty)

    # save the preprocessed image
    cv2.imwrite("d_"+ str(no) +"stest.png", gray)

    # scale into the range [0-1]
    return gray.flatten() / 255.0


def train_predict_and_test(input_images):
    """Train a softmax classifier on MNIST and classify local digit images.

    The function
      1. loads MNIST from ``MNIST_data/`` via ``input_data.read_data_sets``,
      2. trains a single-layer softmax model for 1000 mini-batches of 100
         images with gradient descent (learning rate 0.01),
      3. prints the accuracy on the MNIST test set,
      4. preprocesses ``d_<n>.jpg`` for every ``n`` in ``input_images``
         (writing ``d_<n>stest.png`` as a side effect),
      5. prints the accuracy on those images and returns the predictions.

    Parameters
    ----------
    input_images : iterable of int
        Digits in 0-9. Each value ``n`` is used both to build the file name
        ``d_<n>.jpg`` and as the true label of that image.

    Returns
    -------
    numpy array
        The predicted digit for each entry of ``input_images``, in order.

    Raises
    ------
    IOError
        If one of the image files cannot be read.
    ValueError
        If an image is blank after thresholding.
    """
    # materialize once so the number of images is known (also accepts iterators)
    input_images = list(input_images)

    # create a MNIST_data folder with the MNIST dataset if necessary
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    # build the model in its own graph so that re-running this function
    # (e.g. re-executing the notebook cell) does not keep adding ops and
    # variables to the global default graph
    with tf.Graph().as_default():
        # a placeholder for the image data:
        # (None is an unspecified number of images)
        x = tf.placeholder("float", [None, 784])

        # weights and biases of the single layer
        W = tf.Variable(tf.zeros([784,10]))
        b = tf.Variable(tf.zeros([10]))

        # unnormalized scores and their softmax
        logits = tf.matmul(x,W) + b
        y = tf.nn.softmax(logits)

        # labels
        y_ = tf.placeholder("float", [None,10])

        # summed cross entropy, which we minimize to improve the model.
        # Computed from the logits instead of -sum(y_*log(y)): the latter
        # yields NaN as soon as a softmax output underflows to 0.
        cross_entropy = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))

        # use a learning rate of 0.01 to minimize the cross_entropy error
        train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

        # a prediction is correct if the index with the highest y value is
        # the same as in the one-hot label; accuracy is the mean of that
        prediction = tf.argmax(y,1)
        correct_prediction = tf.equal(prediction, tf.argmax(y_,1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        # initialize all variables
        init = tf.global_variables_initializer()

        # the context manager closes the session (and frees its resources)
        # on every exit path
        with tf.Session() as sess:
            sess.run(init)

            # use 1000 batches with a size of 100 each to train our net
            for _ in range(1000):
                batch_xs, batch_ys = mnist.train.next_batch(100)
                # run the train_step with the given image values (x)
                # and the real output (y_)
                sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

            # accuracy of the model on the MNIST test set
            print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

            ############### Test ############
            # one row per requested image, and the matching one-hot labels
            # (e.g., [0,0,1,0,0,0,0,0,0,0] for 2)
            images = np.zeros((len(input_images),784))
            labels = np.zeros((len(input_images),10))
            for i, no in enumerate(input_images):
                images[i] = _preprocess_digit(no)
                labels[i, no] = 1

            # accuracy and predictions for our own images
            own_accuracy, predicted = sess.run([accuracy, prediction],
                                               feed_dict={x: images, y_: labels})
            print(own_accuracy)
            return predicted
   
    
if __name__ == '__main__':
    # train the model, then classify the images d_2.jpg, d_7.jpg, ... and
    # show the predicted digits
    predicted_digits = train_predict_and_test([2, 7, 6, 9, 8, 5])
    print(predicted_digits)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
0.9207
0.8333333
[2 7 6 9 5 5]


![image.png](attachment:image.png)

![image.png](attachment:image.png)
