In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time
import math
import skimage
from skimage import transform
from skimage import util
from skimage import filters

In [2]:
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
sessConfig=tf.ConfigProto(gpu_options=gpu_options)

In [3]:
# Define paramaters for the model
learning_rate = 0.01
batch_size = 128
n_epochs = 50
batch_augment_proportion = 10 #augmentated data will be 10 times
batch_augment_portion = 2 # 100% of data will be augmented.
imgSide = 28

# Step 1: Read in data
# using TF Learn's built in function to load MNIST data to the folder data/mnist
mnist = input_data.read_data_sets('/data/mnist', one_hot=True) 


Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /data/mnist\train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /data/mnist\train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting /data/mnist\t10k-images-idx3-ubyte.gz
Extracting /data/mnist\t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [4]:
mnist.train.num_examples


55000

In [5]:
X = tf.placeholder( name="X", shape=[None, 784], dtype=tf.float32 )
Y = tf.placeholder( name="Y", shape=[None, 10], dtype=tf.int32 )

In [6]:
w = tf.Variable( tf.random_normal( shape=[784, 10], stddev=0.01 ), name = 'weights' )
b = tf.Variable( tf.zeros( shape=[1,10]), name = 'bias' )

In [7]:
logits = tf.matmul( X, w ) + b 
logits = tf.identity( logits, name = "logits" )

lossFunction = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits = logits, labels = Y ) )
optimizer = tf.train.AdamOptimizer( learning_rate=learning_rate ).minimize( lossFunction ) 

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [8]:
#image batch augmentation

#'blur': random_blur,

def random_rotation( imgMat ):
    deg = np.random.uniform( -10, 10 )
    return skimage.transform.rotate( imgMat, deg )

def random_noise( imgMat ):
    return skimage.util.random_noise( imgMat )

def random_blur( imgMat ):
    return skimage.filters.gaussian( imgMat, sigma = 2 )

def random_otsu( imgMat ):
    thres = skimage.filters.threshold_otsu( imgMat )
    return (imgMat <= thres).astype( 'float32' )


def augmentImage( imgVec ):
    imgMat = np.reshape( imgVec, ( imgSide, imgSide ) )
    
    operationKey = np.random.choice( list( augmentOperations ) )
    
    return augmentOperations[operationKey]( imgMat ).flatten()
    
    

def augmentOneImageFromBatch( batch ):
    index = np.random.randint( 0, batch_size )
    return ( augmentImage( batch[0][index] ), batch[1][index] )
    

def augmentBatch( batch ):
    
    # augment portions
    numImagesToAugment = math.floor( batch_size * batch_augment_portion )
    #newImages = [ augmentOneImageFromBatch( batch ) for i in range( numImagesToAugment ) ]
    xList = []
    yList = []
    for i in range( numImagesToAugment ):
        newTuple = augmentOneImageFromBatch( batch )
        xList.append( newTuple[0] )
        yList.append( newTuple[1] )
    
    xList = np.array( xList )
    yList = np.array( yList )
    #print( "x {0}, y {1}".format( xList.shape, yList.shape ) )
    #print( "batch {0}, batch {1}".format( len( batch[0] ), len( batch[1] ) ) )
    newX = np.append( batch[0], xList, axis = 0 )
    newY = np.append( batch[1], yList, axis = 0 ) 
    #print( "new x {0}, new y {1}".format( newX.shape, newY.shape ) )
    return ( newX, newY )

augmentOperations = {
    
    'rotate': random_rotation,
    'noise': random_noise,
    'otsu': random_otsu
}


In [9]:
batch = mnist.train.next_batch( batch_size )
print( batch[0].shape )
print( batch[1].shape )
batch = augmentBatch( batch )
print( batch[0].shape )

(128, 784)
(128, 10)
(384, 784)


In [10]:
batches_per_epoch = math.ceil( mnist.train.num_examples / batch_size )
print( batches_per_epoch )

n_epochs = 10
with tf.Session( config=sessConfig ) as sess:
    start_time = time.time()
    writer = tf.summary.FileWriter( "./graphs/logistic", sess.graph )
    sess.run( tf.global_variables_initializer() )
    
    for epoch in range( n_epochs ):
        print( "#Starting Epoch {0}".format( epoch + 1 ) )
        try:
            total_loss = 0.0
            i = 0
            for i in range( batches_per_epoch ) :
                if ( i %  math.ceil( ( batches_per_epoch / 10 ) ) == 0 ):
                    print( "----Starting batch {0}".format( i + 1 ) )
                #get batch
                batch = mnist.train.next_batch( batch_size )
                batch = augmentBatch( batch )
                #run batch
                _,batchLoss = sess.run( [optimizer, lossFunction], feed_dict = { X: batch[0], Y: batch[1] } )
                total_loss += batchLoss
        except tf.errors.OutOfRangeError:
            print( "Epoch {0}: {1}".format( epoch, math.sqrt( total_loss / mnist.train.num_examples ) ) )
            pass
    
    writer.flush()
    writer.close()
    print('Total time: {0} seconds'.format(time.time() - start_time))   
    
    w_out, b_out = sess.run( [w,b] )
    
    print('Optimization Finished!') # should be around 0.35 after 25 epochs
   
    

430
#Starting Epoch 1
----Starting batch 1
----Starting batch 44
----Starting batch 87
----Starting batch 130
----Starting batch 173
----Starting batch 216
----Starting batch 259
----Starting batch 302
----Starting batch 345
----Starting batch 388
#Starting Epoch 2
----Starting batch 1
----Starting batch 44
----Starting batch 87
----Starting batch 130
----Starting batch 173
----Starting batch 216
----Starting batch 259
----Starting batch 302
----Starting batch 345
----Starting batch 388
#Starting Epoch 3
----Starting batch 1
----Starting batch 44
----Starting batch 87
----Starting batch 130
----Starting batch 173
----Starting batch 216
----Starting batch 259
----Starting batch 302
----Starting batch 345
----Starting batch 388
#Starting Epoch 4
----Starting batch 1
----Starting batch 44
----Starting batch 87
----Starting batch 130
----Starting batch 173
----Starting batch 216
----Starting batch 259
----Starting batch 302
----Starting batch 345
----Starting batch 388
#Starting Epoch 5
--

In [11]:
mnist.test.epochs_completed

0

In [12]:
# test the model
preds = tf.nn.softmax(logits)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32)) # need numpy.count_nonzero(boolarr) :(

n_batches = int(mnist.test.num_examples/batch_size)
total_correct_preds = 0.

with tf.Session( config=sessConfig ) as sess:
    sess.run( tf.global_variables_initializer() )
    sess.run( [w.assign( w_out ), b.assign( b_out ) ] )

    for i in range(n_batches):
        X_batch, Y_batch = mnist.test.next_batch(batch_size)
        accuracy_batch = sess.run( accuracy, feed_dict={X: X_batch, Y:Y_batch}) 
        #print( accuracy_batch )
        total_correct_preds += accuracy_batch

    print('Accuracy {0}'.format(total_correct_preds/mnist.test.num_examples))

Accuracy 0.8782
