In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
import os
import shutil
import time

In [2]:
filename = "iris.data.csv"

## Streaming in and splitting example/label pairs in to training, validation and test sets

In [4]:
def datasplitter(filename):
    train_rows = 0
    valid_rows = 0
    test_rows = 0
    
    if os.path.exists('iristrain'):
        shutil.rmtree('iristrain')
    os.makedirs('iristrain')
    
    
    if os.path.exists('irisvalid'):
        shutil.rmtree('irisvalid')
    os.makedirs('irisvalid')
    
    if os.path.exists('iristest'):
        shutil.rmtree('iristest')
    os.makedirs('iristest')

    with open(filename, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            if len(row)>0:
                filtered = (line.replace('\n', '') for line in row)
                filtered = (line.replace("\r", "") for line in filtered)
                filtered = (line.replace("'", "") for line in filtered)
                row = filtered

                group = np.random.multinomial(1,[.6,.2,.2])
                if np.argmax(group)==0:
                    with open('iristrain/iristrain{0}.csv'.format(train_rows), 'w+', newline = None) as trainfile:
                        trainwriter = csv.writer(trainfile, delimiter=',')
                        trainwriter.writerow(row)
                    train_rows += 1


                if np.argmax(group)==1:
                    with open('irisvalid/irisvalid{0}.csv'.format(valid_rows), 'w+', newline = None) as validfile:
                        validwriter = csv.writer(validfile, delimiter=',')
                        validwriter.writerow(row)
                    valid_rows += 1

                if np.argmax(group)==2:
                    with open('iristest/iristest{0}.csv'.format(test_rows), 'w+', newline = None) as testfile:
                        testwriter = csv.writer(testfile, delimiter=',')
                        testwriter.writerow(row)
                    test_rows += 1
                    
    return train_rows, valid_rows, test_rows

## Function to format inputs

#### Continuous valued predicators and one-hot encoded labels

In [5]:
def read_file_format(filename_queue, possible_labels):
    reader = tf.TextLineReader()
    _, value = reader.read(filename_queue)

    record_defaults = [tf.constant([],dtype=tf.float32), tf.constant([],dtype=tf.float32),
                       tf.constant([],dtype=tf.float32), tf.constant([],dtype=tf.float32),
                       tf.constant([],dtype=tf.float32), tf.constant([],dtype=tf.string)]
    col1, col2, col3, col4, col5, col6 = tf.decode_csv(value, record_defaults=record_defaults)
    
    example = tf.stack([col1, col2, col3, col4, col5])
    label = tf.one_hot(tf.where(tf.equal(possible_labels, col6))[0], depth = possible_labels.shape[0], on_value = 1, off_value = 0)
    label = label[0]
    return example, label

## Function to format, queue and read inputs in batches

In [6]:
def input_pipeline(filenames, possible_labels, batch_size = 3, num_epochs = None, evaluation = False):   
    filename_queue = tf.train.string_input_producer(
        train_set, num_epochs=num_epochs, shuffle=True)

    example, label = read_file_format(filename_queue, possible_labels)

#     # min_after_dequeue defines how big a buffer we will randomly sample
#     #   from -- bigger means better shuffling but slower start up and more
#     #   memory used.
#     # capacity must be larger than min_after_dequeue and the amount larger
#     #   determines the maximum we will prefetch.  Recommendation:
#     #   min_after_dequeue + (num_threads + a small safety margin) * batch_size
    min_after_dequeue = 10
    capacity = min_after_dequeue + 3 * batch_size
    example_batch, label_batch = tf.train.shuffle_batch(
      [example, label], batch_size=batch_size,capacity = capacity, 
        min_after_dequeue = min_after_dequeue
    )    
        
    return example_batch, label_batch

### Helper function for creating layer activation summaries for Tensorboard 

In [7]:
def _activation_summary(x):
    tensor_name = x.name
    tensor_name = tensor_name.replace(':', '_')
    tensor_name = tensor_name.replace('(', '_')
    tensor_name = tensor_name.replace(')', '_')
    tensor_name = tensor_name.replace(' ', '_')

    tf.summary.histogram(tensor_name + '/activations', x)
    tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))

## Specifying the predictive function we're looking to optimize

#### Here, we'll do: linear => sigmoid => linear => log-softmax

In [8]:
def make_prediction1(X):  
    with tf.variable_scope('sigmoid1') as scope:
        weights = tf.Variable(tf.random_normal([4, 3]), name='weights', trainable=True)
        biases = tf.Variable(tf.random_normal([3]), name='bias', trainable=True)
        lin_y1 = tf.matmul(X, weights) + biases
        _activation_summary(lin_y1)
        
        sig_y1 = tf.sigmoid(lin_y1)
        _activation_summary(sig_y1)
    
    with tf.variable_scope('softmax2') as scope:
        weights = tf.Variable(tf.random_normal([3, 3]), name='weights', trainable=True)
        biases = tf.Variable(tf.random_normal([3]), name='bias', trainable = True)
        lin_y2 = tf.matmul(sig_y1, weights) + biases
        _activation_summary(lin_y2)
        
        smax_num = tf.transpose(tf.exp(lin_y2 - tf.reduce_max(lin_y2)))
        smax_den = tf.reduce_sum(tf.exp(lin_y2 - tf.reduce_max(lin_y2)), -1)
        softmax_y2 = tf.transpose(tf.divide(smax_num, smax_den))
        _activation_summary(softmax_y2)

    return softmax_y2

#### Also implementing the made-up "ReQu" unit; our model is now: linear => ReQu => linear => log-softmax

In [9]:
def make_prediction2(X):  
    with tf.variable_scope('requ') as scope:
        weights = tf.Variable(tf.random_normal([4, 3]), name='weights', trainable=True)
        biases = tf.Variable(tf.random_normal([3]), name='bias', trainable=True)
        lin_y1 = tf.matmul(X, weights) + biases
        _activation_summary(lin_y1)
       
        requ_y1 = tf.square(tf.maximum(0., lin_y1))
        _activation_summary(requ_y1)
    
    with tf.variable_scope('softmax2') as scope:
        weights = tf.Variable(tf.random_normal([3, 3]), name='weights', trainable=True)
        biases = tf.Variable(tf.random_normal([3]), name='bias', trainable = True)
        lin_y2 = tf.matmul(requ_y1, weights) + biases
        _activation_summary(lin_y2)
        
        smax_num = tf.transpose(tf.exp(lin_y2 - tf.reduce_max(lin_y2)))
        smax_den = tf.reduce_sum(tf.exp(lin_y2 - tf.reduce_max(lin_y2)), -1)
        softmax_y2 = tf.transpose(tf.divide(smax_num, smax_den))
        _activation_summary(softmax_y2)

    return softmax_y2

#### Note the use of variable_scope so we don't have to name new tf.Variable()'s for every layer; rather we can just keep calling them weights and biases and their "scope" will be limited to their specific use in each layer

## Specifying our loss function

#### And giving ourselves Tensorboard summaries to monitor change in loss during SGD

In [10]:
def calculate_loss(logits, labels):
    NLLCriterion = -tf.reduce_mean(tf.reduce_sum(tf.multiply(labels, tf.log(logits + 1e-10)), axis=1))

    tf.add_to_collection('losses', NLLCriterion)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')

#### Here, borrowing a TF function which adds a smoothed loss, to reduce noise in our loss plot

In [11]:
def _add_loss_summaries(total_loss):
    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
        l_name = l.name.replace(":", "_")
        tf.summary.scalar(l_name + '_raw_', l)
        tf.summary.scalar(l_name, loss_averages.average(l))

    return loss_averages_op

## Specifying a training operation

#### We're going to decay our loss rate to avoid jumping over potentially lower error rates in our parameter space. Also using tf.train.MomentumOptimizer() to mitigate the same concern.

#### Applying the gradient of the loss here, given by:

## $\frac{\partial L}{\partial X} = \frac{\partial L}{\partial z^4} \centerdot \frac{\partial z^4}{\partial z^3} \centerdot \frac{\partial z^3}{\partial z^2} \centerdot \frac{\partial z^2}{\partial z^1} \centerdot \frac{\partial z^1}{\partial X} =$

In [12]:
def train(total_loss, global_step):
 
    # Variables that affect learning rate.
    num_batches_per_epoch = num_examples_per_train_epoch / batch_size
    decay_steps = int(num_batches_per_epoch * num_epochs_to_decay)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(
        initial_learning_rate, global_step,
        decay_steps, learning_rate_decay_factor, staircase=True)
    tf.summary.scalar('learning_rate', lr)

    # Generate moving averages of all losses and associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.MomentumOptimizer(lr, momentum)
        grads = opt.compute_gradients(total_loss)

    # Apply gradients.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)

    # Add histograms for gradients.
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op

In [13]:
def evaluate_accuracy(y_hat, y_):
    correct = tf.equal(tf.argmax(y_hat, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    tf.summary.scalar('validation_accuracy', accuracy)
    return accuracy

In [14]:
train_len, valid_len, test_len = datasplitter(filename=filename)

## Testing our pipeline

In [None]:
train_set = ['iristrain/iristrain{0}.csv'.format(i) for i in range(train_len)]
valid_set = ['irisvalid/irisvalid{0}.csv'.format(i) for i in range(valid_len)]
test_set = ['iristest/iristest{0}.csv'.format(i) for i in range(test_len)]


In [16]:
for i in range(train_len):
    if i == 0:
        debug = pd.read_csv('iristrain/iristrain{0}.csv'.format(i), header = None)
    else:
        temp = pd.read_csv('iristrain/iristrain{0}.csv'.format(i), header = None)
        debug = debug.append(temp)
debug_ex = debug.ix[:,:4]

In [17]:
num_epochs = 1
batch_size = 3

pipe_ex = debug_ex.iloc[0:0,:]

with tf.Graph().as_default():
    unique_labels = tf.constant(["Iris-setosa", "Iris-versicolor", "Iris-virginica"], dtype=tf.string)
    
    example_feed, labels_feed = input_pipeline(train_set, possible_labels = unique_labels,
                                               batch_size = batch_size, num_epochs = num_epochs)
    
    with tf.Session() as sess:
        # Start populating the filename queue.
        
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()  
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        
        step = 0
        while not coord.should_stop():
            try:
                start_time = time.time()                
               
                example_batch, label_batch = sess.run([example_feed, labels_feed])
                
                pipe_ex = pipe_ex.append(pd.DataFrame(example_batch))
                
                duration = time.time() - start_time
                step += 1

            except (tf.errors.OutOfRangeError, tf.errors.InvalidArgumentError) as e:
           
                print('Done training for %d epochs, %d steps.' % (num_epochs, step))
                # When done, ask the threads to stop.
                coord.request_stop()

        
        # Wait for threads to finish.
        coord.join(threads)
        sess.close()

Done training for 1 epochs, 32 steps.


In [18]:
debug_ex.ix[:,0].astype(float).reset_index(drop=True).equals((pipe_ex.sort_values(0).reset_index(drop=True).ix[:,0]))

False

In [19]:
global num_examples_per_train_epoch
num_examples_per_train_epoch = train_len

global num_epochs
num_epochs= 10

global batch_size
batch_size = 3

global moving_average_decay
moving_average_decay = 0.9999     # The decay to use for the moving average.

global num_epochs_to_decay
num_epochs_to_decay = 5    # Epochs after which learning rate decays.

global learning_rate_decay_factor
learning_rate_decay_factor = 0.001  # Learning rate decay factor.

global initial_learning_rate
initial_learning_rate = 0.01       # Initial learning rate.

global momentum
momentum = 0.05

In [20]:
logdir = 'TF_Logs'

In [21]:
with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
        
    unique_labels = tf.constant(["Iris-setosa", "Iris-versicolor", "Iris-virginica"], dtype=tf.string)
    
    example_feed, labels_feed = input_pipeline(train_set, possible_labels=unique_labels,
                                               batch_size=batch_size, num_epochs=num_epochs)
    
    valid_example_feed, valid_labels_feed = input_pipeline(
        valid_set, possible_labels=unique_labels,
        batch_size=valid_len, num_epochs=1)
    
    x = tf.placeholder(tf.float32, shape=[None, 4])
    y_ = tf.placeholder(tf.float32, shape = [None, 3])
    
    y_hat = make_prediction1(x)
    
    loss = calculate_loss(y_hat, y_)
    
    train_op = train(loss, global_step=global_step)
    step = 0
    
    accuracy = evaluate_accuracy(y_hat, y_)
    
    with tf.Session() as sess:
        # Start populating the filename queue.
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter(logdir, sess.graph)

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()  
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        step = 0
        while not coord.should_stop():
            try:
                start_time = time.time()                

                example_batch, label_batch = sess.run([example_feed, labels_feed])
                example_batch = example_batch[:, 1:]

                result, summary =  sess.run([train_op, merged],
                                            feed_dict={x: example_batch,
                                                       y_: label_batch})
                writer.add_summary(summary, global_step.eval())

                step += 1
                print(step)
                duration = time.time() - start_time
                print(duration)

            except (tf.errors.OutOfRangeError, tf.errors.InvalidArgumentError) as e:
                print("Training complete, entering validation...")

                print("Evaluating inputs...")
                valid_examples, valid_labels = sess.run([valid_example_feed, valid_labels_feed])
                valid_examples = valid_examples[:, 1:]

                print("Calculating accuracy...")
                acc = sess.run(accuracy, feed_dict={x: valid_examples,
                                                             y_: valid_labels})
                print("Evaluating validation summary...")
                summary = sess.run(merged,
                                   feed_dict={
                                       x: valid_examples, y_: valid_labels
                                   })

                print('Done training for %d epochs, %d steps.' % (num_epochs, step))
                # When done, ask the threads to stop.
                coord.request_stop()

        
        # Wait for threads to finish.
        coord.join(threads)
        sess.close()

1
0.05374288558959961
2
0.008307695388793945
3
0.007004976272583008
4
0.004003286361694336
5
0.00400233268737793
6
0.006012678146362305
7
0.009998798370361328
8
0.006457090377807617
9
0.006005525588989258
10
0.004003047943115234
11
0.009326696395874023
12
0.006003856658935547
13
0.015505313873291016
14
0.0072362422943115234
15
0.007004976272583008
16
0.00600433349609375
17
0.007005453109741211
18
0.013160228729248047
19
0.007004737854003906
20
0.008387327194213867
21
0.005004405975341797
22
0.008006811141967773
23
0.008218050003051758
24
0.008006572723388672
25
0.005284309387207031
26
0.004001140594482422
27
0.008001327514648438
28
0.015150785446166992
29
0.008335351943969727
30
0.008006095886230469
31
0.004002094268798828
32
0.008632183074951172
33
0.005003452301025391
34
0.008162498474121094
35
0.008007287979125977
36
0.009226322174072266
37
0.006003379821777344
38
0.008109569549560547
39
0.008006572723388672
40
0.010011672973632812
41
0.008000373840332031
42
0.004002809524536133
43


In [22]:
acc

0.4375

In [24]:
with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
        
    unique_labels = tf.constant(["Iris-setosa", "Iris-versicolor", "Iris-virginica"], dtype=tf.string)
    
    example_feed, labels_feed = input_pipeline(train_set, possible_labels=unique_labels,
                                               batch_size=batch_size, num_epochs=num_epochs)
    
    valid_example_feed, valid_labels_feed = input_pipeline(
        valid_set, possible_labels=unique_labels,
        batch_size=valid_len, num_epochs=1)
    
    x = tf.placeholder(tf.float32, shape=[None, 4])
    y_ = tf.placeholder(tf.float32, shape = [None, 3])
    
    y_hat = make_prediction2(x)
    
    loss = calculate_loss(y_hat, y_)
    
    train_op = train(loss, global_step=global_step)
    step = 0
    
    accuracy = evaluate_accuracy(y_hat, y_)
    
    with tf.Session() as sess:
        # Start populating the filename queue.
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter(logdir, sess.graph)

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()  
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        step = 0
        while not coord.should_stop():
            try:
                start_time = time.time()                

                example_batch, label_batch = sess.run([example_feed, labels_feed])
                example_batch = example_batch[:, 1:]

                result, summary =  sess.run([train_op, merged],
                                            feed_dict={x: example_batch,
                                                       y_: label_batch})
                writer.add_summary(summary, global_step.eval())

                step += 1
                print(step)
                duration = time.time() - start_time
                print(duration)

            except (tf.errors.OutOfRangeError, tf.errors.InvalidArgumentError) as e:
                print("Training complete, entering validation...")

                print("Evaluating inputs...")
                valid_examples, valid_labels = sess.run([valid_example_feed, valid_labels_feed])
                valid_examples = valid_examples[:, 1:]

                print("Calculating accuracy...")
                acc = sess.run(accuracy, feed_dict={x: valid_examples,
                                                             y_: valid_labels})
                print("Evaluating validation summary...")
                summary = sess.run(merged,
                                   feed_dict={
                                       x: valid_examples, y_: valid_labels
                                   })

                print('Done training for %d epochs, %d steps.' % (num_epochs, step))
                # When done, ask the threads to stop.
                coord.request_stop()

        
        # Wait for threads to finish.
        coord.join(threads)
        sess.close()

1
0.05187797546386719
2
0.0
3
0.0
4
0.015626192092895508
5
0.0
6
0.0
7
0.015625476837158203
8
0.0
9
0.0
10
0.01563882827758789
11
0.0
12
0.0
13
0.0
14
0.015629291534423828
15
0.0
16
0.0
17
0.015607595443725586
18
0.0
19
0.0
20
0.01562643051147461
21
0.0
22
0.0
23
0.0
24
0.016911029815673828
25
0.004274129867553711
26
0.0
27
0.0
28
0.015630006790161133
29
0.0
30
0.0
31
0.0
32
0.015621662139892578
33
0.0
34
0.0
35
0.015625715255737305
36
0.0
37
0.0
38
0.0
39
0.01811981201171875
40
0.004984617233276367
41
0.00412750244140625
42
0.004015207290649414
43
0.003990650177001953
44
0.0030701160430908203
45
0.011417150497436523
46
0.002998828887939453
47
0.005005598068237305
48
0.006002664566040039
49
0.008008480072021484
50
0.005002498626708984
51
0.0040013790130615234
52
0.0052318572998046875
53
0.004003286361694336
54
0.007999420166015625
55
0.008068561553955078
56
0.0
57
0.0
58
0.01562666893005371
59
0.0
60
0.015642642974853516
61
0.0
62
0.0
63
0.0
64
0.016172170639038086
65
0.004264116287231

In [25]:
acc

0.25