In [59]:
import tensorflow as tf
import time

In [83]:
# Start from a fresh default graph so that re-running the cells below does not
# keep adding duplicate ops/variables to a graph left over from an earlier run.
tf.reset_default_graph()

In [84]:
# defining a logistic regression function that can be used other places too
def logistic_regression(X, y, initializer=None, seed=42, learning_rate=0.01):
    """Build a softmax (multinomial logistic) regression graph with an Adam train op.

    Args:
        X: 2-D feature tensor/placeholder. Its second dimension must be statically
            known because it sizes the weight matrix.
        y: 2-D label tensor/placeholder with one row per example and one column
            per class (one-hot rows in this notebook). Its second dimension must
            be statically known because it fixes the number of classes.
        initializer: Optional initial value for the weight matrix, shaped
            [num_features, num_classes]. When None, weights are drawn from a
            normal distribution with stddev 0.01 using `seed`.
        seed: Seed for the default weight initializer; unused when `initializer`
            is supplied.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A tuple `(y_predicted, loss, optimizer, loss_summary, init, saver, weights)`:
        the softmax output, the mean cross-entropy loss, the training op, the
        scalar loss summary for TensorBoard, the variable-initializer op, a
        `tf.train.Saver`, and the weight variable.
    """
    # The static shapes of the inputs determine the parameter shapes.
    num_features = int(X.get_shape()[1])
    num_classes = int(y.get_shape()[1])
    with tf.name_scope("logistic_regression"):
        with tf.name_scope("model"):
            # Default initialization: small random weights.
            if initializer is None:
                initializer = tf.random_normal([num_features, num_classes], stddev=0.01, seed=seed)

            weights = tf.Variable(initializer, name="weights")
            # One bias per class, starting at zero.
            bias = tf.Variable(tf.zeros([1, num_classes]), name="bias")
            # Unnormalized class scores that are fed to the softmax.
            logits = tf.matmul(X, weights, name="logits") + bias
            y_predicted = tf.nn.softmax(logits, name="softmax")

        with tf.name_scope("train"):
            # Cross entropy is computed against the `y` argument of this function
            # (not a notebook-level global), so the function works for any caller.
            entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y, name='cross_entropy_loss')
            # Final loss: mean over the batch.
            loss = tf.reduce_mean(entropy)
            optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

            # Loss summary for TensorBoard.
            loss_summary = tf.summary.scalar('cross_entropy', loss)

        # Both ops are created after minimize() so that they also cover the
        # variables the optimizer adds to the graph.
        with tf.name_scope("init"):
            init = tf.global_variables_initializer()
        with tf.name_scope("save"):
            saver = tf.train.Saver()
    return y_predicted, loss, optimizer, loss_summary, init, saver, weights

In [85]:
# Creating a function for log directories of tensorboard
from datetime import datetime

def log_dir(prefix=""):
    """Return a timestamped TensorBoard log directory path.

    The path has the form ``./tf_logs/[<prefix>-]run-<YYYYmmddHHMMSS>/`` where the
    timestamp is the current UTC time; the ``<prefix>-`` part is only present when
    a non-empty prefix is given.
    """
    timestamp = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    run_name = "run-" + timestamp
    if prefix:
        run_name = prefix + "-" + run_name
    return "./{}/{}/".format("tf_logs", run_name)

In [86]:
# Hyperparameters for the model
lr = 0.001
batch_size = 128
n_epochs = 2

# Placeholders for features and labels.
# Each image in the MNIST data has 28*28 = 784 pixels,
# so each image is represented by a 1x784 row of the feature tensor.
# There are 10 classes per image, corresponding to the digits 0 - 9,
# and each label is a one-hot vector.
X = tf.placeholder(tf.float32, shape=[None, 784], name='X_placeholder')
Y = tf.placeholder(tf.int32, shape=[None, 10], name='Y_placeholder')

# Timestamped TensorBoard directory for this run
logdir = log_dir("logregression")



In [87]:
# Build the computation graph and keep handles to the pieces used for
# training, logging, checkpointing and evaluation.
(y_pred, loss, optimizer, loss_summary,
 init, saver, weights) = logistic_regression(X, Y, learning_rate=lr)

# Write the graph definition to the log directory so TensorBoard can display it.
file_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph())

# Summary for epoch loss



In [88]:
# Loading the data
# Uses the MNIST loader shipped with TensorFlow's tutorial examples and keeps the
# files under ./data/mnist (presumably downloading them when missing -- confirm).
# one_hot=True asks for one-hot label vectors, matching the 10-wide Y placeholder.
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('./data/mnist', one_hot=True) 

Extracting ./data/mnist/train-images-idx3-ubyte.gz
Extracting ./data/mnist/train-labels-idx1-ubyte.gz
Extracting ./data/mnist/t10k-images-idx3-ubyte.gz
Extracting ./data/mnist/t10k-labels-idx1-ubyte.gz


In [90]:
# Training with mini-batch gradient descent: logs the per-batch loss to
# TensorBoard, prints the average loss per epoch and saves a checkpoint.
varinit = tf.global_variables_initializer()
with tf.Session() as sess:
    start_time = time.time()
    # Initialize all variables in the graph.
    varinit.run()
    # Number of mini-batches processed per epoch (integer division).
    n_batches = mnist.train.num_examples // batch_size

    # Train the model n_epochs times.
    for i in range(n_epochs):
        total_loss = 0

        for batch_index in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            # Fetching `optimizer` is what applies the parameter update for this batch.
            optimizer_val, batch_loss_val, batch_loss_summary_val = sess.run(
                [optimizer, loss, loss_summary],
                feed_dict={X: X_batch, Y: Y_batch})
            # Global step across epochs, used as the x-axis in TensorBoard.
            step_num = i * n_batches + batch_index
            total_loss += batch_loss_val
            # Write the batch loss to the TensorBoard plot.
            file_writer.add_summary(batch_loss_summary_val, step_num)

        # Average loss per epoch
        print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))

    # Make sure buffered summary events reach disk before the cell finishes.
    file_writer.flush()

    best_weights = weights.eval()
    print(best_weights.shape)
    print(best_weights)
    print('Total time: {0} seconds'.format(time.time() - start_time))
    print('Saving File')
    save_path = saver.save(sess, "./models/log_regression.ckpt")
    print('Optimization Finished!')
    

Average loss epoch 0: 0.688596247307
Average loss epoch 1: 0.369799320098
(784, 10)
[[-0.00280775 -0.00137752 -0.0067633  ...,  0.01381016 -0.01455798
  -0.00246214]
 [-0.01360841  0.0108796  -0.00351166 ..., -0.00752883 -0.00433573
   0.02453853]
 [ 0.00313744 -0.00527362  0.00852499 ...,  0.00886036 -0.00659783
   0.02701756]
 ..., 
 [ 0.00467879 -0.00443958 -0.01910986 ..., -0.00420249 -0.00103485
  -0.00468861]
 [-0.01132285 -0.00246789  0.00394058 ...,  0.0023485  -0.01456116
   0.00449873]
 [ 0.00181303  0.01575438  0.00934678 ...,  0.02459529  0.01033248
  -0.03450175]]
Total time: 1.25560688972 seconds
Saving File
Optimization Finished!


In [101]:
# Testing the model

# Importing sklearns metrics
from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix

# The softmax classifier predicts a value for each class and the probabilities of
# a row sum to 1: y_pred[i] contains the score of every class for test instance X[i],
# e.g. y_pred[0] = [1.86e-04 9.24e-07 2.67e-04 2.62e-03 4.24e-05
#                   1.36e-04 3.44e-06 9.93e-01 2.16e-04 3.25e-03]
# Taking argmax across axis 1 extracts the index of the highest scoring class (7 here).
y_pred_argmax = tf.argmax(y_pred, 1)

# The true labels are one-hot encoded, e.g. Y[0] = [0 0 0 0 0 0 0 1 0 0];
# argmax across axis 1 turns every row back into its class index (7 here).
y_true = tf.argmax(Y, 1)

with tf.Session() as sess:
    # Restore the trained parameters from the checkpoint written by the training cell.
    saver.restore(sess, "./models/log_regression.ckpt")
    # Evaluate predictions and true labels on the test set in a single run.
    y_pred_val, y_true_val = sess.run(
        [y_pred_argmax, y_true],
        feed_dict={X: mnist.test.images, Y: mnist.test.labels})
    print(y_true_val)
    # With average="micro" on single-label multi-class data, precision and
    # recall both coincide with accuracy.
    print("Precision Score")
    print(precision_score(y_true_val, y_pred_val, average="micro"))
    print("Recall Score")
    print(recall_score(y_true_val, y_pred_val, average="micro"))
    print("Accuracy Score")
    print(accuracy_score(y_true_val, y_pred_val))
    print("Confusion Matrix")
    print(confusion_matrix(y_true_val, y_pred_val))
    

INFO:tensorflow:Restoring parameters from ./models/log_regression.ckpt
[7 2 1 ..., 4 5 6]
Precision Score
0.9131
Recall Score
0.9131
Accuracy Score
0.9131
Confusion Matrix
[[ 962    0    1    1    0    2    8    1    5    0]
 [   0 1112    2    4    1    1    4    0   11    0]
 [  12   10  902   18   13    1   14   13   44    5]
 [   4    2   21  905    0   26    4   12   26   10]
 [   2    1    4    1  915    0   14    1    9   35]
 [  11    6    4   34   13  747   18    6   44    9]
 [  15    3    4    0   10   12  909    1    4    0]
 [   2   18   24    4   12    0    0  926    4   38]
 [  14    8    9   19    9   19   12   13  859   12]
 [  15    7    4   13   43    8    0   18    7  894]]
