# Logistic Regression with TensorFlow

## Imports

In [1]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
from tensorflow.python.framework import ops

# This is just to make TensorFlow use only one of my GPUs.
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

## Load the Data

In [2]:
# Read the MNIST data sets from the "MNIST_data/" directory.  one_hot=True is
# requested so each label comes back as a one-hot vector rather than a digit.
mnist_data = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## Create the Model
**For Logistic Regression recall that:**  
A linear function is first calculated.  
**z = xw + b**  
Where **w** is a tensor of weights, **x** is a tensor of features, and **b** is a tensor of biases.  


Then the output of the linear function is run through an activation function.  
**a = g(z)**  
Where **g()** is the activation function.


With TensorFlow, the activation of the last linear function is built into the cost function.  So all we have to implement here is the linear function that calculates **z**.

In [3]:
def build_model(x_tensor, num_features, num_labels):
    """Build the linear function z = xw + b and return its output tensor.

    Args:
        x_tensor: Input features; it is matrix-multiplied with a
            (num_features, num_labels) weight matrix.
        num_features: Number of rows of the weight matrix.
        num_labels: Number of columns of the weight matrix and of the bias row.

    Returns:
        The tensor z.  No activation is applied here.
    """
    # Parameters: Xavier-initialized weights and zero-initialized biases.
    # AUTO_REUSE lets repeated calls share 'w' and 'b' instead of failing.
    with tf.variable_scope('ParameterInitialization', reuse=tf.AUTO_REUSE):
        weights_shape = (num_features, num_labels)
        weights = tf.get_variable(
            name='w',
            shape=weights_shape,
            initializer=tf.contrib.layers.xavier_initializer(),
        )

        # A single row of biases, added to every row of the matmul result.
        biases_shape = (1, num_labels)
        biases = tf.get_variable(
            name='b',
            shape=biases_shape,
            initializer=tf.zeros_initializer(),
        )

    # z = xw + b
    with tf.variable_scope('LinearFunction', reuse=tf.AUTO_REUSE):
        return tf.matmul(x_tensor, weights) + biases

## Define the Cost
The cost is a measurement of how well the parameters fit the training values during training: it quantifies the difference between the model's predictions and the actual labels.  The goal is to minimize this difference.


In TensorFlow, the last activation function is built into the cost.  Here, we are using the softmax activation which gives a probability of each output class being true.  All the probabilities sum to 1 for each example.


The cost function we are using is cross entropy, which measures the distance between the tensor of probabilities from the output of the softmax and the actual values, y.

In [4]:
def build_cost(z_tensor, y_tensor):
    """Return the mean softmax cross-entropy between logits and labels.

    Args:
        z_tensor: Output of the linear function, passed as the logits.
        y_tensor: Target labels, passed as the labels.

    Returns:
        A tensor holding the cross-entropy averaged over all of its elements.
    """
    with tf.variable_scope('CostFunction'):
        # The softmax activation is applied inside the cross-entropy op, so
        # z_tensor is passed in without any activation.
        return tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=z_tensor,
                                                    labels=y_tensor)
        )

## Define the Optimizer
Choose which optimization algorithm to use.  This is the algorithm that adjusts the weights and biases on each training step to bring the cost down.  TensorFlow comes with a good selection of pre-built optimizers.  We'll use Adam here.

In [5]:
def build_optimizer(cost_tensor, learning_rate):
    """Create an Adam optimizer and return its minimize step for the cost.

    Args:
        cost_tensor: The tensor to minimize.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The result of ``minimize(cost_tensor)``; running it in a session
        performs one parameter update.
    """
    with tf.variable_scope('Optimizer'):
        adam = tf.train.AdamOptimizer(learning_rate)
        minimize_step = adam.minimize(cost_tensor)

    return minimize_step

## Define the Accuracy Measurement
A measurement of how accurate the model is at predicting outcomes.

In [6]:
def build_accuracy(y_hat_tensor, y_tensor):
    """Return the fraction of rows whose predicted class matches the label.

    Args:
        y_hat_tensor: Per-class scores; the predicted class of each row is the
            index of its largest value along axis 1.
        y_tensor: Labels; the true class of each row is the index of its
            largest value along axis 1.

    Returns:
        A float32 scalar tensor: the mean of the 0/1 match indicators.
    """
    with tf.variable_scope('AccuracyFunction'):
        # Class index chosen by the model and class index given by the label.
        predicted_classes = tf.argmax(y_hat_tensor, axis=1)
        actual_classes = tf.argmax(y_tensor, axis=1)

        # Bool tensor marking where prediction and label agree, converted to
        # 0.0 / 1.0 so that its mean is the accuracy.
        matches = tf.equal(predicted_classes, actual_classes)
        matches_as_float = tf.cast(matches, tf.float32)

        return tf.reduce_mean(matches_as_float)

## Train Function
Put the pieces together to build the model, input features, and train it.

In [7]:
def train(learning_rate=0.0001, iterations=1000, batch_size=100):
    """Build the logistic regression graph and train it on mini-batches.

    The default graph is reset first, so every call starts from a fresh
    graph and freshly initialized variables.  Batches are drawn from the
    module-level ``mnist_data.train``.  Every 100th iteration the cost and
    accuracy fetched for that iteration's training batch are printed.

    Args:
        learning_rate: Learning rate passed to the optimizer.
        iterations: Number of training iterations to run.
        batch_size: Number of examples requested per iteration.

    Returns:
        None.  Progress is reported with ``print`` only.
    """
    num_features, num_labels = 784, 10
    report_every = 100

    ops.reset_default_graph()

    # Placeholders for a batch of inputs and the matching labels.
    features_ph = tf.placeholder(tf.float32, [None, num_features])
    labels_ph = tf.placeholder(tf.float32, [None, num_labels])

    # Assemble the graph: linear model -> cost -> optimizer, plus accuracy.
    logits = build_model(features_ph, num_features, num_labels)
    cost_tensor = build_cost(logits, labels_ph)
    train_step = build_optimizer(cost_tensor, learning_rate)
    accuracy_tensor = build_accuracy(logits, labels_ph)

    # Everything evaluated on each iteration, in the order it is unpacked.
    fetches = [accuracy_tensor, cost_tensor, train_step]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for step in range(1, iterations + 1):
            batch_x, batch_y = mnist_data.train.next_batch(batch_size)
            feed = {features_ph: batch_x, labels_ph: batch_y}
            accuracy, cost, _ = sess.run(fetches, feed_dict=feed)

            # Only report on every 100th iteration.
            if step % report_every != 0:
                continue
            print('Iteration {0} cost: {1}, accuracy: {2}'.format(step, cost, accuracy))

## Try it out!
With logistic regression the train accuracy is in the low 90's for the MNIST dataset.  More complex models can get much better accuracies.

In [8]:
%%time
# Run 5000 training iterations with 1000 examples per batch, keeping the
# default learning rate; %%time reports how long the whole cell takes.
train(iterations=5000, batch_size=1000)

Iteration 100 cost: 1.9049204587936401, accuracy: 0.4099999964237213
Iteration 200 cost: 1.561523675918579, accuracy: 0.6420000195503235
Iteration 300 cost: 1.3152207136154175, accuracy: 0.746999979019165
Iteration 400 cost: 1.1565009355545044, accuracy: 0.781000018119812
Iteration 500 cost: 1.0288691520690918, accuracy: 0.7940000295639038
Iteration 600 cost: 0.9443337321281433, accuracy: 0.8029999732971191
Iteration 700 cost: 0.8769931793212891, accuracy: 0.8180000185966492
Iteration 800 cost: 0.8077770471572876, accuracy: 0.8259999752044678
Iteration 900 cost: 0.7378003597259521, accuracy: 0.847000002861023
Iteration 1000 cost: 0.7188477516174316, accuracy: 0.8410000205039978
Iteration 1100 cost: 0.6753692030906677, accuracy: 0.8420000076293945
Iteration 1200 cost: 0.6445215344429016, accuracy: 0.8479999899864197
Iteration 1300 cost: 0.5717931389808655, accuracy: 0.8809999823570251
Iteration 1400 cost: 0.5806944370269775, accuracy: 0.8569999933242798
Iteration 1500 cost: 0.5597135424