In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets from the MNIST_data/ directory.
# one_hot=True requests the labels as one-hot vectors rather than digit values.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Steps explained

In [3]:
import tensorflow as tf

In [4]:
# 1) Images in X and labels in y. 
# 2) Each image is a 28 x 28 square pixel = 784 numbers to define intensity
# 3) Tensor is a mxn dimensional array (m x 784)
# 4) Each entry in the tensor is a pixel intensity between 0 and 1
# 5) Each label y is a number between 0 and 9 represented as a one-hot vector of ten 0s and 1s.
#    We will use a [mx10] matrix for labels
# 6) Softmax regression is used - a) Compute score for each label, b) Convert scores to probabilities using the softmax function
# 7) score = Xw + b, y_pred = softmax(score)
# 8) Here softmax serves as a "link" function or "activation" function
# 9) y = softmax(Xw + b)
# 10) w = [784 x 10], X = [m x 784] --> (X.w) = [m x 10] with the score values


### Setup the model, variables, parameters, loss function, step update

In [5]:
# Input placeholder: any number of rows (None), each a flattened
# 28 x 28 image of 784 float32 pixel intensities.
x = tf.placeholder(tf.float32, shape=[None, 784])

In [6]:
# Model parameters, both initialised to zeros:
#   W - weight matrix, one column of 784 weights per digit class
#   b - bias vector, one entry per digit class
W = tf.Variable(tf.zeros(shape=[784, 10]))
b = tf.Variable(tf.zeros(shape=[10]))

In [7]:
# Predicted class probabilities: softmax applied to the linear score xW + b
score = tf.matmul(x, W) + b
y = tf.nn.softmax(score)

In [8]:
# Loss function using cross entropy
# Measures how inefficient our predictions are for describing the truth

# Placeholder for the true labels: one one-hot row of length 10 per image
y_ = tf.placeholder(tf.float32, [None, 10])

# CE = -SUM { y_ * log(softmax(score)) } per example, averaged over the batch.
# The loss is computed from the raw scores (logits) with the fused op instead
# of tf.log(tf.nn.softmax(...)): taking the log of an explicit softmax gives
# NaN as soon as a predicted probability underflows to 0.
logits = tf.matmul(x, W) + b
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))



In [9]:
# Parameter update driven by the cross-entropy loss:
# 1) Determine how each variable affects the loss
# 2) Update the variables to reduce the loss at each iteration
# The optimization algorithm is gradient descent with learning rate alpha = 0.5.
# To use another algorithm, swap the optimizer class constructed below.

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train_step = optimizer.minimize(cross_entropy)

### Launch the model in an InteractiveSession

In [10]:
# Start an interactive session; the later cells run operations through `sess`
sess = tf.InteractiveSession()

In [11]:
# Initialize all variables (W and b) before any training step is run
tf.global_variables_initializer().run()

In [12]:
# Train - run the update step NUM_STEPS times
# This is mini-batch stochastic gradient descent with a batch of BATCH_SIZE points
# At each step we take the next batch of training points, calculate the loss,
# backpropagate and update the parameters

NUM_STEPS = 1000
BATCH_SIZE = 500

for _ in range(NUM_STEPS):
    batch_xs, batch_ys = mnist.train.next_batch(BATCH_SIZE)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

#### Evaluating the model

In [13]:
# argmax along axis 1 gives, for every row of an [m x 10] matrix, the index of
# its largest entry - i.e. the digit that row points to.
# Doing this for the predictions and for the true labels and comparing the two
# yields one boolean per example: True where the prediction is right.

predicted_digit = tf.argmax(y, 1)
true_digit = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_digit, true_digit)

In [14]:
# Accuracy = correctly predicted labels / total labels.
# Casting the booleans to floats (True -> 1.0, False -> 0.0) and averaging
# gives exactly that fraction.

correct_as_float = tf.cast(correct_prediction, tf.float32)
accuracy = tf.reduce_mean(correct_as_float)

In [15]:
# Evaluate the trained model on the test images and labels
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
test_accuracy = sess.run(accuracy, feed_dict=test_feed)
print(test_accuracy)

0.9207


### Combining all code into one block

In [16]:
def myfirsttensor(batch_size, alpha=0.5):
    """Train the softmax model from scratch and print train/test accuracy.

    Uses the notebook-level graph objects ``x``, ``y``, ``y_`` and
    ``cross_entropy`` and the ``mnist`` data set defined in earlier cells.
    Training runs for 1000 gradient-descent steps in a session of its own,
    so the variables are re-initialized on every call.

    Args:
        batch_size: number of training examples fed to each update step.
        alpha: gradient-descent learning rate. Defaults to 0.5, the rate used
            by the step-by-step cells, so the function can be called with
            only a batch size.

    Returns:
        None. The two accuracies are printed, rounded to two decimals.
    """
    # Graph construction. Each call adds a new set of optimizer/accuracy ops
    # to the default graph.
    train_step = tf.train.GradientDescentOptimizer(alpha).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # A scoped session is closed when the block exits, so repeated calls do
    # not accumulate open sessions.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for _ in range(1000):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

        acc_train = sess.run(accuracy, feed_dict={x: mnist.train.images, y_: mnist.train.labels})
        acc_test = sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})

    print ("Model accuracy on train set = ", round(acc_train,2))
    print ("Model accuracy on test set  = ", round(acc_test,2))


#### Batch sizes vs Classification accuracy

In [19]:
# Batch size 250, learning rate 0.5
myfirsttensor(250, 0.5)

Model accuracy on train set =  0.92
Model accuracy on test set  =  0.92


In [43]:
# Batch size 10. The learning rate is passed explicitly because
# myfirsttensor takes (batch_size, alpha).
# NOTE(review): 0.5 is taken from the batch-size-250 run - confirm it is the
# rate that produced the recorded output.
myfirsttensor(10, 0.5)

Model accuracy on train set =  0.87
Model accuracy on test set  =  0.87


In [44]:
# Batch size 1000. The learning rate is passed explicitly because
# myfirsttensor takes (batch_size, alpha).
# NOTE(review): 0.5 is taken from the batch-size-250 run - confirm it is the
# rate that produced the recorded output.
myfirsttensor(1000, 0.5)

Model accuracy on train set =  0.92
Model accuracy on test set  =  0.92


### Basics of Tensors

In [3]:
import tensorflow as tf 
# Two constant tensor nodes. Printing them shows the Tensor objects
# (name, shape, dtype), not the values 3.0 and 4.0.
node1 = tf.constant(3.0, dtype=tf.float32)  # dtype given explicitly
node2 = tf.constant(4.0)  # also tf.float32 implicitly
print(node1, node2)

# The central unit of data in TensorFlow is the tensor. 
# A tensor consists of a set of primitive values shaped into an array of any number of dimensions. A tensor's rank is its number of dimensions.


Tensor("Const:0", shape=(), dtype=float32) Tensor("Const_1:0", shape=(), dtype=float32)


In [7]:
# Nodes only produce values when they are run inside a session.
# `sess` is kept open because the following cells reuse it.
sess = tf.Session()
print (sess.run([node1, node2]))

# A computational graph is a series of TensorFlow operations arranged into a
# graph of nodes. Each node takes zero or more tensors as inputs and produces
# a tensor as an output. One type of node is a constant: like all TensorFlow
# constants, it takes no inputs, and it outputs a value it stores internally.

[3.0, 4.0]


In [8]:
# tf.add builds a new node that combines node1 and node2; it does not compute
# anything yet. Printing node3 shows the Tensor object, while sess.run(node3)
# evaluates the graph and returns the sum.
node3 = tf.add(node1, node2)
print ("node3 : ", node3)
print ("sess.run(node3): ", sess.run(node3))

node3 :  Tensor("Add:0", shape=(), dtype=float32)
sess.run(node3):  7.0


In [9]:
# Placeholders are graph inputs with no value of their own; their values are
# supplied when the graph is run.
a = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)
# `+` on two tensors builds an addition node, like tf.add
adder_node = a + b

In [10]:
# Run the adder once with scalar inputs and once with vector inputs;
# the same node handles both.
for feed in ({a: 3, b: 4.5}, {a: [1, 3], b: [2, 4]}):
    print(sess.run(adder_node, feed))

7.5
[ 3.  7.]


In [15]:
# Extend the graph: multiply the adder's output by 3
add_and_triple = adder_node * 3.
# (4.5 + 4.5) * 3
print (sess.run(add_and_triple, {a: 4.5, b:4.5}))

27.0


### Linear Model construction

In [16]:
# Parameters of the line W * x + b, with initial values 0.3 and -0.3.
# The dtype is passed by keyword: the second positional parameter of
# tf.Variable is `trainable`, not the dtype, so a positional tf.float32 would
# be taken as the trainable flag.
W = tf.Variable([.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)
# Input placeholder, fed when the model is run
x = tf.placeholder(tf.float32)
linear_model = W * x + b

In [17]:
# Build the op that initializes all variables, then run it in the session;
# the variables hold no value until this op has been run.
init = tf.global_variables_initializer()
sess.run(init)