# Linear Regression on MNIST database example 

In [None]:
#Import Lib
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time

The MNIST database (Mixed National Institute of Standards and Technology database) is
probably one of the most popular databases used for training various image processing systems
It is a database of handwritten digits. The images look like this:
![image.png](attachment:image.png)

In [None]:
# Step 1 : Read in Data
# using TF Learn's built in function to load MNIST data to the folder data/mnist
MNIST = input_data.read_data_sets("../data/mnist", one_hot=True)
# Step 2 Define paramaters for the model
learning_rate = 0.01
batch_size = 128
n_epochs = 25



One-hot encoding <br>
In digital circuits, one-hot refers to a group of bits among which the legal combinations of
values are only those with a single high (1) bit and all the others low (0).
<br>
In this case, one-hot encoding means that if the output of the image is the digit 7, then the
output will be encoded as a vector of 10 elements with all elements being 0, except for the
element at index 7 which is 1.
<br>
The construction of the logistic regression model in this problem is pretty similar to the linear regression model.
<br>
However, now we have <b> A LOT </b> more data. We learned in CS229 that if we calculate gradient
after every single data point it’d be painfully slow. One way to go around this is to batch ‘em up. (aka process a group of input save output then continue) 
<br>
Fortunately, TensorFlow has a wonderful support for batching data.

#### Step 3 : create placeholders for features and labels
each image in MNIST data is of the shape 28*28 = 784<br>
Therefore, each image is presented by 1x784 tensor 
There are 10 classed for each image , correspongding batch size 10 label , one hot vector(binary)

In [None]:
tf.reset_default_graph() # clean up graph
X = tf.placeholder(tf.float32, [batch_size, 784],name ="image")
Y = tf.placeholder(tf.float32, [batch_size, 10],name ="label")

###### Step 4: create weight and bias
w is inited to random variables mean of  0 , stddev of 0.01
<br>
b is init to 0 
<br> shape of w depend of dim of X and Y so that 
<br> shape of b depend on Y

In [None]:
w = tf.Variable(tf.random_normal(shape =[784,10],stddev=0.01), name="weights")
b = tf.Variable(tf.zeros([1,10]), name="bias")                

###### Step 6: Predict Y from X and w,b
<br>
the model that returns probability distribution of possible label of the image through softmax layer <br>
The <b< softmax function </b< is often used in the final layer of a neural network-based classifier. Such networks are commonly trained under a log loss (or cross-entropy) regime, giving a non-linear variant of multinomial logistic regression.
![image.png](attachment:image.png)

In [None]:
logits = tf.matmul(X,w) + b


###### Step 6: define loss function
<br>
use softmax cross entropy with logits as the loss function
compute mean cross entropy, softmax is applied internally
###### tf.nn.softmax_cross_entropy_with_logits(logits, Y)
###### then use tf.reduce_mean to get the mean loss of the batch

In [None]:
entropy = tf.nn.softmax_cross_entropy_with_logits(labels = Y, logits= logits ,name="entropy")
# computes the mean over examples in the batch
loss = tf.reduce_mean(entropy,name="reduce_mean") 



###### Step 8 : define training op
using gradientdescent with rate 0.001



In [None]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)
init = tf.global_variables_initializer()


In [None]:
sess = tf.Session()
# See your model in tensorboard
writer = tf.summary.FileWriter('./my_graph/03/linear_reg_MNIST',sess.graph)

start_time = time.time()
sess.run(init)
n_batches = int(MNIST.train.num_examples/batch_size)
for i in range(n_epochs): # train the model in n_epochs times
    total_loss = 0
    for _ in range(n_batches):
        X_batch, Y_batch = MNIST.train.next_batch(batch_size)
        _, loss_batch = sess.run([optimizer,loss],feed_dict={X: X_batch, Y:Y_batch})
        total_loss += loss_batch
    print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
print('Total time: {0} seconds'.format(time.time() - start_time))
print('Optimization Finished!') 


###### Test the model

The <b> sigmoid function </b>is used for the two-class logistic regression, whereas the softmax function is used for the multiclass logistic regression (a.k.a. MaxEnt, multinomial logistic regression,

In [None]:
# define a op route to calculate accuracy
preds = tf.nn.softmax(logits,name="prediction")
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32),name="accuracy")

n_batches = int(MNIST.test.num_examples/batch_size)
total_correct_preds = 0


In [None]:
# fetch in test data
for i in range(n_batches):
    X_batch, Y_batch = MNIST.test.next_batch(batch_size)
    accuracy_batch = sess.run([accuracy], feed_dict={X: X_batch, Y:Y_batch})
    total_correct_preds += accuracy_batch[0]

print('Accuracy {0}'.format(total_correct_preds/MNIST.test.num_examples))
# Accuracy 0.91

In [None]:
writer.close()
sess.close()

### Assignment 1: Improve this model 

#### Accuracy 0.91 is bad , let's improve it further 
https://www.tensorflow.org/get_started/mnist/pros
apply this tutorial
The model is presented as follow
![image.png](attachment:image.png)

In [None]:
#Import Lib
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time

# A model template
class DeepMnist:
    """Build the graph for word2vec model"""
    def __init__(self,MNIST,learning_rate,batch_size,n_epochs):
        self.mnist = MNIST
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        
    """ Step 1: define the place holder for inputs and outputs"""
    def _create_placeholders(self):
         with tf.name_scope("data"):
            self.x = tf.placeholder(tf.float32, shape=[None, 784] , name ="image")
            self.y_ = tf.placeholder(tf.float32, shape=[None, 10] , name = "label")
  
    def _create_embedding(self):
        """ Step 2: define weights. In word2vec, it's actually the weight we are traing"""
        with tf.name_scope("weights"):
            self.W = tf.Variable(initial_value = tf.truncated_normal([784,10],stddev=0.1),name="weights")
            self.b = tf.Variable(tf.constant(0.1,shape=[10]),name="bias")
        
    def _create_loss(self):
        """ Step 3+4: define the interference(forward path) + the loss function"""
        with tf.name_scope("loss"):
            self.y = tf.matmul(self.x,self.W) + self.b
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.y_, logits=self.y))
            
    def _create_optimizer(self):
        """Step 5: define optmizer"""
        self.optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(self.loss)
    def _create_summaries(self,sess):
        # self.correct_prediction = tf.equal(tf.argmax(self.y,1), tf.argmax(self.y_,1))
        #  self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
        # print(self.accuracy.eval(feed_dict={self.x: self.mnist.test.images, self.y_: self.mnist.test.labels}))
        # fetch in test data
        # define a op route to calculate accuracy
        y_preds = tf.nn.softmax(self.y,name="prediction")
        correct_preds = tf.equal(tf.argmax(y_preds, 1), tf.argmax(self.y_, 1))
        accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32),name="accuracy")
        n_batches = int(self.mnist.test.num_examples/self.batch_size)
        total_correct_preds = 0
        for i in range(n_batches):
            X_batch, Y_batch = self.mnist.test.next_batch(self.batch_size)
            accuracy_batch = sess.run([accuracy], feed_dict={self.x: X_batch, self.y_:Y_batch})
            total_correct_preds += accuracy_batch[0]
        print('Accuracy {0}'.format(total_correct_preds/self.mnist.test.num_examples))

    
   # """ Build the graph for our model"""
    def build_graph(self):
        self._create_placeholders()
        self._create_embedding()
        self._create_loss()
        self._create_optimizer()
    #self._create_summaries()
        
def train_model(model):
    saver = tf.
    sess = tf.Session()
# See your model in tensorboard
    writer = tf.summary.FileWriter('./my_graph/03b/linear_reg_MNIST_MODEL',sess.graph)
    start_time = time.time()
    sess.run(tf.global_variables_initializer())
    n_batches = int(model.mnist.train.num_examples/model.batch_size)
    for i in range(model.n_epochs): # train the model in n_epochs times
        total_loss = 0
        for _ in range(n_batches):
            X_batch, Y_batch = model.mnist.train.next_batch(model.batch_size)
            _, loss_batch = sess.run([model.optimizer,model.loss],feed_dict={model.x: X_batch, model.y_: Y_batch})
            total_loss += loss_batch
        print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
    print('Total time: {0} seconds'.format(time.time() - start_time))
    print('Optimization Finished!') 
    
    # summaries 
    print "Summaries : ....." 
    model._create_summaries(sess)
    writer.close()
    sess.close()
    
def main():
    model = DeepMnist(MNIST = input_data.read_data_sets("../data/mnist", one_hot=True),
                        learning_rate = 0.01,
                        batch_size = 128,
                        n_epochs = 25)
    tf.reset_default_graph() # clean up graph
    model.build_graph()
    train_model(model)
   
    
if __name__ == '__main__':
    main()
        

# Next, We study Saving and Restoring


In [9]:
#Import Lib
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time

# A model template
class DeepMnist:
    """Build the graph for word2vec model"""
    def __init__(self,MNIST,learning_rate,batch_size,n_epochs):
        self.mnist = MNIST
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
        
    """ Step 1: define the place holder for inputs and outputs"""
    def _create_placeholders(self):
         with tf.name_scope("data"):
            self.x = tf.placeholder(tf.float32, shape=[None, 784] , name ="image")
            self.y_ = tf.placeholder(tf.float32, shape=[None, 10] , name = "label")
  
    def _create_embedding(self):
        """ Step 2: define weights. In word2vec, it's actually the weight we are traing"""
        with tf.name_scope("weights"):
            self.W = tf.Variable(initial_value = tf.truncated_normal([784,10],stddev=0.1),name="weights")
            self.b = tf.Variable(tf.constant(0.1,shape=[10]),name="bias")
        
    def _create_loss(self):
        """ Step 3+4: define the interference(forward path) + the loss function"""
        with tf.name_scope("loss"):
            self.y = tf.matmul(self.x,self.W) + self.b
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.y_, logits=self.y))
            
    def _create_optimizer(self):
        """Step 5: define optmizer"""
        self.optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(self.loss)
        
        with tf.name_scope("summaries"):
            tf.summary.scalar("loss", self.loss)
            #tf.summary.histogram("histogram loss", self.loss)
            # because you have several summaries, we should merge them all
            # into one op to make it easier to manage
            self.summary_op = tf.summary.merge_all()
    def _create_summaries(self,sess):
        y_preds = tf.nn.softmax(self.y,name="prediction")
        correct_preds = tf.equal(tf.argmax(y_preds, 1), tf.argmax(self.y_, 1))
        accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32),name="accuracy")
        n_batches = int(self.mnist.test.num_examples/self.batch_size)
        total_correct_preds = 0
        for i in range(n_batches):
            X_batch, Y_batch = self.mnist.test.next_batch(self.batch_size)
            accuracy_batch = sess.run([accuracy], feed_dict={self.x: X_batch, self.y_:Y_batch})
            total_correct_preds += accuracy_batch[0]
        print('Accuracy {0}'.format(total_correct_preds/self.mnist.test.num_examples))

    
   # """ Build the graph for our model"""
    def build_graph(self):
        self._create_placeholders()
        self._create_embedding()
        self._create_loss()
        self._create_optimizer()
 
        
def train_model(model):
    saver = tf.train.Saver() # provide method for saving and restore
    # Later, launch the model , initialize the variables and do some work 
    # and save the variable to disk
    initial_step = 0
    with tf.Session() as sess: # set as default session
        sess.run(tf.global_variables_initializer())
        # Restore variables only when it exist
        ckpt = tf.train.get_checkpoint_state("./DeepMnist/checkpoint/")
        if ckpt and ckpt.model_checkpoint_path :
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored.")

        # See your model in tensorboard
        writer = tf.summary.FileWriter('./my_graph/03b/linear_reg_MNIST_MODEL',sess.graph)
        start_time = time.time()
        n_batches = int(model.mnist.train.num_examples/model.batch_size)
        initial_step = model.global_step.eval()
        print initial_step
        for i in range(initial_step, initial_step + model.n_epochs): # train the model in n_epochs times
            total_loss = 0
            for _ in range(n_batches):
                X_batch, Y_batch = model.mnist.train.next_batch(model.batch_size)
                _, loss_batch = sess.run([model.optimizer,model.loss],feed_dict={model.x: X_batch, model.y_: Y_batch})
                total_loss += loss_batch
            print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))


            # Save the variables to disk.
            save_path = saver.save(sess,"./DeepMnist/checkpoint/",i)
            print("Model saved in the file: %s" % save_path)

        print('Total time: {0} seconds'.format(time.time() - start_time))
        print('Optimization Finished!') 

        # summaries 
        print "Summaries : ....." 
        model._create_summaries(sess)
        writer.close()
   
    
def main():
    tf.reset_default_graph() # clean up graph
    model = DeepMnist(MNIST = input_data.read_data_sets("../data/mnist", one_hot=True),
                        learning_rate = 0.01,
                        batch_size = 128,
                        n_epochs = 5)
  
    model.build_graph()
    train_model(model)
   
    
if __name__ == '__main__':
    main()
        

Extracting ../data/mnist/train-images-idx3-ubyte.gz
Extracting ../data/mnist/train-labels-idx1-ubyte.gz
Extracting ../data/mnist/t10k-images-idx3-ubyte.gz
Extracting ../data/mnist/t10k-labels-idx1-ubyte.gz
INFO:tensorflow:Restoring parameters from ./DeepMnist/checkpoint/-4
Model restored.
0
Average loss epoch 0: 0.259998513894
Model saved in the file: ./DeepMnist/checkpoint/-0
Average loss epoch 1: 0.260115493063
Model saved in the file: ./DeepMnist/checkpoint/-1
Average loss epoch 2: 0.258158704392
Model saved in the file: ./DeepMnist/checkpoint/-2
Average loss epoch 3: 0.258019433884
Model saved in the file: ./DeepMnist/checkpoint/-3
Average loss epoch 4: 0.257177621925
Model saved in the file: ./DeepMnist/checkpoint/-4
Total time: 3.70827102661 seconds
Optimization Finished!
Summaries : .....
Accuracy 0.9233
