In [18]:
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle

np.random.seed(42)
rng = np.random

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
X_train = mnist.train.images
y_train = mnist.train.labels 
X_test = mnist.test.images
y_test = mnist.test.labels

In [6]:
X_train.shape, y_train.shape

((55000, 784), (55000, 10))

In [168]:
from time import time

In [173]:
class LogisticRegressionTF(object):

    def __init__(self, eta=0.01, epochs=50, display_step = 10, batch_size=10, regularization=0.01,
                hidden_layers=[256,128]):
        self.eta = eta
        self.epochs = epochs
        self.display_step_ = display_step
        self.batch_size_ = batch_size
        self.hidden_layers_ = hidden_layers
        self.regularization_ = regularization
        
    def get_minibatches(self, X):
        return zip(range(0, len(X), self.batch_size_), 
                                      range(self.batch_size_, 
                                      len(X)+1, self.batch_size_))
        
    def train(self, X_train, y_train, X_test, y_test):
        
        n_samples, n_features = X_train.shape
        _, n_classes = y_train.shape
        self.cost_ = []
    
        X_ = tf.placeholder(tf.float32, name='features')
        Y_ = tf.placeholder(tf.float32, name='targets')
        ri = tf.random_normal_initializer()
        ci = tf.constant_initializer(0)
        

        weights = {}
        biases = {}
        num_layers = len(self.hidden_layers_)
        for layer, layer_size in enumerate(self.hidden_layers_):
            n = layer_size #if layer == 0 else self.hidden_layers[layer-1]
            m = self.hidden_layers_[layer+1] if layer+1 < num_layers else n_classes

            w_name = 'w{:}'.format(layer)
            b_name = 'b{:}'.format(layer)
            weights[str(layer)] = tf.Variable(tf.truncated_normal([n, m], stddev=0.1), name=w_name)
            biases[str(layer)] = tf.Variable(tf.zeros([m]), name=b_name)
            
        # Model
        layers = {}
        layers['0'] = tf.nn.relu(tf.matmul(X_, weights['0']) +  biases['0'])
        
        for i in range(1, num_layers):
            
            z = tf.matmul(layers[str(i-1)], weights[str(i)]) + biases[str(i)]
            activation = tf.nn.relu(z)
            layers[str(i)] = activation
                    
        out_z = tf.matmul(layers[str(num_layers-2)], 
                                 weights[str(num_layers-1)]) + biases[str(num_layers-1)]   
        
        out_act = tf.nn.sigmoid(out_z, name='predicted_probabilities')
        out_labels = tf.argmax(out_z, axis=1, name='predicted_labels')
    
        l2_loss = 0.
        for w in weights.values():
            l2_loss += tf.nn.l2_loss(w)
            
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_z, labels=Y_)
        cost = tf.reduce_mean(loss, name='cost') + self.regularization_ * (l2_loss)

        
        optimizer = tf.train.GradientDescentOptimizer(self.eta)
        train_step = optimizer.minimize(cost, name='train')
        
        correct_prediction = tf.equal(tf.argmax(Y_, 1), out_labels)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
    
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)
            
            for i in range(self.epochs):
                
                avg_cost = 0.
                
                x, y = shuffle(X_train, y_train)
                
                for start, end in self.get_minibatches(x):
                    _, c = sess.run(['train', 'cost:0'], feed_dict={'features:0': x[start:end],
                                                            'targets:0': y[start:end]})
                    
                    avg_cost += c

                if (i+1) % self.display_step_ == 0:

                    train_acc = sess.run('accuracy:0', feed_dict={'features:0': X_train,
                                                          'targets:0': y_train})
                    valid_acc = sess.run('accuracy:0', feed_dict={'features:0': X_test,
                                                          'targets:0': y_test})  

                    print("Epoch: %03d | AvgCost: %.3f" % (i + 1, avg_cost / (i + 1)), end="")
                    print(" | Train/Valid ACC: %.3f/%.3f" % (train_acc, valid_acc))

        return self
    
#     def predict(self, X):        
#         return self.W * X + self.b

In [179]:
n_iter = 100
eta = 0.01
hidden_layers = [784, 256, 128]

lr_tf = LogisticRegressionTF(eta, n_iter, display_step=10, batch_size=128, hidden_layers=hidden_layers)
lr_tf = lr_tf.train(X_train, y_train, X_test, y_test)

Epoch: 010 | AvgCost: 9.988 | Train/Valid ACC: 0.944/0.943
Epoch: 020 | AvgCost: 4.540 | Train/Valid ACC: 0.951/0.952
Epoch: 030 | AvgCost: 2.924 | Train/Valid ACC: 0.952/0.952
Epoch: 040 | AvgCost: 2.162 | Train/Valid ACC: 0.953/0.953
Epoch: 050 | AvgCost: 1.716 | Train/Valid ACC: 0.953/0.953
Epoch: 060 | AvgCost: 1.422 | Train/Valid ACC: 0.952/0.951
Epoch: 070 | AvgCost: 1.214 | Train/Valid ACC: 0.955/0.955
Epoch: 080 | AvgCost: 1.057 | Train/Valid ACC: 0.954/0.954
Epoch: 090 | AvgCost: 0.940 | Train/Valid ACC: 0.954/0.954
Epoch: 100 | AvgCost: 0.842 | Train/Valid ACC: 0.954/0.953
