In [92]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [62]:
import _pickle as cPickle, gzip, numpy
f = gzip.open('mnist.pkl.gz', 'rb')
train_set, valid_set, test_set = cPickle.load(f, encoding='latin1')
f.close()

In [63]:
X_train, Y_train = train_set
X_test, Y_test = test_set
labels = list(set(Y_train))
depth = len(labels)
print("labels: ", labels)
with tf.Session() as sess:
    YtrainOneHot = tf.one_hot(Y_train, depth, axis = 0)
    Y_train = sess.run(YtrainOneHot)
    YtestOneHot = tf.one_hot(Y_test, depth, axis = 0)
    Y_test = sess.run(YtestOneHot)
X_train = X_train.T
X_test = X_test.T
print("X_train dimension:{} ,Y_train dimention:{}".format(X_train.shape, Y_train.shape))
print("X_test dimension:{} ,Y_test dimention:{}".format(X_test.shape, Y_test.shape))

labels:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
X_train dimension:(784, 50000) ,Y_train dimention:(10, 50000)
X_test dimension:(784, 10000) ,Y_test dimention:(10, 10000)


In [76]:
layer_dims = [784,25,25,10]

In [65]:
def placeholders(num_features, num_classes):
    A_0 = tf.placeholder(dtype = tf.float64, shape = ([num_features,None]))
    Y = tf.placeholder(dtype = tf.float64, shape = ([num_classes,None]))
    return A_0,Y

In [66]:
def initialize_parameters_deep(layer_dims):
    L = len(layer_dims)
    parameters = {}
    for l in range(1,L):
        parameters['W' + str(l)] = tf.Variable(initial_value=tf.random_normal([layer_dims[l], layer_dims[l-1]], dtype=tf.float64) * 0.01)
        parameters['b' + str(l)] = tf.Variable(initial_value=tf.zeros([layer_dims[l],1], dtype=tf.float64))
    return parameters 

In [67]:
def linear_forward_prop(A_prev,W,b, activation):
    Z = tf.add(tf.matmul(W, A_prev), b)
    if activation == "softmax":
        A = Z
    elif activation == "relu":
        A = tf.nn.relu(Z)
    return A

In [68]:
def l_layer_forwardProp(A_0, parameters):
    A = A_0
    L = len(parameters)//2
    for l in range(1,L):
        A_prev = A
        A = linear_forward_prop(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], "relu")
    A = linear_forward_prop(A, parameters['W' + str(L)], parameters['b' + str(L)], "softmax" )
    return A

In [80]:
def final_cost(Z_final, Y ):
    logits = tf.transpose(Z_final)
    labels = tf.transpose(Y)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    return cost

In [81]:
def random_samples_minibatch(X_train, Y_train, batch_size, seed = 1):
    np.random.seed(seed)
    m = X_train.shape[1]
    num_batches = int(m/batch_size)
    indices = np.random.permutation(m)
    shuffle_X = X_train[:,indices]
    shuffle_Y = Y_train[:,indices]
    mini_batches = []
    for i in range(num_batches):
        X_batch = shuffle_X[:,i * batch_size:(i+1) * batch_size]
        Y_batch = shuffle_Y[:,i * batch_size:(i+1) * batch_size]
        mini_batches.append((X_batch, Y_batch))
    if m % batch_size != 0:
        X_batch = shuffle_X[:, (num_batches * batch_size):]
        Y_batch = shuffle_Y[:, (num_batches * batch_size):]
        mini_batches.append((X_batch, Y_batch))
    return mini_batches

In [82]:
def model_with_minibatch(X_train,Y_train, layer_dims, learning_rate, num_iter, mini_batch_size):
    num_features, num_samples = X_train.shape
    num_classes = Y_train.shape[0]
    A_0, Y = placeholders(num_features, num_classes)
    parameters = initialize_parameters_deep(layer_dims)
    Z_final = l_layer_forwardProp(A_0, parameters)
    cost = final_cost(Z_final, Y)
    train_net = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    seed = 1
    num_minibatches = int(num_samples / mini_batch_size)
    init = tf.global_variables_initializer()
    costs = []
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(num_iter):
            epoch_cost = 0
            mini_batches = random_samples_minibatch(X_train, Y_train, mini_batch_size, seed)
            seed = seed + 1
            for mini_batch in mini_batches:
                X_batch, Y_batch = mini_batch
                _,mini_batch_cost = sess.run([train_net, cost], feed_dict={A_0: X_batch, Y: Y_batch})
                epoch_cost += mini_batch_cost/num_minibatches
            if epoch % 100 == 0:
                costs.append(epoch_cost)
            if epoch % 1000 == 0:
                print(epoch_cost)
        plt.ylim(0.2, 3, 0.01)
        plt.xlabel("epoches per 100")
        plt.ylabel("cost")
        plt.plot(costs)
        plt.show()
        params = sess.run(parameters)
    return params

In [None]:
model_with_minibatch(X_train, Y_train, layer_dims, 0.01, 10000, 1024)

2.3504868158
