In [1]:
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def random_mini_batches(X, Y, mini_batch_size = 64, seed = None):
    """
    Creates a list of random minibatches from (X, Y)

    Arguments:
    X -- input data, of shape (input size, number of examples)
    Y -- labels, of shape (label size, number of examples); the columns of Y
         must line up with the columns of X
    mini_batch_size -- size of the mini-batches, positive integer
    seed -- optional integer; when given, the shuffle is drawn from a private
            np.random.RandomState(seed) so the partition is reproducible.
            When None (default) the global numpy random state is used.

    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y) tuples.
                    Every batch has mini_batch_size columns except possibly
                    the last one, which holds the remaining examples.

    Raises:
    ValueError -- if mini_batch_size is not positive, or if X and Y do not
                  have the same number of columns
    """

    if mini_batch_size <= 0:
        raise ValueError("mini_batch_size must be a positive integer, got {}".format(mini_batch_size))

    m = X.shape[1]                  # number of training examples
    if Y.shape[1] != m:
        raise ValueError("X has {} examples but Y has {}".format(m, Y.shape[1]))

    # Step 1: Shuffle (X, Y) with one shared permutation so pairs stay aligned.
    if seed is None:
        permutation = np.random.permutation(m)
    else:
        permutation = np.random.RandomState(seed).permutation(m)
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation]

    # Step 2: Partition. Slicing past the end is clamped by numpy, so the
    # final (possibly shorter) batch needs no special case.
    mini_batches = []
    for start in range(0, m, mini_batch_size):
        stop = start + mini_batch_size
        mini_batches.append((shuffled_X[:, start:stop], shuffled_Y[:, start:stop]))

    return mini_batches


In [3]:
# load the mnist data set
def load_data(data_dir = 'G:/MNIST_data/'):
    ''' load the MNIST data set with one-hot labels.

    arguments:
    data_dir -- directory handed to input_data.read_data_sets; defaults to the
                path this notebook was originally written against

    returns:
    train_data, train_label, validation_data, validation_label, test_data, test_label
    -- the images and labels of each split, transposed (.T) from the arrays
       that read_data_sets provides, so that each column is one example
    '''
    mnist = input_data.read_data_sets(data_dir, one_hot = True)
    train_data = mnist.train.images.T
    train_label = mnist.train.labels.T
    validation_data = mnist.validation.images.T
    validation_label = mnist.validation.labels.T
    test_data = mnist.test.images.T
    test_label = mnist.test.labels.T
    return train_data,train_label,validation_data,validation_label,test_data,test_label
# train_data,train_label,validation_data,validation_label,test_data,test_label = load_data()
# print('train:',train_label.shape)
# print('validation:',validation_data.shape)
# print('test:',test_data.shape)

def create_placeholders(n_x, n_y):
    ''' build the two placeholders that are fed to the tensorflow session.

    arguments:
    n_x -- scalar, size of an image vector (28*28 = 784)
    n_y -- scalar, number of classes (digits 0 to 9 -> 10)

    returns:
    X -- float32 placeholder named 'X' for the input data, shape [n_x, None]
    Y -- float32 placeholder named 'Y' for the input labels, shape [n_y, None]

    The second dimension is left as None so the number of examples per feed
    is not fixed.
    '''

    # (first dimension, graph name) for the data and the label placeholder, in that order
    specs = ((n_x, 'X'), (n_y, 'Y'))
    return tuple(
        tf.placeholder(tf.float32, shape = [rows, None], name = label)
        for rows, label in specs
    )

def initialize_parameters(layer_dims):
    ''' create the weight and bias variables of a fully connected network.

    argument:
    layer_dims -- sequence of layer widths, input layer first
                  (e.g. [784, 256, 100, 10])

    returns:
    parameters -- dict with, for every layer l = 1 .. len(layer_dims) - 1,
                  'W<l>': variable of shape [layer_dims[l], layer_dims[l-1]], Xavier initializer
                  'b<l>': variable of shape [layer_dims[l], 1], zeros initializer

    Each created variable is also printed.
    '''

    parameters = {}
    # pair every layer width with the width of the layer that feeds it
    width_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (n_in, n_out) in enumerate(width_pairs, 1):
        w_key = 'W' + str(idx)
        b_key = 'b' + str(idx)
        parameters[w_key] = tf.get_variable(w_key, [n_out, n_in], initializer = tf.contrib.layers.xavier_initializer())
        parameters[b_key] = tf.get_variable(b_key, [n_out, 1], initializer = tf.zeros_initializer())
        print("W"+str(idx), parameters[w_key])
        print("b"+str(idx), parameters[b_key])
    return parameters

def forward_propagation(X, parameters, p = 1.0):
    '''  Implements the forward propagation for the model:
    (LINEAR -> RELU) for every layer but the last, then a final LINEAR.

    arguments:
    X -- input tensor; it is left-multiplied by parameters['W1']
    parameters -- dict holding 'W1', 'b1', ..., 'WL', 'bL'; the number of
                  layers L is taken as len(parameters) // 2
    p -- accepted but not used anywhere in this function

    returns:
    ZL -- output of the last LINEAR unit. No softmax is applied here.
    '''

    depth = len(parameters) // 2
    activation = X

    # hidden layers 1 .. depth-1: affine map followed by ReLU
    for idx in range(1, depth):
        linear = tf.matmul(parameters['W'+ str(idx)], activation) + parameters['b'+str(idx)]
        activation = tf.nn.relu(linear)

    # output layer: affine map only
    return tf.matmul(parameters['W'+ str(depth)], activation) + parameters['b'+str(depth)]

def fc_layer(n, inputs, input_dims, out_dims, activation = None):
    ''' fully connected layer: activation(W . inputs + b).

    arguments:
    n -- layer index, used to name the variables 'W<n>' and 'b<n>'
    inputs -- tensor that is left-multiplied by W, so its first dimension
              must equal input_dims
    input_dims -- number of input units
    out_dims -- number of output units
    activation -- None for a purely linear layer, or 'relu'

    returns:
    outputs -- W . inputs + b, passed through ReLU when activation == 'relu'

    raises:
    ValueError -- if activation is neither None nor 'relu'
    '''
    # Reject unsupported activations before any variable is added to the graph.
    if activation is not None and activation != 'relu':
        raise ValueError("unsupported activation: {!r} (expected None or 'relu')".format(activation))

    W = tf.get_variable('W' + str(n), [out_dims, input_dims], initializer = tf.contrib.layers.xavier_initializer())
    b = tf.get_variable('b' + str(n), [out_dims, 1], initializer = tf.zeros_initializer())
    Z = tf.add(tf.matmul(W, inputs), b)

    if activation == 'relu':
        return tf.nn.relu(Z)
    return Z

# tf.reset_default_graph()
# with tf.Session() as sess:
#     layer_dims = [2,3,4]
#     X,Y = create_placeholders(2, 4)
#     parameters = initialize_parameters(layer_dims)
#     print("W1 = " + str(parameters["W1"]))
#     print("b1 = " + str(parameters["b1"]))
#     print("W2 = " + str(parameters["W2"]))
#     print("b2 = " + str(parameters["b2"]))
#     ZL = forward_propagation(X, parameters)
#     print(ZL)

def compute_cost(ZL, Y):
    ''' mean softmax cross-entropy between the network output and the labels.

    arguments:
    ZL -- output of the last LINEAR unit (the logits)
    Y -- labels, same layout as ZL

    returns:
    cost -- scalar tensor, tf.reduce_mean of the cross-entropy values

    Both tensors are transposed before being handed to
    tf.nn.softmax_cross_entropy_with_logits.
    '''
    per_example = tf.nn.softmax_cross_entropy_with_logits(
        logits = tf.transpose(ZL),
        labels = tf.transpose(Y))

    return tf.reduce_mean(per_example)

def model_train(train_data, train_label, val_data, val_label, layer_dims, learning_rate = 0.01,
          num_epochs = 100, minibatch_size = 64, print_cost = True):
    ''' build the network described by layer_dims and train it with Adam.

    arguments:
    train_data -- training inputs, shape (n_x, number of examples)
    train_label -- training labels, shape (n_y, number of examples)
    val_data -- validation inputs, same row layout as train_data
    val_label -- validation labels, same row layout as train_label
    layer_dims -- layer widths passed to initialize_parameters
    learning_rate -- learning rate of the Adam optimizer
    num_epochs -- number of passes over the training set
    minibatch_size -- number of examples per minibatch
    print_cost -- when true, report training loss, validation loss and
                  validation accuracy every 10th epoch

    returns:
    parameters -- the dict returned by initialize_parameters.
                  NOTE(review): these are the graph variables, not their
                  trained values, and the session that trained them is closed
                  when this function returns -- confirm that callers do not
                  need the learned weights.
    '''

    ops.reset_default_graph()
    n_x, n_y = train_data.shape[0], train_label.shape[0]
    print(n_x,n_y)
    X, Y = create_placeholders(n_x, n_y)
    print(X,Y)
    parameters = initialize_parameters(layer_dims)
    ZL = forward_propagation(X, parameters)
    print(ZL)
    cost = compute_cost(ZL, Y)
    print(cost)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # Classes run along axis 0, so the predicted/true class is the argmax over axis 0.
    correct_prediction = tf.equal(tf.argmax(ZL, 0), tf.argmax(Y, 0))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(num_epochs):
            epoch_cost = 0
            minibatches = random_mini_batches(train_data, train_label, minibatch_size)
            # Count the batches actually produced: random_mini_batches appends a
            # trailing partial batch that examples // minibatch_size would miss.
            num_minibatches = len(minibatches)

            for mini_X, mini_Y in minibatches:
                _, mini_cost = sess.run([optimizer, cost], feed_dict = {X:mini_X, Y:mini_Y})
                epoch_cost += mini_cost / num_minibatches

            if print_cost and (epoch+1) % 10 == 0:
                # Validation is only needed for the report, so evaluate it only here.
                val_loss,acc = sess.run([cost,accuracy], feed_dict = {X:val_data, Y:val_label})
                print("Train epoch: {}, Train_loss = {:.6f} || Validation_loss = {:.6f}, Accuracy = {:.4f}".format(epoch+1, epoch_cost, val_loss, acc))
    return parameters

In [4]:
# Network layout: 784 inputs -> 256 -> 100 -> 10 outputs.
layer_dims = [784, 256, 100, 10]

(train_data, train_label,
 val_data, val_label,
 test_data, test_label) = load_data()

# NOTE(review): `peremeters` looks like a misspelling of `parameters`; the name
# is kept as-is because other cells may refer to it.
peremeters = model_train(
    train_data, train_label,
    val_data, val_label,
    layer_dims,
    minibatch_size = 100)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting G:/MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting G:/MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting G:/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting G:/MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
784 10
Tensor("X:0", shape=(784, ?), dtype=float32) Tensor("Y:0", shape=(10, ?), dtype=float32)
W1 <tf.Variable 'W1:0' shape=(256, 784) dtype=float32_ref>
b1 <tf.Variable 'b1:0' shape=(256, 1) dtype=float32_ref>
W2 <tf.Variable 'W2:0' shape=(100, 256) dtype=float32_ref>
b2 <tf.Variable 'b