# TensorFlow 在 MNIST 中的应用
接下来会使用，softmax, cnn, rnn以及自编码器的方式来在MNIST数据集上训练模型，来更好的上手tensorflow

## SoftMax 分类

In [4]:
# 1.加载数据
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# flags = tf.app.flags
# FLAGS = flags.FLAGS
# flags.DEFINE_string('data_dir', './datasets/mnist/', 'Directory for storing data')
mnist = input_data.read_data_sets("./datasets/mnist/", one_hot=True)

# 使用one_hot 的直接原因是，我们使用0～9 个类别的多分类的输出层是softmax 层，它的输
# 出是一个概率分布，从而要求输入的标记也以概率分布的形式出现，进而可以计算交叉熵

Extracting ./datasets/mnist/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./datasets/mnist/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ./datasets/mnist/t10k-images-idx3-ubyte.gz
Extracting ./datasets/mnist/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [5]:
# 2.构造模型（定义计算图）

# 定义输入数据的占位符
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# 定义参数
w = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# 定义模型
y_pred = tf.matmul(x, w) + b

# 定义损失函数(交叉熵损失函数)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    labels=y, logits=y_pred)) #在这个函数里进行softmax
# 定义优化器
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

In [10]:
# 3.训练模型

# 全局初始化器
init = tf.global_variables_initializer()
# 创建交互式上下文的TensorFlow 会话 ,与常规会话不同的是，交互式会话会成为默认会话
sess = tf.InteractiveSession()
#with tf.Session() as sess:
sess.run(init)

# 迭代1000次，batchsize=128
for _ in range(1000):
    batch_x, batch_y = mnist.train.next_batch(128)
    sess.run(train_step, feed_dict={x: batch_x, y: batch_y})
        

In [11]:
# 4.评估模型

# 由于使用了 onehot编码，label从一维变为了十维,tf.argmax(y,1)返回的是模型对任一输入 x 预测到的标记值，
# tf.argmax(y_,1)代表正确的标记值。我们用 tf.equal 来检测预测值和真实值是否匹配，
# 并且将预测后得到的布尔值转化成浮点数，并取平均值

# 评估训练好的模型
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1)) # 类似 y==y_pred,每行比较
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # sum(y,y_pred,axis=0)/len(y)

# 计算模型在测试集上的准确率 
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))

0.9208


## CNN 分类

In [1]:
# 1. 导入库， 载数据
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.model_selection import train_test_split
mnist = input_data.read_data_sets('./datasets/mnist/', one_hot=True)
# 划分训练集和测试集
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
# 转化为4维
trX, teX = list(map(lambda x: x.reshape([-1, 28, 28, 1]), [trX, teX]))
#print(trX[0])

  from ._conv import register_converters as _register_converters


Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./datasets/mnist/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./datasets/mnist/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ./datasets/mnist/t10k-images-idx3-ubyte.gz
Extracting ./datasets/mnist/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [2]:
# 2.构造模型，计算图
# 网络结构: 3 个卷积层和3 个池化层 1 个全连接层和 1 个输出层
# 输入数据的占位符
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, 10])

# 定义权重
# 卷积核大小均为 3*3
w1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=.01), name='w1') # 32个卷积核
w2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=.01), name='w2') # 64个卷积核
w3 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=.01), name='w3') # 128个卷积核
w4 = tf.Variable(tf.random_normal([128*4*4, 625], stddev=.01), name='w4') # 全连接层， 输入维度为 128 × 4 × 4,是上一层的输出数据又三维的转变成一维
wo = tf.Variable(tf.random_normal([625, 10], stddev=.01), name='wo') # 输出层，输出维度10代表结果为10个类别
# dropout
prob_keep_conv, prob_keep_full= 0.8, 0.5
# 定义网络结构
# conv1
l1 = tf.nn.conv2d(x, w1, strides=[1,1,1,1], padding='SAME')#shape=(?, 28, 28, 32) 
l1 = tf.nn.relu(l1)
l1 = tf.nn.max_pool(l1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME') # shape=(?, 14, 14, 32) 
l1 = tf.nn.dropout(l1, prob_keep_conv)
# conv2
l2 = tf.nn.conv2d(l1, w2, strides=[1,1,1,1], padding='SAME') # shape=(?, 14, 14, 64) 
l2 = tf.nn.relu(l2)
l2 = tf.nn.max_pool(l2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME') # shape=(?, 7, 7, 64) 
l2 = tf.nn.dropout(l2, prob_keep_conv)
# conv3
l3 = tf.nn.conv2d(l2, w3, strides=[1,1,1,1], padding='SAME') # shape=(?, 7, 7, 128) 
l3 = tf.nn.relu(l3)
l3 = tf.nn.max_pool(l3, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME') # shape=(?, 4, 4, 128) 
l3 = tf.reshape(l3, [-1, w4.get_shape().as_list()[0]]) # reshape to (? 2048)
l3 = tf.nn.dropout(l3, prob_keep_conv)
# full connection 1
l4 = tf.matmul(l3, w4)
l4 = tf.nn.relu(l4)
l4 = tf.nn.dropout(l4, prob_keep_full)
# 输出层
out = tf.matmul(l4, wo) 
# 注意：最后一层现在没有使用softmax，到计算损失函数的时候在使用,其实预测类别时无须使用softmax，值最大的经过softmax还是最大，求概率和交叉熵时使用

# 定义损失函数和优化器
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=out))
train_opt = tf.train.RMSPropOptimizer(learning_rate=.001, decay=.9).minimize(cost) # 学习率为0.001，衰减值为 0.9

predict_cls = tf.argmax(out, 1)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [None]:
# 训练模型 和 评估模型
#print(trX.shape)
def get_next_batch(x, y, batch_size):
    print(x.shape)
    shuffle_train_indices = np.arange(len(trX))
    np.random.shuffle(shuffle_train_indices)
    x, y = x[shuffle_train_indices], y[shuffle_train_indices]
    batch_nums = len(x) // batch_size
    for i in range(0, batch_nums):
        yield x[i:(i+1)*batch_size], y[i:(i+1)*batch_size]
    now_ = batch_nums * batch_size 
    if now_ < len(x):
        yield x[now_:], y[now_:]

batch_size = 128
test_size = 256

with tf.Session() as sess:
    tf.global_variables_initializer().run() # 全局Variable 初始化
    for i in range(10):
        batch = 0
        for batch_x, batch_y in get_next_batch(trX, trY, batch_size):
            sess.run(train_opt, feed_dict={x: batch_x, y: batch_y})
            print("batch: {}, cost: {}".format(batch, sess.run(cost, feed_dict={x: batch_x, y: batch_y})))
            batch += 1
        #print("iters: {}, cost: {}".format(i+1, sess.run(cost, feed_dict={x: trX, y: trY})))
            
    # 评估模型
    correct_prediction = tf.reduce_sum(tf.argmax(y, 1), predict_cls)
    accuracy = correct_prediction / len(x)
    print(sess.run(accuracy, feed_dict={x: teX, y: teY}))
    

(55000, 28, 28, 1)
batch: 0, cost: 2.302584171295166
batch: 1, cost: 2.3025898933410645
batch: 2, cost: 2.3025877475738525
batch: 3, cost: 2.302583932876587
batch: 4, cost: 2.302582025527954


## RNN 分类

In [1]:
# 1. 加载数据 导入库
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('./datasets/mnist', one_hot=True)

  from ._conv import register_converters as _register_converters


Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./datasets/mnist\train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./datasets/mnist\train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ./datasets/mnist\t10k-images-idx3-ubyte.gz
Extracting ./datasets/mnist\t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [None]:
# 2.构建模型

# 设置超参数
lr = .001 # 学习率
train_iters = 10000 # 迭代次数
batch_size = 128 # mini_batch

# 设置网络参数
# 为了使用 RNN 来分类图片，把每张图片的行看成是一个像素序列
# 每一步输入的序列长度是28，输入的步数是28 步
n_inputs = 28
n_steps = 28
n_hidden_units = 128 # 隐层神经元的个数
n_classes = 10

# 输入数据的占位符
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, 10])

# 定义初始权重, rnn中参数共享
weights = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])), 
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}

# 定义RNN模型
def RNN(x, weights, biases):
    # X ==> (128 batch * 28 steps, 28 inputs)
    X = tf.reshape(x, [-1, n_inputs])
    
    # 进入隐藏层
    X_in = tf.matmul(X, weights['in']) + biases['in'] # X_in > [128 batch*28 steps, 128 hidden_units]
    # X_in > [128 batch, 28 step, 128 hidden unit]
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])
    
    # 基本的LSTM 循环网络单元
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    # 初始化为零值，lstm单元由两个部分组成：(c_state, h_state)
    init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
    # dynamic_rnn 接收输入张量(batch, steps, inputs)
    outputs, final_states = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=init_state, time_major=False)
    
    results = tf.matmul(final_states[1], weights['out']) + biases['out']
    return results

# 定义损失函数和优化器
y_pred = RNN(x, weights, biases)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_pred)) # 先softmax 后交叉熵
train_optimizer = tf.train.AdamOptimizer(lr).minimize(cost)

In [None]:
# 训练和评估模型
correct_prediction = tf.reduce_sum(tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1)))
accuracy = correct_prediction / len(y)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    step = 0
    while step*batch_size < n_iters:
        batch_X, batch_y = mnist.train.next_batch(batch_size)
        batch_X = tf.reshape(batch_X, [batch_size, n_steps, n_inputs])
        sess.run(train_opt, feed_dict={x: batch_X, y: batch_y})
        
        if step % 50 == 0:
            now_loss = sess.run(cost, feed_dict={x: batch_X, y: batch_y})
            print(now_loss)
        step += 1