<a href="https://colab.research.google.com/github/foochane/Tensorflow-neural-network-framework/blob/master/04TensorFlow%E6%9E%84%E5%BB%BA%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C%E4%BB%A3%E7%A0%81%E7%A4%BA%E4%BE%8B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 加载数据

In [0]:
import tensorflow as tf
import numpy as np
# Load the MNIST dataset, already split into a training and a test set.
(train_images, train_labels), (test_images, test_labels) = (
    tf.keras.datasets.mnist.load_data()
)

In [0]:
# Preprocessing: flatten every 28x28 image into a 784-element row and divide
# by 255 so that 8-bit pixel values end up in [0, 1].
# The number of rows is read from the arrays themselves instead of being
# hard-coded, so the cell keeps working on a subset of the data.
x_train = train_images.reshape((len(train_images), 28 * 28))
x_train = x_train.astype('float32') / 255

x_test = test_images.reshape((len(test_images), 28 * 28))
x_test = x_test.astype('float32') / 255

def num2vocter(y, num_classes=10):
  """Return the one-hot encoding of the class index ``y``.

  Args:
    y: Integer class label; must satisfy ``0 <= y < num_classes``.
    num_classes: Length of the returned vector. Defaults to 10, which keeps
      the original single-argument behaviour.

  Returns:
    A 1-D NumPy array of length ``num_classes`` that is 0.0 everywhere
    except for a 1.0 at position ``y``.

  Raises:
    IndexError: If ``y`` is outside ``[0, num_classes)``. Negative labels are
      rejected explicitly because NumPy would otherwise wrap them around and
      silently mark the wrong class.
  """
  if not 0 <= y < num_classes:
    raise IndexError(
        "label %r is out of range for %d classes" % (y, num_classes))
  v = np.zeros(num_classes)
  v[y] = 1.0
  return v
# One-hot encode the integer labels of both splits, one row per sample.
y_train = np.array(list(map(num2vocter, train_labels)))
y_test = np.array(list(map(num2vocter, test_labels)))

## 版本1：简单神经网络

In [0]:
# Hyper-parameters
EPOCHS = 50                        # number of passes over the training set
BATCH = 1000                       # samples fed to the network per step
NUM_TRAIN_DATA = x_train.shape[0]  # total number of training samples

# Build the network: a single softmax layer on the flattened images.
# Two placeholders: x receives the 784-pixel rows, y the 10-element targets.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y = tf.placeholder(dtype=tf.float32, shape=[None, 10])

# Weights and biases both start at zero.
W = tf.Variable(tf.zeros(shape=[784, 10]))
b = tf.Variable(tf.zeros(shape=[10]))
y_hat = tf.nn.softmax(tf.add(tf.matmul(x, W), b))

# Cost function: mean squared error between the targets and the softmax output.
loss = tf.reduce_mean(tf.square(tf.subtract(y, y_hat)))

# Optimizer: plain gradient descent with a learning rate of 0.2.
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.2).minimize(loss)

# Accuracy.
# argmax returns the position of the largest value along axis 1; comparing the
# positions yields one boolean per sample, and the mean of those booleans
# (cast to float) is the fraction of correct predictions.
correct_prediction = tf.equal(tf.argmax(y, axis=1), tf.argmax(y_hat, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

# Op that initializes all variables.
init = tf.global_variables_initializer()

# Train the model.
# The mini-batch slices are the same for every epoch, so they are built once
# here instead of being re-created on each pass through the loop.
x_batches = [x_train[k:k+BATCH] for k in range(0, NUM_TRAIN_DATA, BATCH)]
y_batches = [y_train[k:k+BATCH] for k in range(0, NUM_TRAIN_DATA, BATCH)]

with tf.Session() as sess:
  sess.run(init)
  for epoch in range(EPOCHS):
    for xs, ys in zip(x_batches, y_batches):
      sess.run(train_step, feed_dict={x: xs, y: ys})

    # Report test accuracy every 10 epochs, and also after the last epoch so
    # that the accuracy of the fully trained model is always shown.
    if epoch % 10 == 0 or epoch == EPOCHS - 1:
      test_accuracy = sess.run(accuracy, feed_dict={x: x_test, y: y_test})
      print("step %d, test accuracy %g" %(epoch,test_accuracy))

## 版本2：多层卷积网络



In [0]:
# Hyper-parameters for the convolutional network
NUM_TRAIN_DATA = x_train.shape[0]  # total number of training samples
BATCH = 100                        # size of each mini-batch
EPOCHS = 500                       # number of training epochs


#构造网络
def weight_variable(shape):
  """Create a variable of the given shape, initialized from a truncated
  normal distribution with standard deviation 0.1."""
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
  """Create a variable of the given shape with every element set to 0.1."""
  return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
  """Convolve ``x`` with the filter ``W`` using stride 1 in every dimension
  and 'SAME' padding."""
  unit_strides = [1, 1, 1, 1]
  return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
  """Max-pool ``x`` over 2x2 windows with a stride of 2 and 'SAME' padding."""
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# Placeholders for the flattened 784-pixel inputs and the 10-element targets.
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 10])

# First convolutional layer
# A convolution followed by max pooling. The convolution computes 32 features
# for each 5x5 patch, so its weight tensor has shape [5, 5, 1, 32]: patch
# height, patch width, number of input channels, number of output channels.
# Each output channel also gets its own bias.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# To apply the layer, x is reshaped into a 4-D tensor: dimensions 2 and 3 are
# the image width and height, and the last dimension is the number of colour
# channels (1 here because the images are grayscale; it would be 3 for RGB).
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

# Convolve x_image with the weights, add the bias, apply ReLU, then max-pool.
h_conv1 = tf.nn.relu(tf.add(conv2d(x_image, W_conv1), b_conv1))
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer
# A deeper network is built by stacking layers of the same kind. Here each
# 5x5 patch produces 64 features.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(tf.add(conv2d(h_pool1, W_conv2), b_conv2))
h_pool2 = max_pool_2x2(h_conv2)

# Densely connected layer
# After two 2x2 poolings the image is 7x7. A fully connected layer with 1024
# neurons processes the whole image: the pooled tensor is flattened into
# vectors, multiplied by a weight matrix, offset by a bias and passed
# through ReLU.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, shape=[-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.add(tf.matmul(h_pool2_flat, W_fc1), b_fc1))

# Dropout
# Dropout is applied before the output layer to reduce overfitting. The keep
# probability is a placeholder so that dropout can be switched on while
# training and off while testing. tf.nn.dropout also rescales the surviving
# activations, so no manual scaling is needed.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer
# A final softmax layer, as in the single-layer softmax regression above.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_hat = tf.nn.softmax(tf.add(tf.matmul(h_fc1_drop, W_fc2), b_fc2))

# Cost function: mean squared error between the targets and the softmax output.
loss = tf.reduce_mean(tf.square(tf.subtract(y, y_hat)))
# Alternative cost: cross_entropy = -tf.reduce_sum(y*tf.log(y_hat))

# Optimizer: plain gradient descent with a learning rate of 0.2.
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.2).minimize(loss)
# Alternative optimizer: train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Accuracy: fraction of samples whose predicted class matches the target class.
correct_prediction = tf.equal(tf.argmax(y_hat, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

# Train the model.
# The mini-batch slices are the same for every epoch, so they are built once
# here instead of being re-created on each pass through the loop.
x_batches = [x_train[k:k+BATCH] for k in range(0, NUM_TRAIN_DATA, BATCH)]
y_batches = [y_train[k:k+BATCH] for k in range(0, NUM_TRAIN_DATA, BATCH)]

with tf.Session() as sess:

  # Initialize the parameters
  sess.run(tf.global_variables_initializer())
  sess.run(tf.local_variables_initializer())

  for epoch in range(EPOCHS):
    # Dropout is active (keep probability 0.5) during the training steps.
    for xs, ys in zip(x_batches, y_batches):
      sess.run(train_step, feed_dict={x: xs, y: ys, keep_prob: 0.5})

    # Report every 10 epochs, and also after the last epoch so that the
    # accuracy of the fully trained model is always shown.
    if epoch % 10 == 0 or epoch == EPOCHS - 1:
      # Dropout is disabled (keep probability 1.0) for evaluation. The
      # training accuracy is measured on the last mini-batch of the epoch only.
      train_accuracy = sess.run(accuracy, feed_dict={x: xs, y: ys, keep_prob: 1.0})
      test_accuracy = sess.run(accuracy, feed_dict={x: x_test, y: y_test, keep_prob: 1.0})
      print("step %d, train accuracy %g,test accuracy %g" %(epoch,train_accuracy,test_accuracy))



## 版本3: Tensorflow旧版

In [0]:
import os
if not os.path.exists("MNIST_data"):
  os.mkdir("MNIST_data")

!wget -P ./MNIST_data https://github.com/foochane/Tensorflow-neural-network-framework/raw/master/MNIST_data/t10k-images-idx3-ubyte.gz
!wget -P ./MNIST_data https://github.com/foochane/Tensorflow-neural-network-framework/raw/master/MNIST_data/t10k-labels-idx1-ubyte.gz
!wget -P ./MNIST_data https://github.com/foochane/Tensorflow-neural-network-framework/raw/master/MNIST_data/train-images-idx3-ubyte.gz
!wget -P ./MNIST_data https://github.com/foochane/Tensorflow-neural-network-framework/raw/master/MNIST_data/train-labels-idx1-ubyte.gz

In [0]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Hyper-parameters
BATCH = 100   # size of each mini-batch
EPOCHS = 500  # number of iterations of the training loop

# Read the MNIST archives from the MNIST_data directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


def weight_variable(shape):
  """Create a variable of the given shape, initialized from a truncated
  normal distribution with standard deviation 0.1."""
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
  """Create a variable of the given shape with every element set to 0.1."""
  return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
  """Convolve ``x`` with the filter ``W`` using stride 1 in every dimension
  and 'SAME' padding."""
  unit_strides = [1, 1, 1, 1]
  return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
  """Max-pool ``x`` over 2x2 windows with a stride of 2 and 'SAME' padding."""
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# Placeholders for the flattened 784-pixel inputs and the 10-element targets.
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 10])

# First convolutional layer
# A convolution followed by max pooling. The convolution computes 32 features
# for each 5x5 patch, so its weight tensor has shape [5, 5, 1, 32]: patch
# height, patch width, number of input channels, number of output channels.
# Each output channel also gets its own bias.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# To apply the layer, x is reshaped into a 4-D tensor: dimensions 2 and 3 are
# the image width and height, and the last dimension is the number of colour
# channels (1 here because the images are grayscale; it would be 3 for RGB).
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

# Convolve x_image with the weights, add the bias, apply ReLU, then max-pool.
h_conv1 = tf.nn.relu(tf.add(conv2d(x_image, W_conv1), b_conv1))
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer
# A deeper network is built by stacking layers of the same kind. Here each
# 5x5 patch produces 64 features.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(tf.add(conv2d(h_pool1, W_conv2), b_conv2))
h_pool2 = max_pool_2x2(h_conv2)

# Densely connected layer
# After two 2x2 poolings the image is 7x7. A fully connected layer with 1024
# neurons processes the whole image: the pooled tensor is flattened into
# vectors, multiplied by a weight matrix, offset by a bias and passed
# through ReLU.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, shape=[-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.add(tf.matmul(h_pool2_flat, W_fc1), b_fc1))

# Dropout
# Dropout is applied before the output layer to reduce overfitting. The keep
# probability is a placeholder so that dropout can be switched on while
# training and off while testing. tf.nn.dropout also rescales the surviving
# activations, so no manual scaling is needed.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer
# A final softmax layer, as in a single-layer softmax regression.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_hat = tf.nn.softmax(tf.add(tf.matmul(h_fc1_drop, W_fc2), b_fc2))

# Cost function: mean squared error between the targets and the softmax output.
loss = tf.reduce_mean(tf.square(tf.subtract(y, y_hat)))
# Alternative cost: cross_entropy = -tf.reduce_sum(y*tf.log(y_hat))

# Optimizer: plain gradient descent with a learning rate of 0.2.
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.2).minimize(loss)
# Alternative optimizer: train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Accuracy: fraction of samples whose predicted class matches the target class.
correct_prediction = tf.equal(tf.argmax(y_hat, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))


# Train the model
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for i in range(EPOCHS):
    # Draw a fresh mini-batch of BATCH samples on every step. `batch` is an
    # (images, labels) pair; it must be assigned here because the accuracy
    # report below reads it as well.
    batch = mnist.train.next_batch(BATCH)

    # Dropout is active (keep probability 0.5) during the training step.
    sess.run(train_step, feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})

    if i%10 == 0:
      # Dropout is disabled (keep probability 1.0) for evaluation. The
      # training accuracy is measured on the current mini-batch only.
      train_accuracy = sess.run(accuracy, feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
      test_accuracy = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0})
      # Report the loop counter of this cell (`i`), not a name from another cell.
      print("step %d, train accuracy %g,test accuracy %g" %(i,train_accuracy,test_accuracy))


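# eval() is simply another way of writing Session.run() for a tf.Tensor. Inside a Session context manager:
# with tf.Session() as sess:
#   print(accuracy.eval({x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0}))

# has the same effect as:
# with tf.Session() as sess:
#   print(sess.run(accuracy, {x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0}))

# Note, however, that eval() can only be used on tf.Tensor objects, i.e. on the outputs of an Operation. An Operation that has no output can be executed with its own .run() method or with Session.run(); Session.run() has no such restriction.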




## 版本4：Keras版本

In [0]:
from keras.datasets import mnist
from keras.utils import to_categorical

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every 28x28 image into a 784-element row and divide by 255 so that
# 8-bit pixel values end up in [0, 1]. The number of rows is read from the
# arrays themselves instead of being hard-coded.
train_images = train_images.reshape((len(train_images), 28 * 28))
train_images = train_images.astype('float32') / 255

test_images = test_images.reshape((len(test_images), 28 * 28))
test_images = test_images.astype('float32') / 255

# One-hot encode the labels. The class count is given explicitly because
# to_categorical otherwise infers it from the largest label it sees, which
# could give the two splits different widths.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

# Training, validation and test sets: the first 50000 training samples are
# used for training and the remaining ones are held out for validation.
x_train = train_images[:50000]
y_train = train_labels[:50000]

x_validation = train_images[50000:]
y_validation = train_labels[50000:]

x_test = test_images
y_test = test_labels


# 2 定义模型
from keras import models
from keras import layers

# Fully connected classifier: 512 ReLU units followed by a 10-way softmax.
model = models.Sequential([
    layers.Dense(512, activation='relu', input_shape=(28 * 28,)),
    layers.Dense(10, activation='softmax'),
])

# 3 Configure the optimizer, the loss function and the metrics
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# 4 Train for 100 epochs in mini-batches of 1000 samples, passing the
# held-out validation split to fit().
history = model.fit(
    x_train, y_train,
    batch_size=1000,
    epochs=100,
    validation_data=(x_validation, y_validation),
)

## 版本5：Tensorflow新版

In [0]:
import tensorflow as tf
# Load MNIST through tf.keras and divide the pixel values by 255.
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# Build the network layer by layer: flatten the 28x28 input, a 512-unit ReLU
# layer, dropout with rate 0.2, and a 10-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

# The sparse loss takes the integer labels directly, so no one-hot encoding
# is needed here.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 5 epochs, then evaluate on the test set.
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)