# 目录
### 1. 导入模块
### 2. 导入fashion_mnist数据
### 3. tf.data.Dataset制作dataset
  - `tf.data.Dataset.from_tensor_slices`
  - `shuffle`
  - `repeat`
  - `batch`
  
### 4. 定义模型（图）
  - 定义 features 和labels两个占位符
  - 把占位符传给dataset
  - 把迭代的数据传给模型
  
### 5. tf.Session 训练
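>(本例这种写法的缺点：模型直接绑定在同一个 iterator 的 `get_next()` 输出上，训练过程中只能使用同一份数据；如果想要验证的话，只能训练完后重新初始化 iterator 并喂入验证数据。若需要在训练过程中验证，可以改用 reinitializable / feedable iterator)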
### 6. 直接获取数据
  - placeholder
  - make_initializable_iterator
  - get_next
  - sess.run 初始化dataset
  - sess.run 取数据

## 1. 导入模块

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn

from tensorflow import keras
import tensorflow as tf
import sys
import os
import time
import datetime

# Report the name and version of each core library used by this notebook.
for lib in (np, pd, mpl, sklearn, keras, tf):
    print(lib.__name__, lib.__version__)

numpy 1.17.2
pandas 0.25.1
matplotlib 3.1.1
sklearn 0.21.3
tensorflow.python.keras.api._v1.keras 2.2.4-tf
tensorflow 1.15.0


## 2. 导入fashion_mnist数据

In [2]:
fashion_mnist = keras.datasets.fashion_mnist

# load_data() returns the (train, test) pair, each as (images, labels).
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# The first 5000 training samples are held out for validation;
# the remainder is the actual training split.
x_valid, y_valid = x_train_all[:5000], y_train_all[:5000]
x_train, y_train = x_train_all[5000:], y_train_all[5000:]

# Print (images.shape, labels.shape) for train, valid and test, in that order.
for split_x, split_y in ((x_train, y_train), (x_valid, y_valid), (x_test, y_test)):
    print(split_x.shape, split_y.shape)

(55000, 28, 28) (55000,)
(5000, 28, 28) (5000,)
(10000, 28, 28) (10000,)


In [3]:
from sklearn.preprocessing import StandardScaler

def _scale_pixels(images, fit=False):
    """Standardize `images` with the shared `scaler` and flatten to rows of 28*28.

    Every pixel is treated as a sample of one single feature (the array is
    reshaped to one column), so one mean/std pair is applied to all pixels.
    When `fit` is true the scaler statistics are (re)computed from `images`.
    """
    pixel_column = images.astype(np.float32).reshape(-1, 1)
    if fit:
        scaled_column = scaler.fit_transform(pixel_column)
    else:
        scaled_column = scaler.transform(pixel_column)
    return scaled_column.reshape(-1, 28*28)


scaler = StandardScaler()

# Statistics are fitted on the training split only; the validation and test
# splits reuse them.
x_train_scaled = _scale_pixels(x_train, fit=True)
x_valid_scaled = _scale_pixels(x_valid)
x_test_scaled = _scale_pixels(x_test)

## 3. tf.data.Dataset制作dataset

In [4]:
def make_dataset(images, labels, shuffle=False, epochs=10, batch_size=32,
                 shuffle_buffer_size=10000):
    """Build a repeated, batched `tf.data.Dataset` of (image, label) pairs.

    Args:
        images: Array or tensor sliced along its first dimension, one sample
            per slice.
        labels: Array or tensor sliced the same way, aligned with `images`.
        shuffle: If true, shuffle the samples before repeating and batching.
        epochs: Number of times the sample stream is repeated.
        batch_size: Number of consecutive samples grouped into one batch.
        shuffle_buffer_size: Size of the shuffle buffer, used only when
            `shuffle` is true. The default of 10000 is the value that used to
            be hard-coded here.

    Returns:
        The dataset after the optional shuffle, then `repeat(epochs)`, then
        `batch(batch_size)`.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        dataset = dataset.shuffle(shuffle_buffer_size)
    # Repeat before batching, so batches are cut from the repeated stream
    # rather than from each epoch separately.
    dataset = dataset.repeat(epochs).batch(batch_size)
    return dataset

## 4. 定义模型（图）

In [5]:
# Network architecture
hidden_units = [100, 100]
class_num = 10

# Training hyperparameters
epochs = 10
batch_size = 32

# Placeholders: the actual arrays are supplied through feed_dict when the
# dataset iterator is initialized.
x = tf.placeholder(tf.float32, shape=(None, 28*28))
# `(None,)` is a 1-tuple declaring a rank-1 label vector. The previous
# `(None)` is plain `None`, which leaves the placeholder's rank unknown.
y = tf.placeholder(tf.int64, shape=(None,))

# Placeholders -> dataset -> iterator -> one (features, labels) batch per run.
train_dataset = make_dataset(x, y, shuffle=True, batch_size=batch_size, epochs=epochs)
train_dataset_iter = train_dataset.make_initializable_iterator()
x_dataset, y_dataset = train_dataset_iter.get_next()

# The model consumes the iterator output directly, not the placeholders.
input_for_next_layer = x_dataset
for hidden_unit in hidden_units:
    input_for_next_layer = tf.layers.dense(input_for_next_layer, units=hidden_unit, activation=tf.nn.relu)

logits = tf.layers.dense(input_for_next_layer, class_num)

# Loss
loss = tf.losses.sparse_softmax_cross_entropy(labels=y_dataset, logits=logits)

# Predicted class index for each sample
prediction = tf.argmax(logits, axis=1)

# Accuracy: fraction of predictions in the batch that equal the label
correct_prediction = tf.equal(prediction, y_dataset)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Training op
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_initializable_iterator(dataset)`.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


## 5. tf.Session 训练

In [6]:
init = tf.global_variables_initializer()

# Number of full batches in one pass over the training data
# (55000 // 32 == 1718 for the arrays loaded above). Deriving it keeps the
# loop in step with the data if batch_size or the training split changes.
train_step_per_epoch = x_train_scaled.shape[0] // batch_size


with tf.Session() as sess:
    sess.run(init)
    # Initialize the dataset iterator, feeding the training arrays into the
    # placeholders the dataset was built from.
    sess.run(train_dataset_iter.initializer, feed_dict={x: x_train_scaled, y: y_train})

    for epoch in range(epochs):
        for step in range(train_step_per_epoch):
            # Forward pass and backpropagation; no feed_dict is needed because
            # each run pulls the next batch from the iterator.
            loss_value, prediction_value, accuracy_value, _ = sess.run(
                [loss, prediction, accuracy, train_op]
            )
            # "\r" rewrites the same console line, so after an epoch only the
            # metrics of its last batch remain visible.
            print("\r[train] epoch: {}, loss: {:.5f}, acc: {:.2f}".format(epoch, loss_value, accuracy_value), end="")
        print()
            


[train] epoch: 0, loss: 0.22728, acc: 0.94
[train] epoch: 1, loss: 0.57262, acc: 0.75
[train] epoch: 2, loss: 0.54824, acc: 0.88
[train] epoch: 3, loss: 0.25395, acc: 0.94
[train] epoch: 4, loss: 0.55037, acc: 0.84
[train] epoch: 5, loss: 0.22819, acc: 0.94
[train] epoch: 6, loss: 0.11044, acc: 0.97
[train] epoch: 7, loss: 0.17002, acc: 0.88
[train] epoch: 8, loss: 0.27831, acc: 0.88
[train] epoch: 9, loss: 0.35195, acc: 0.91


## 6. 直接获取数据

In [7]:
# Placeholders.
# NOTE: this cell rebinds x, y, x_dataset and y_dataset, which the model cell
# also defines. The training iterator was built from the earlier placeholders,
# so re-run the model cell before re-running the training cell after this one.
x = tf.placeholder(tf.float32, shape=(None, 28*28))
# `(None,)` is a 1-tuple declaring a rank-1 label vector. The previous
# `(None)` is plain `None`, which leaves the placeholder's rank unknown.
y = tf.placeholder(tf.int64, shape=(None,))

# Placeholders -> dataset -> iterator -> one batch of 5 samples per run
dataset = make_dataset(x, y, shuffle=False, batch_size=5)
dataset_iter = dataset.make_initializable_iterator()
x_dataset, y_dataset = dataset_iter.get_next()

# Step 1: run the initializer with the arrays to iterate over.
# Step 2: run the get_next tensors to fetch a batch.
with tf.Session() as sess:
    # Initialize with the test arrays and fetch their first batch.
    sess.run(dataset_iter.initializer, feed_dict={x: x_test_scaled, y: y_test})
    x_value_test, y_value_test = sess.run([x_dataset, y_dataset])
    print(x_value_test)
    print(y_value_test)

    # Re-initialize the same iterator with the training arrays.
    sess.run(dataset_iter.initializer, feed_dict={x: x_train_scaled, y: y_train})
    x_value_train, y_value_train = sess.run([x_dataset, y_dataset])
    print(x_value_train)
    print(y_value_train)

    # Initialize once more with the same training arrays, then fetch again.
    sess.run(dataset_iter.initializer, feed_dict={x: x_train_scaled, y: y_train})
    x_value_train2, y_value_train2 = sess.run([x_dataset, y_dataset])
    print(x_value_train2)
    print(y_value_train2)

    # No initializer run here: this fetch continues from the iterator's
    # current position.
    x_value_train, y_value_train = sess.run([x_dataset, y_dataset])
    print(x_value_train)
    print(y_value_train)

    print("======")
    x_value_train2, y_value_train2 = sess.run([x_dataset, y_dataset])
    print(x_value_train2)
    print(y_value_train2)
    

[[-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]]
[9 2 1 1 6]
[[-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]]
[4 0 7 9 9]
[[-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -