# 目录
### 1. 导入模块
### 2. 导入fashion_mnist数据
### 3. tf.data.Dataset制作dataset
  - `tf.data.Dataset.from_tensor_slices`
  - `shuffle`
  - `repeat`
  - `batch`
#### 3.1 使用tf2.0 的方法取数据（报错）
####  3.2 make_one_shot_iterator + get_next + session 获取数据
  
### 4. 定义模型（图）
### 5. tf.Session 训练
>用 one-shot iterator 把 tf.data 直接接入计算图的缺点：训练过程中，图只能读取同一个数据集（这里是训练集）；如果想要验证，只能等训练结束后再换用验证数据。

## 1. 导入模块

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn

from tensorflow import keras
import tensorflow as tf
import sys
import os
import time
import datetime

# Log each library's name and version up front so the outputs below can be
# tied to a concrete environment.
for lib in (np, pd, mpl, sklearn, keras, tf):
    print("{} {}".format(lib.__name__, lib.__version__))

numpy 1.17.2
pandas 0.25.1
matplotlib 3.1.1
sklearn 0.21.3
tensorflow.python.keras.api._v1.keras 2.2.4-tf
tensorflow 1.15.0


## 2. 导入fashion_mnist数据

In [2]:
# Load Fashion-MNIST through the Keras dataset helper.
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# Hold out the first 5000 training examples for validation; the rest is used
# for training.
x_valid, y_valid = x_train_all[:5000], y_train_all[:5000]
x_train, y_train = x_train_all[5000:], y_train_all[5000:]

# Sanity-check the split sizes: train, then validation, then test.
for features, targets in ((x_train, y_train), (x_valid, y_valid), (x_test, y_test)):
    print(features.shape, targets.shape)

(55000, 28, 28) (55000,)
(5000, 28, 28) (5000,)
(10000, 28, 28) (10000,)


In [3]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()


def _scale_images(images, scale_fn):
    """Flatten images to one pixel per row, scale them, and return 784-vectors.

    Args:
        images: Image array; cast to float32 before scaling.
        scale_fn: Bound scaler method to apply (``fit_transform`` or ``transform``).

    Returns:
        The scaled pixels reshaped to ``(-1, 28*28)``.
    """
    # A single column means the scaler treats every pixel as the same feature.
    pixels = images.astype(np.float32).reshape(-1, 1)
    return scale_fn(pixels).reshape(-1, 28 * 28)


# The scaler is fitted on the training pixels only; validation and test data
# reuse those fitted statistics.
x_train_scaled = _scale_images(x_train, scaler.fit_transform)
x_valid_scaled = _scale_images(x_valid, scaler.transform)
x_test_scaled = _scale_images(x_test, scaler.transform)

## 3.  tf.data.Dataset制作dataset

In [4]:
def make_dataset(images, labels, shuffle=False, epochs=10, batch_size=32,
                 shuffle_buffer_size=10000):
    """Build a repeated, batched ``tf.data.Dataset`` of ``(image, label)`` pairs.

    Args:
        images: Array-like of examples; it is sliced along its first axis.
        labels: Array-like of labels aligned with ``images``.
        shuffle: If true, shuffle the examples before repeating and batching.
        epochs: Number of times the data is repeated.
        batch_size: Number of examples per batch.
        shuffle_buffer_size: Size of the shuffle buffer. A buffer smaller than
            the dataset only shuffles approximately; pass ``len(images)`` for
            a full uniform shuffle. Defaults to the previous fixed value.

    Returns:
        The dataset after the optional shuffle, ``repeat(epochs)`` and
        ``batch(batch_size)``, in that order.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        dataset = dataset.shuffle(shuffle_buffer_size)
    # Repeat before batching: batches may span epoch boundaries, and only the
    # very last batch of the whole stream can be short.
    return dataset.repeat(epochs).batch(batch_size)

TensorFlow 2.0 的读取方式（直接用 `for` 迭代 dataset）：在 TF 1.15 默认的图模式下会报错。

In [5]:
# Iterating a dataset directly is the TF 2.x (eager) way to read it. In graph
# mode the iteration raises RuntimeError; the error is the point of this cell,
# so catch and print it instead of letting it abort a "Run All".
test_dataset = make_dataset(x_train_scaled, y_train, shuffle=True)
try:
    for x, y in test_dataset.take(1):
        print(x)
        print("===")
        print(y)
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: __iter__() is only supported inside of tf.function or when eager execution is enabled.

make_one_shot_iterator + get_next + session 获取数据

In [5]:
# Graph-mode way to read a dataset: build a one-shot iterator, take its
# get_next() tensors, and evaluate them in a session.
test_dataset = make_dataset(x_train_scaled, y_train, shuffle=True)
# Use the compat.v1 function rather than the deprecated
# Dataset.make_one_shot_iterator() method, which emits a deprecation warning.
test_dataset_iter = tf.compat.v1.data.make_one_shot_iterator(test_dataset)
x, y = test_dataset_iter.get_next()

with tf.Session() as sess:
    # Each run of the get_next() tensors yields one batch.
    x_value, y_value = sess.run([x, y])
    print(x_value)
    print("===")
    print(y_value)

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
[[-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 ...
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]
 [-0.8105136 -0.8105136 -0.8105136 ... -0.8105136 -0.8105136 -0.8105136]]
===
[1 6 8 2 5 2 3 6 6 8 4 3 1 5 5 1 0 1 3 9 9 1 3 3 8 0 5 5 3 1 5 0]


## 4. 定义模型（图）

In [6]:
# Network architecture: width of each hidden layer and number of output classes.
hidden_units = [100, 100]
class_num = 10

# Placeholder-based inputs that the dataset iterator below replaces, kept for
# comparison:
#   x = tf.placeholder(tf.float32, shape=(None, 28*28))
#   y = tf.placeholder(tf.int64, shape=(None))
epochs = 10
batch_size = 32

# Feed the graph straight from tf.data: x and y are the iterator's output
# tensors, so every session run of a downstream op consumes the next batch.
train_dataset = make_dataset(x_train_scaled, y_train, shuffle=True, epochs=epochs, batch_size=batch_size)
# Use the compat.v1 function rather than the deprecated
# Dataset.make_one_shot_iterator() method, which emits a deprecation warning.
train_dataset_iter = tf.compat.v1.data.make_one_shot_iterator(train_dataset)
x, y = train_dataset_iter.get_next()

# Cast labels to int64 so they can be compared with the tf.argmax prediction below.
y = tf.cast(y, tf.int64)

# Stack the fully connected ReLU hidden layers.
input_for_next_layer = x
for hidden_unit in hidden_units:
    input_for_next_layer = tf.layers.dense(input_for_next_layer, units=hidden_unit, activation=tf.nn.relu)

# Output layer: one unnormalised score (logit) per class, no activation.
logits = tf.layers.dense(input_for_next_layer, class_num)

# Loss: softmax cross-entropy against integer class labels.
loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)

# Prediction: index of the largest logit for each example.
prediction = tf.argmax(logits, axis=1)

# Accuracy: fraction of examples in the batch predicted correctly.
correct_prediction = tf.equal(prediction, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Training op: one Adam step (learning rate 1e-3) minimising the loss.
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


## 5. tf.Session 训练

In [7]:
init = tf.global_variables_initializer()

# Derive the steps per epoch from the data size and batch size instead of
# hard-coding it, so the loop stays correct if batch_size or the training set
# changes. Flooring guarantees epochs * steps never exceeds the number of
# batches the repeat-then-batch dataset yields, so the iterator cannot run dry.
train_step_per_epoch = len(x_train_scaled) // batch_size

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        for step in range(train_step_per_epoch):
            # Forward pass and backpropagation; each run pulls the next batch
            # from the dataset iterator. Only the values that are printed are
            # fetched alongside the training op.
            loss_value, accuracy_value, _ = sess.run([loss, accuracy, train_op])
            # "\r" rewrites the same line, so the line left on screen for an
            # epoch shows the metrics of its last batch only.
            print("\r[train] epoch: {}, loss: {:.5f}, acc: {:.2f}".format(epoch, loss_value, accuracy_value), end="")
        print()
            

[train] epoch: 0, loss: 0.37181, acc: 0.84
[train] epoch: 1, loss: 0.58531, acc: 0.91
[train] epoch: 2, loss: 0.45089, acc: 0.84
[train] epoch: 3, loss: 0.20523, acc: 0.91
[train] epoch: 4, loss: 0.45204, acc: 0.84
[train] epoch: 5, loss: 0.40719, acc: 0.88
[train] epoch: 6, loss: 0.30147, acc: 0.84
[train] epoch: 7, loss: 0.15697, acc: 0.97
[train] epoch: 8, loss: 0.22709, acc: 0.91
[train] epoch: 9, loss: 0.10784, acc: 0.97
