In [5]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all" 

In [6]:
import numpy as np
import os
import gzip

import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
# Show the installed TensorFlow version as the cell output.
tf.__version__

'2.1.0'

In [7]:
# GPU setup

# Log the device each op is placed on.
tf.debugging.set_log_device_placement(True)
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    # Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
    tf.config.experimental.set_memory_growth(gpu, True)
print(len(gpus))
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(logical_gpus))

0
0


In [None]:
# Loader for the gzip-compressed dataset files. `data_folder` is the directory
# that holds these four files:
# 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
# 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'

def load_data(data_folder):
  """Read the train/test images and labels stored as gzip files in `data_folder`.

  Label files are decoded as uint8 after skipping an 8-byte header; image
  files are decoded as uint8 after skipping a 16-byte header and reshaped to
  (number_of_labels, 28, 28).

  Args:
    data_folder: directory containing the four `.gz` files listed above.

  Returns:
    `(x_train, y_train), (x_test, y_test)` as NumPy uint8 arrays.
  """

  def read_payload(fname, header_size):
    # Decompress the whole file and view everything after the header as bytes.
    with gzip.open(os.path.join(data_folder, fname), 'rb') as stream:
      return np.frombuffer(stream.read(), np.uint8, offset=header_size)

  # Labels are read before the matching images because the label count
  # determines the leading dimension of the image array.
  y_train = read_payload('train-labels-idx1-ubyte.gz', 8)
  x_train = read_payload('train-images-idx3-ubyte.gz', 16)
  x_train = x_train.reshape(len(y_train), 28, 28)

  y_test = read_payload('t10k-labels-idx1-ubyte.gz', 8)
  x_test = read_payload('t10k-images-idx3-ubyte.gz', 16)
  x_test = x_test.reshape(len(y_test), 28, 28)

  return (x_train, y_train), (x_test, y_test)

# Raw string so the backslashes of the Windows path are taken literally and the
# literal does not rely on unrecognized escape sequences such as "\T" or "\K".
DATA_DIR = r'F:\2、Deep Learning\Tensorflow\Keras_datasets\Fashion-MNIST'

(x_train_all, y_train_all), (x_test, y_test) = load_data(DATA_DIR)
# Hold out the first 5000 training samples as the validation split.
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]
print(x_valid.shape, y_valid.shape)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

def show_single_image(image_arr):
    """Render one image array with the 'binary' colormap and show the figure."""
    plt.imshow(image_arr, cmap='binary')
    plt.show()

def show_images(n_rows, n_cols, x_data, y_data, class_names):
    """Plot the first `n_rows * n_cols` images in a grid, each titled with its class name.

    Args:
        n_rows: number of rows in the grid.
        n_cols: number of columns in the grid.
        x_data: indexable collection of images; `x_data[i]` is passed to `plt.imshow`.
        y_data: labels aligned with `x_data`; `y_data[i]` is used to index `class_names`.
        class_names: sequence mapping a label to the title shown above the image.

    Raises:
        AssertionError: if `x_data` and `y_data` differ in length, or if there
            are fewer than `n_rows * n_cols` images.
    """
    assert len(x_data) == len(y_data)
    n_cells = n_rows * n_cols
    # The largest index used is n_cells - 1, so a grid that uses every image
    # (n_cells == len(x_data)) is valid.
    assert n_cells <= len(x_data)
    plt.figure(figsize=(n_cols * 1.4, n_rows * 1.6))
    # Cells are filled in row-major order, which is also how subplot numbers them.
    for index in range(n_cells):
        plt.subplot(n_rows, n_cols, index + 1)
        plt.imshow(x_data[index], cmap='binary', interpolation='nearest')
        plt.axis('off')
        plt.title(class_names[y_data[index]])
    plt.show()

# Display names for the ten labels; the list index is the label value.
class_names = [
    'T-shirt',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle Boot',
]

# show_single_image(x_train[0])

# show_images(3, 5, x_train, y_train, class_names)

In [None]:
# Standardize the pixel values.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# Each split goes [N, 28, 28] -> [N * 784, 1] -> [N, 28, 28, 1]:
#   * the cast to float32 comes first because standardization involves division;
#   * StandardScaler needs 2-D input, and a single column means one mean/std is
#     shared by every pixel;
#   * the trailing channel axis matches the model, whose first Conv2D declares
#     input_shape=(28, 28, 1).
# The statistics are fitted on the training split only and reused for the
# validation and test splits.
x_train_scaled = scaler.fit_transform(x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)
x_valid_scaled = scaler.transform(x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)
x_test_scaled = scaler.transform(x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)

In [None]:
# Build the tf.data input pipeline.
def make_dataset(images, labels, epochs, batch_size, shuffle = True):
    """Create a `tf.data.Dataset` of `(image, label)` batches.

    Args:
        images: image array; sliced along its first axis.
        labels: label array aligned with `images`.
        epochs: how many times the data is repeated.
        batch_size: number of samples per batch.
        shuffle: when true, shuffle with a buffer of 10000 elements before
            repeating and batching.

    Returns:
        The dataset after (optional) shuffle, repeat, batch and a prefetch of
        50 batches.
    """
    pipeline = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        pipeline = pipeline.shuffle(10000)
    pipeline = pipeline.repeat(epochs)
    pipeline = pipeline.batch(batch_size)
    return pipeline.prefetch(50)

# The global batch size is scaled by the number of GPUs (e.g. x4 on four GPUs)
# because MirroredStrategy splits each batch evenly between them.
batch_size_per_replica = 256
# On a machine without GPUs len(logical_gpus) is 0, which would produce a batch
# size of 0 and make Dataset.batch fail; always count at least one replica.
batch_size = batch_size_per_replica * max(len(logical_gpus), 1)
epochs = 100
train_dataset = make_dataset(x_train_scaled, y_train, epochs, batch_size)

In [None]:
# Build and compile the model under a distribution strategy.
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    model = keras.Sequential()
    # Three blocks of two 3x3 'same' convolutions followed by 2x2 max pooling,
    # with 128, 256 and 512 filters. Only the first layer declares input_shape;
    # every later layer takes its input shape from the previous layer's output.
    model.add(keras.layers.Conv2D(filters = 128, kernel_size = 3, padding = 'same',
                              activation = 'relu', input_shape = (28, 28, 1)))
    model.add(keras.layers.Conv2D(filters = 128, kernel_size = 3, padding = 'same',
                              activation = 'relu'))
    model.add(keras.layers.MaxPool2D(pool_size = 2))

    model.add(keras.layers.Conv2D(filters = 256, kernel_size = 3, padding = 'same',
                              activation = 'relu'))
    model.add(keras.layers.Conv2D(filters = 256, kernel_size = 3, padding = 'same',
                              activation = 'relu'))
    model.add(keras.layers.MaxPool2D(pool_size = 2))

    model.add(keras.layers.Conv2D(filters = 512, kernel_size = 3, padding = 'same',
                              activation = 'relu'))
    model.add(keras.layers.Conv2D(filters = 512, kernel_size = 3, padding = 'same',
                              activation = 'relu'))
    model.add(keras.layers.MaxPool2D(pool_size = 2))

    # Classification head: flatten, one hidden layer, 10-way softmax.
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(512, activation = 'relu'))
    model.add(keras.layers.Dense(10, activation='softmax'))
    # Labels are integer class ids, hence the sparse categorical loss.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])


# Attach a distribution strategy to the estimator's run configuration.
# NOTE(review): this creates a second MirroredStrategy instance, distinct from
# the one whose scope the model was built in -- confirm whether a single shared
# instance is intended.
strategy = tf.distribute.MirroredStrategy()
config = tf.estimator.RunConfig(
    train_distribute = strategy)

# Convert the Keras model into an Estimator.
estimator = keras.estimator.model_to_estimator(model, config = config)

In [None]:
# The training run can be inspected in TensorBoard.
# To compare runs, logdir can be given in key:value form: the key is the name
# shown in TensorBoard and the value is the run's directory, e.g.
#   tensorboard --logdir=baseline:<baseline_dir>
# NOTE(review): the original note here was truncated; fill in the real directories.
estimator.train(
    input_fn = lambda : make_dataset(
        x_train_scaled, y_train, epochs, batch_size), 
    max_steps = 5000)  # train for at most 5000 steps

In [None]:
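# Possible reasons why the distributed run is no faster (or is even slower) than the baseline:
# 1. In the source code, distributed training also requires the dataset to be distributed,
#    because what we are doing is data parallelism. The estimator does not parallelize the
#    data by default, so I/O becomes the speed bottleneck.
# 2. With the same batch_size, the extra compute of the distributed setup is wasted.
#    (No baseline run was downloaded here; in the video both use 256.)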