In [None]:
import glob
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import cv2
import json
import matplotlib.pyplot as plt
os.environ['CUDA_VISIBLE_DEVICES']='0,1'



In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Split the first physical GPU into two logical GPUs (memory_limit=8192 each)
    # so that multi-GPU training can be simulated.
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=8192)
             for _ in range(2)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Report the RuntimeError raised by the configuration calls instead of aborting the cell.
        print(err)

3 Physical GPU, 4 Logical GPUs


In [None]:
# Create the distribution strategy used for synchronous multi-device training.
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: %d' % strategy.num_replicas_in_sync)  # print the number of replicas (devices)

In [None]:
# Grayscale image
def draw_cv2(raw_strokes, size=256, lw=6, time_color=True):
    """Rasterise a list of strokes into a single-channel uint8 image.

    Args:
        raw_strokes: sequence of strokes; for each stroke, ``stroke[0]`` holds
            the x coordinates and ``stroke[1]`` the y coordinates of its points.
        size: side length of the returned square image.
        lw: line thickness passed to ``cv2.line``.
        time_color: when true, stroke number ``t`` is drawn with intensity
            ``255 - min(t, 10) * 13`` (later strokes are darker, capped at the
            10th); otherwise every stroke is drawn at 255.

    Returns:
        A ``(size, size)`` array. Drawing always happens on a
        ``BASE_SIZE`` x ``BASE_SIZE`` canvas, which is resized with
        ``cv2.resize`` only when ``size`` differs from ``BASE_SIZE``.
    """
    canvas = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)
    for stroke_idx, stroke in enumerate(raw_strokes):
        # The intensity depends only on the stroke's position in the drawing.
        shade = 255 - min(stroke_idx, 10) * 13 if time_color else 255
        # Connect every pair of consecutive points of the stroke.
        for k in range(len(stroke[0]) - 1):
            start = (stroke[0][k], stroke[1][k])
            end = (stroke[0][k + 1], stroke[1][k + 1])
            cv2.line(canvas, start, end, shade, lw)
    if size == BASE_SIZE:
        return canvas
    return cv2.resize(canvas, (size, size))
    

In [None]:
# RGB image variant of draw_cv2 (commented out)
# def draw_cv2(raw_strokes, size=256, lw=6):
#     img = np.zeros((BASE_SIZE, BASE_SIZE, 3), np.uint8)
#     for t, stroke in enumerate(raw_strokes):
#         points_count = len(stroke[0]) - 1
#         grad = 255//points_count
#         for i in range(len(stroke[0]) - 1):
#             _ = cv2.line(img, (stroke[0][i], stroke[1][i]), (stroke[0][i + 1], stroke[1][i + 1]), (255, 255 - min(t,10)*13, max(255 - grad*i, 20)), lw)
#     if size != BASE_SIZE:
#         img = cv2.resize(img, (size, size))
#     return img

In [None]:
class DataLoader(object):
    """Build a shuffled, batched ``tf.data.Dataset`` of rendered stroke images.

    Every CSV in ``fileList`` is read with pandas. Its ``drawing`` column
    (JSON-encoded strokes) is rasterised with ``draw_cv2`` and its ``y`` column
    is one-hot encoded with ``n_labels`` classes. The file list is cycled
    forever, so the resulting dataset never ends on its own.

    Args:
        resize_height: height of the images emitted by the dataset.
        resize_width: width of the images emitted by the dataset.
        batch_size: number of samples per batch.
        fileList: list of CSV paths to read.
        size: side length of the square image produced by ``draw_cv2``.
        lw: line width passed to ``draw_cv2``.
    """

    def __init__(self, resize_height=64, resize_width=64, batch_size=512, fileList=None, size=256, lw=6):
        self.resize_height = resize_height  # image height
        self.resize_width = resize_width  # image width
        self.batch_size = batch_size  # batch size
        self.fileList = fileList  # data files
        self.size = size  # image size used when drawing
        self.lw = lw  # line width used when drawing

    def __call__(self):
        """Return the dataset of ``(image, one_hot_label)`` batches.

        Raises:
            ValueError: if ``fileList`` is empty or ``None``; the endless
                generator would otherwise never yield a sample.
        """
        if not self.fileList:
            raise ValueError("DataLoader needs a non-empty fileList")

        height, width = int(self.resize_height), int(self.resize_width)

        def _generator(size, lw):
            # from_generator hands `args` over as NumPy values; use plain ints
            # for the array shape and the OpenCV calls.
            size, lw = int(size), int(lw)
            while True:
                for filename in self.fileList:
                    df = pd.read_csv(filename)
                    df['drawing'] = df['drawing'].apply(json.loads)
                    x = np.zeros((len(df), height, width))
                    for i, raw_strokes in enumerate(df.drawing.values):
                        img = draw_cv2(raw_strokes, size=size, lw=lw)
                        if img.shape != (height, width):
                            # Keep the samples consistent with the declared
                            # output shape; cv2.resize takes (width, height).
                            img = cv2.resize(img, (width, height))
                        x[i] = img
                    x = x / 255.
                    x = x.reshape((len(df), height, width, 1)).astype(np.float32)
                    y = tf.keras.utils.to_categorical(df.y, num_classes=n_labels)
                    for x_i, y_i in zip(x, y):
                        yield (x_i, y_i)

        dataset = tf.data.Dataset.from_generator(generator=_generator,
                                                 output_types=(tf.dtypes.float32, tf.dtypes.int32),
                                                 output_shapes=((height, width, 1), (n_labels, )),
                                                 args=(self.size, self.lw))
        dataset = dataset.prefetch(buffer_size=10240)
        dataset = dataset.shuffle(buffer_size=10240).batch(self.batch_size)
        return dataset

In [None]:
# Directory containing the *.csv.gz data files.
DP_DIR = './shuffle_data_gzip/'


BASE_SIZE = 256  # side length of the canvas draw_cv2 draws on
n_labels = 340  # number of classes (one-hot width and size of the softmax layer)
np.random.seed(seed=1987)
size = 64  # side length of the images fed to the model
batchsize = 1024*3
# glob returns files in arbitrary order; sort so the train/validation split is
# the same on every run.
fileList = sorted(glob.glob(os.path.join(DP_DIR, '*.csv.gz')))
if len(fileList) < 2:
    # One file is held out for validation, so at least one more is needed for training.
    raise FileNotFoundError(
        'Expected at least 2 *.csv.gz files in %s, found %d' % (DP_DIR, len(fileList)))
train_fileList = fileList[:-1]
val_fileList = fileList[-1:]  # the last file is held out for validation
train_ds = DataLoader(resize_height=size, resize_width=size, batch_size=batchsize, fileList=train_fileList, size=size, lw=6)()
val_ds = DataLoader(resize_height=size, resize_width=size, batch_size=batchsize, fileList=val_fileList, size=size, lw=6)()
        
    

In [None]:
def MobileNetModel():
    """Build the classifier: a MobileNet backbone followed by a dense head.

    The network takes ``(size, size, 1)`` images. The MobileNet backbone is
    created without its top and without pretrained weights (``weights=None``).
    Its output is flattened and passed through a 1024-unit ReLU layer and an
    ``n_labels``-way softmax layer.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    image_shape = (size, size, 1)
    image_in = tf.keras.layers.Input(shape=image_shape)
    backbone = tf.keras.applications.MobileNet(
        input_shape=image_shape,
        include_top=False,
        weights=None,
        classes=n_labels,
    )
    features = tf.keras.layers.Flatten()(backbone(image_in))
    hidden = tf.keras.layers.Dense(1024, activation='relu')(features)
    class_probs = tf.keras.layers.Dense(n_labels, activation='softmax')(hidden)
    return tf.keras.models.Model(inputs=image_in, outputs=class_probs)

In [8]:
# Build and compile the model inside the strategy scope so that it is created
# under the MirroredStrategy.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = MobileNetModel()
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.002),
                  loss='categorical_crossentropy',
                  metrics=[tf.keras.metrics.CategoricalCrossentropy(),
                           tf.keras.metrics.CategoricalAccuracy(),
                           tf.keras.metrics.TopKCategoricalAccuracy(k=3,name='top_3_categorical_accuracy')])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensor

In [9]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the model.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 64, 64, 1)]       0         
_________________________________________________________________
mobilenet_1.00_64 (Model)    (None, 2, 2, 1024)        3228288   
_________________________________________________________________
flatten (Flatten)            (None, 4096)              0         
_________________________________________________________________
dense (Dense)                (None, 1024)              4195328   
_________________________________________________________________
dense_1 (Dense)              (None, 340)               348500    
Total params: 7,772,116
Trainable params: 7,750,228
Non-trainable params: 21,888
_________________________________________________________________


In [None]:
# Callbacks keyed on validation top-3 accuracy:
#   - ReduceLROnPlateau multiplies the learning rate by 0.75 after 3 epochs
#     without an improvement of at least 0.001, down to a floor of 1e-5.
#   - ModelCheckpoint writes only the best weights to 'model_all.h5'.
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_top_3_categorical_accuracy', factor=0.75, patience=3, min_delta=0.001,
                          mode='max', min_lr=1e-5, verbose=1),
    tf.keras.callbacks.ModelCheckpoint('model_all.h5', monitor='val_top_3_categorical_accuracy', mode='max', save_best_only=True,
                    save_weights_only=True),
]

# Train for 50 epochs of 16000 steps each, validating on 100 batches per epoch.
# NOTE(review): the `callbacks` argument below is commented out, so the list
# defined above is never passed to fit() and no checkpoint is written or LR
# reduced -- confirm whether this is intentional.
model.fit(
    train_ds, epochs=50, verbose=1,steps_per_epoch=16000,
    validation_data = val_ds,
    validation_steps = 100,
#     callbacks = callbacks
)




Train for 16000 steps, validate for 100 steps
Epoch 1/50
INFO:tensorflow:batch_all_reduce: 85 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:batch_all_reduce: 85 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
Epoch 2/50