# 引入Tensorflow

In [3]:
import tensorflow as tf
from tensorflow.keras import layers

# Report the installed TensorFlow version and the Keras version bundled with it.
for module in (tf, tf.keras):
    print(module.__version__)

2.1.0
2.2.4-tf


# 数据输入 tf.data.Dataset.from_tensor_slices

In [30]:
import numpy as np
import tensorflow as tf


# Dummy data: 1000 all-zero 28x28 inputs with all-zero 10-dimensional labels.
train_x = np.zeros((1000, 28, 28))
train_y = np.zeros((1000, 10))

# Build the input pipeline one stage at a time.
dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y))
dataset = dataset.shuffle(20)   # shuffle within a 20-element buffer
dataset = dataset.repeat(2)     # two passes over the data -> 2000 examples
dataset = dataset.batch(512)    # the final batch is partial (464 examples)

for features, labels in dataset:
    print(features.shape, labels.shape)


(512, 28, 28) (512, 10)
(512, 28, 28) (512, 10)
(512, 28, 28) (512, 10)
(464, 28, 28) (464, 10)


# 数据输入 TFRecord（tf.io.TFRecordWriter / tf.data.TFRecordDataset）

In [121]:
import numpy as np
import tensorflow as tf


# Path of the TFRecord file that this cell first writes and then reads back.
tfrecord_filename = './train.tfrecord'

class encode_and_write:
    """Serialise a stream of ``{name: value}`` dicts into a TFRecord file.

    Each value is converted to a ``tf.train.Feature`` by a builder chosen from
    ``feature_dict`` using the value's type name (``type(value).__name__``).
    """

    def __init__(self):
        # Dispatch table: type name -> bound method that builds the feature.
        as_float = self._float_feature
        as_int64 = self._int64_feature
        self.feature_dict = dict(
            ndarray=self._ndarray_feature,
            bytes=self._bytes_feature,
            float=as_float,
            double=as_float,
            bool=as_int64,
            enum=as_int64,
            int=as_int64,
            uint=as_int64,
        )

    def _ndarray_feature(self, value):
        """Store an array as one bytes entry holding its raw buffer."""
        raw = value.tobytes()
        payload = tf.train.BytesList(value=[raw])
        return tf.train.Feature(bytes_list=payload)

    def _bytes_feature(self, value):
        """Wrap a bytes object in a single-element BytesList feature."""
        payload = tf.train.BytesList(value=[value])
        return tf.train.Feature(bytes_list=payload)

    def _float_feature(self, value):
        """Wrap a number in a single-element FloatList feature."""
        payload = tf.train.FloatList(value=[value])
        return tf.train.Feature(float_list=payload)

    def _int64_feature(self, value):
        """Wrap a number in a single-element Int64List feature."""
        payload = tf.train.Int64List(value=[value])
        return tf.train.Feature(int64_list=payload)

    def _encode_example(self, example):
        """Turn one ``{name: value}`` dict into a serialized tf.train.Example.

        Raises:
            KeyError: if a value's type name has no entry in ``feature_dict``.
        """
        converted = {}
        for key in example:
            item = example[key]
            build = self.feature_dict[type(item).__name__]
            converted[key] = build(item)
        bundle = tf.train.Features(feature=converted)
        proto = tf.train.Example(features=bundle)
        return proto.SerializeToString()

    def run(self, filename, datasets):
        """Encode every item of ``datasets`` and write it to ``filename``."""
        with tf.io.TFRecordWriter(filename) as sink:
            for record in datasets:
                sink.write(self._encode_example(record))
  
class datasets_stream:
    """Iterator producing a fixed number of identical dummy examples.

    Every example is ``{"image": <64x64 uint8 zeros>, "label": 0}``.

    Args:
        cnt: number of examples produced per pass (default 1000).
    """

    def __init__(self, cnt=1000):
        # State is created here so that next() also works on a fresh instance
        # that has not been passed through iter() yet.
        self.cnt = cnt
        self.idx = 0

    def __iter__(self):
        # Every iter() call restarts the stream from the first example.
        self.idx = 0
        return self

    def __next__(self):
        if self.idx >= self.cnt:
            raise StopIteration
        self.idx += 1
        return {"image": np.zeros((64, 64), np.uint8), "label": 0}

# Serialise the synthetic example stream into the TFRecord file.
record_writer = encode_and_write()
record_writer.run(tfrecord_filename, iter(datasets_stream()))

class read_and_decode:
    """Read a TFRecord file back into tensors, driven by a template example.

    The template is a ``{name: value}`` dict. Only the type name of each value
    (``type(value).__name__``) and, for NumPy arrays, its dtype and shape are
    used to decide how the stored feature of the same name is parsed and
    decoded.
    """

    def __init__(self):
        # Dispatch table: type name -> zero-argument factory returning the
        # parsing spec for a feature of that type.
        self.feature_description_dict = {
            'ndarray' : self._bytes_feature_description,
            'bytes' : self._bytes_feature_description,
            'float' : self._float_feature_description,
            'double' : self._float_feature_description,
            'bool' : self._int64_feature_description,
            'enum' : self._int64_feature_description,
            'int' : self._int64_feature_description,
            'uint' : self._int64_feature_description
        }

    def _bytes_feature_description(self):
        """Parsing spec for a scalar string (bytes) feature."""
        return tf.io.FixedLenFeature([], tf.string)

    def _float_feature_description(self):
        """Parsing spec for a scalar float feature."""
        # TensorFlow has no `tf.float` dtype; float features parse as float32.
        return tf.io.FixedLenFeature([], tf.float32)

    def _int64_feature_description(self):
        """Parsing spec for a scalar int64 feature."""
        return tf.io.FixedLenFeature([], tf.int64)

    def _decode_example(self, e, example):
        """Convert one parsed example into a list of decoded tensors.

        Args:
            e: mapping from feature name to the parsed value of that feature.
            example: template dict; its keys select the features and its
                values select the decoding (see the class docstring).

        Returns:
            A list with one entry per key of ``example``, in iteration order:
            arrays are decoded from raw bytes and reshaped to the template's
            shape, ``float``/``int``/``bool`` values are cast to
            float32/int32/bool, and ``bytes`` values are returned unchanged.

        Raises:
            KeyError: for an array dtype or a value type without a mapping.
        """
        # Built once per call (not once per feature) and inside the method so
        # that `tf` is only needed when decoding actually happens.
        ndarray_dtypes = {
            'float32' : tf.float32,
            'float64' : tf.float64,
            'int32' : tf.int32,
            'uint16' : tf.uint16,
            'uint8' : tf.uint8,
            'int16' : tf.int16,
            'int8' : tf.int8,
            'int64' : tf.int64
        }
        scalar_dtypes = {
            'float' : tf.float32,
            'int' : tf.int32,
            'bool' : tf.bool
        }
        res = []
        for vname in example:
            template = example[vname]
            vtype = type(template).__name__
            if vtype == "ndarray":
                flat = tf.io.decode_raw(e[vname], ndarray_dtypes[str(template.dtype)])
                res.append(tf.reshape(flat, template.shape))
            elif vtype == "bytes":
                # Already a string tensor; there is nothing to cast.
                res.append(e[vname])
            else:
                res.append(tf.cast(e[vname], scalar_dtypes[vtype]))
        return res

    def run(self, filename, example):
        """Build a dataset of decoded examples read from ``filename``.

        Args:
            filename: passed unchanged to ``tf.data.TFRecordDataset``.
            example: template dict describing the stored features.

        Returns:
            The dataset after two ``map`` stages: parsing each record with the
            spec derived from ``example``, then decoding it with
            ``_decode_example``.
        """
        reader = tf.data.TFRecordDataset(filename)
        feature_description = {}
        for vname in example:
            vtype = type(example[vname]).__name__
            feature_description[vname] = self.feature_description_dict[vtype]()
        reader = reader.map(lambda e: tf.io.parse_single_example(e, feature_description))
        reader = reader.map(lambda e: self._decode_example(e, example))
        return reader

# Hint: several record shards could be collected with a glob instead, e.g.
# tf.io.gfile.glob('<dir>/records/*.tfrec') (not used in this demo).
# The template must mirror what was written: a 64x64 uint8 image and an int label.
template_example = {"image": np.zeros((64, 64), np.uint8), "label": 0}
reader = read_and_decode().run(tfrecord_filename, template_example)

batch = reader.shuffle(20).repeat(1).batch(512)
for images, labels in batch:
    print(images.shape, labels.shape)


(512, 64, 64) (512,)
(488, 64, 64) (488,)


# 数据输入 tf.data.Dataset.from_generator

In [128]:
import numpy as np
import tensorflow as tf

def our_generator():
    """Yield ten random ``(x, y)`` pairs.

    ``x`` is a 28x28 array of uniform samples from [0, 1) and ``y`` is a
    length-1 integer array drawn from 1..9.
    """
    remaining = 10
    while remaining > 0:
        image = np.random.rand(28, 28)
        label = np.random.randint(1, 10, size=1)
        yield image, label
        remaining -= 1
    
    
# Wrap the Python generator; the second argument declares the output dtypes.
output_types = (tf.float32, tf.int16)
dataset = tf.data.Dataset.from_generator(our_generator, output_types)

batch = dataset.shuffle(20).repeat(2).batch(512)
for images, labels in batch:
    print(images.shape, labels.shape)

# 10 generated samples x repeat(2) = 20 examples, which fit in a single batch.

(20, 28, 28) (20, 1)


# 默认数据增强

In [None]:
import numpy as np
import tensorflow as tf

# Dummy data: 1000 all-zero 28x28x1 inputs with all-zero 10-dimensional labels.
train_x = np.zeros((1000, 28, 28, 1))
train_y = np.zeros((1000, 10))

datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# Featurewise centering/normalisation needs statistics computed from the data.
datagen.fit(train_x)

batch_size = 512
batch = datagen.flow(train_x, train_y, batch_size=batch_size)

# flow() yields batches indefinitely, so a plain `for` loop over it never ends.
# Stop by hand after one pass over the data.
steps_per_epoch = int(np.ceil(len(train_x) / batch_size))
for step, (x, y) in enumerate(batch, start=1):
    print(x.shape, y.shape)
    if step >= steps_per_epoch:
        break

# 自定义数据增强

In [167]:
import numpy as np
import tensorflow as tf

# Dummy float32 data: 1000 all-zero 28x28x1 inputs with all-zero
# 10-dimensional labels.
train_x = np.zeros((1000, 28, 28, 1), dtype = np.float32)
train_y = np.zeros((1000, 10), dtype = np.float32)

def mixup(img_batch, label_batch):
    """Blend every sample of a batch with a randomly chosen partner sample.

    One uniform random weight is drawn per sample. Images and labels are both
    mixed as ``w * sample + (1 - w) * partner``, where the partners come from
    a random permutation of the batch. The weight is reshaped to
    ``[batch, 1, 1, 1]`` for the images and ``[batch, 1]`` for the labels.

    Returns:
        The ``(mixed_images, mixed_labels)`` pair.
    """
    n = tf.shape(img_batch)[0]
    lam = tf.random.uniform([n])
    lam_img = tf.reshape(lam, [n, 1, 1, 1])
    lam_label = tf.reshape(lam, [n, 1])
    # The same permutation pairs up images and labels.
    partner = tf.random.shuffle(tf.range(n, dtype=tf.int32))

    def blend(batch, coeff):
        return batch * coeff + tf.gather(batch, partner) * (1. - coeff)

    return blend(img_batch, lam_img), blend(label_batch, lam_label)

# Shuffle, repeat and batch first; mixup is then applied to whole batches.
samples = tf.data.Dataset.from_tensor_slices((train_x, train_y))
batch = samples.shuffle(20).repeat(2).batch(512)
batch = batch.map(mixup)


for images, labels in batch:
    print(images.shape, labels.shape)

(512, 28, 28, 1) (512, 10)
(512, 28, 28, 1) (512, 10)
(512, 28, 28, 1) (512, 10)
(464, 28, 28, 1) (464, 10)
