In [1]:
import tensorflow as tf

# List 列表数据

## 使用 tf.data.Dataset.from_tensor_slices 加载 List

In [2]:
# Slice the Python list along its first axis: every list item becomes one
# scalar element of the dataset.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
for element in dataset:
    print(element)

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)


## 使用 tf.data.Dataset.from_generator 加载 Generator

### Generator 生成器

In [3]:
import itertools

In [4]:
def gen():
    """Endlessly yield ``(n, [1] * n)`` for n = 1, 2, 3, ...

    The second item is a list of ``n`` ones, so its length grows with
    every step. The generator never terminates on its own; consumers must
    bound it themselves.
    """
    n = 1
    while True:
        yield n, [1] * n
        n += 1

In [5]:
# Wrap the Python generator as a dataset. Each yielded pair is declared as
# (int64 scalar, int64 vector of unknown length), matching what gen() yields:
# a counter and a list whose length changes from element to element.
dataset = tf.data.Dataset.from_generator(
    gen,
    output_types=(tf.int64, tf.int64),
    output_shapes=(tf.TensorShape([]), tf.TensorShape([None])),
)

In [6]:
# The generator is infinite, so bound it with take() before materialising.
first_three = dataset.take(3)
list(first_three.as_numpy_iterator())

[(1, array([1], dtype=int64)),
 (2, array([1, 1], dtype=int64)),
 (3, array([1, 1, 1], dtype=int64))]

## 使用 tf.data.TextLineDataset 加载文本

In [43]:
# Directory that holds the text files, and the files to load from it.
# A file's position in FILE_NAMES is later used as its integer label.
parent_dir = "./"
FILE_NAMES = ["test.txt"]

In [44]:
import os

def labeler(example, index):
    """Pair an example with its label.

    Args:
        example: The dataset element to label; returned unchanged.
        index: The label value; cast to ``tf.int64`` before being returned.

    Returns:
        A ``(example, label)`` tuple where ``label`` is ``index`` as int64.
    """
    label = tf.cast(index, tf.int64)
    return example, label

# Build one labeled dataset per file; the label is the file's position
# in FILE_NAMES.
labeled_data_sets = []
for i, file_name in enumerate(FILE_NAMES):
    file_path = os.path.join(parent_dir, file_name)
    # Read the file line by line and attach the file index to every line.
    labeled_dataset = tf.data.TextLineDataset(file_path).map(
        lambda ex: labeler(ex, i))
    labeled_data_sets.append(labeled_dataset)

In [45]:
# Size of the shuffle buffer used when mixing the labeled lines.
BUFFER_SIZE = 50_000
# NOTE(review): BATCH_SIZE and TAKE_SIZE are not used in the cells visible
# here; presumably they are consumed further down -- confirm.
BATCH_SIZE = 64
TAKE_SIZE = 5_000

# Chain the per-file datasets into a single dataset, starting from the first.
all_labeled_data = labeled_data_sets[0]
for labeled_dataset in labeled_data_sets[1:]:
    all_labeled_data = all_labeled_data.concatenate(labeled_dataset)

# Mix the lines once; with reshuffle_each_iteration=False the order is not
# re-drawn on every pass over the dataset.
all_labeled_data = all_labeled_data.shuffle(
    buffer_size=BUFFER_SIZE,
    reshuffle_each_iteration=False,
)

In [49]:
# Peek at up to five (line, label) pairs from the shuffled dataset.
preview = all_labeled_data.take(5)
for ex in preview:
    print(ex)

(<tf.Tensor: shape=(), dtype=string, numpy=b'data set test!'>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: shape=(), dtype=string, numpy=b'        line3'>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: shape=(), dtype=string, numpy=b'line 2'>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)
