## 使用 tf.data.Dataset 加载数据

In [1]:
import tensorflow as tf

In [2]:
tf.__version__

'2.2.0'

### List 列表数据

In [3]:
# Slice a plain Python list: each list item becomes one scalar dataset element.
values = [1, 2, 3]
dataset = tf.data.Dataset.from_tensor_slices(values)
for element in dataset:
    print(element)

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)


### Generator 生成器

In [4]:
import itertools

def gen():
    """Yield ``(n, [1] * n)`` for n = 1, 2, 3, ... without ever stopping.

    The second item is a list of ``n`` ones, so its length differs from one
    yielded pair to the next. Callers must bound the iteration themselves.
    """
    for length in itertools.count(start=1):
        ones = [1] * length
        yield length, ones

In [5]:
# Wrap the generator in a Dataset. Each element is a pair of int64 tensors:
# a scalar and a 1-D vector whose length is left unspecified (None).
dataset = tf.data.Dataset.from_generator(
    gen,
    output_types=(tf.int64, tf.int64),
    output_shapes=(tf.TensorShape([]), tf.TensorShape([None])),
)

In [6]:
# Bound the stream to three elements before materializing it as NumPy values.
first_three = dataset.take(3)
list(first_three.as_numpy_iterator())

[(1, array([1])), (2, array([1, 1])), (3, array([1, 1, 1]))]

### 文本文件

In [7]:
# Directory that holds the text files, relative to the working directory.
parent_dir = 'files'

# Text files to load, one per entry.
FILE_NAMES = [
    'cowper.txt',
    'derby.txt',
    'butler.txt',
]

In [8]:
import os

def labeler(example, index):
    """Pair a dataset element with its label.

    Args:
        example: The element to label; returned unchanged.
        index: The label value; cast to ``tf.int64`` before being returned.

    Returns:
        A ``(example, label)`` tuple.
    """
    label = tf.cast(index, tf.int64)
    return example, label

def _with_label(lines_dataset, label):
    """Return `lines_dataset` with every element paired with `label`.

    The label arrives as a parameter so that each mapped function has its own
    binding. A lambda written directly in the loop body would instead close
    over the loop variable, which is rebound on every iteration.

    Args:
        lines_dataset: Dataset whose elements are passed to `labeler`.
        label: Integer label attached to every element.

    Returns:
        The mapped dataset of ``(element, label)`` pairs.
    """
    return lines_dataset.map(lambda ex: labeler(ex, label))


# One labeled dataset per entry of FILE_NAMES; a file's position in
# FILE_NAMES is used as its label.
labeled_data_sets = []

print('cwd:', os.getcwd())
for i, file_name in enumerate(FILE_NAMES):
    file_path = os.path.join(parent_dir, file_name)
    print('file_path:', file_path)
    # Resolve the relative path against the working directory printed above.
    file_full_path = os.path.abspath(file_path)
    print('file_full_path:', file_full_path)
    lines_dataset = tf.data.TextLineDataset(file_full_path)
    labeled_dataset = _with_label(lines_dataset, i)
    labeled_data_sets.append(labeled_dataset)

cwd: /work/tensorflow-101/experts/code_sample/chapter-2
file_path: files/cowper.txt
file_full_path: /work/tensorflow-101/experts/code_sample/chapter-2/files/cowper.txt
file_path: files/derby.txt
file_full_path: /work/tensorflow-101/experts/code_sample/chapter-2/files/derby.txt
file_path: files/butler.txt
file_full_path: /work/tensorflow-101/experts/code_sample/chapter-2/files/butler.txt


In [9]:
# Size of the shuffle buffer used when shuffling the combined dataset below.
BUFFER_SIZE = 50000
# NOTE(review): not used in the cells visible here — presumably the batch size
# for a later batching step; confirm against the rest of the notebook.
BATCH_SIZE = 64
# NOTE(review): not used in the cells visible here — presumably the number of
# examples set aside by a later take/skip split; confirm.
TAKE_SIZE = 5000

# Chain the per-file datasets together in list order, starting from the first.
all_labeled_data = labeled_data_sets[0]
remaining_data_sets = labeled_data_sets[1:]
for labeled_dataset in remaining_data_sets:
    all_labeled_data = all_labeled_data.concatenate(labeled_dataset)

# Shuffle so that lines from different files are interleaved. With
# reshuffle_each_iteration=False the order is not re-drawn on later passes.
all_labeled_data = all_labeled_data.shuffle(
    buffer_size=BUFFER_SIZE,
    reshuffle_each_iteration=False,
)

In [10]:
# Print the first five (line, label) pairs of the shuffled dataset.
preview = all_labeled_data.take(5)
for ex in preview:
    print(ex)

(<tf.Tensor: shape=(), dtype=string, numpy=b'battle-cry all of you with a single voice. Now has Jove vouchsafed us a'>, <tf.Tensor: shape=(), dtype=int64, numpy=2>)
(<tf.Tensor: shape=(), dtype=string, numpy=b'Conducts from some pure fountain through his grove'>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: shape=(), dtype=string, numpy=b"Consult by what expedient thou may'st save">, <tf.Tensor: shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: shape=(), dtype=string, numpy=b'him round with fire. Let him not turn you back neither by fair words'>, <tf.Tensor: shape=(), dtype=int64, numpy=2>)
(<tf.Tensor: shape=(), dtype=string, numpy=b'To break their way: he then Thootes sent,'>, <tf.Tensor: shape=(), dtype=int64, numpy=1>)
