# tensorflow 입력 파이프라인 만들기

In [4]:
import tensorflow as tf

import pathlib
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# np에서 표현하는 소수점의 자리수를 적는 것. 4자리면 족할듯.
np.set_printoptions(precision=4)

In [6]:
dataset = tf.data.Dataset.from_tensor_slices([8, 3, 0, 8, 2, 1])
dataset

<TensorSliceDataset shapes: (), types: tf.int32>

In [13]:
for elem in dataset:
    print(elem.numpy())

8
3
0
8
2
1


In [14]:
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random.uniform([4, 10]))

dataset1.element_spec

TensorSpec(shape=(10,), dtype=tf.float32, name=None)

In [20]:
dataset2 = tf.data.Dataset.from_tensor_slices((tf.random.uniform([4]), tf.random.uniform([4, 100], maxval=100, dtype=tf.int32)))
dataset2.element_spec

(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(100,), dtype=tf.int32, name=None))

In [21]:
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))

dataset3.element_spec

(TensorSpec(shape=(10,), dtype=tf.float32, name=None),
 (TensorSpec(shape=(), dtype=tf.float32, name=None),
  TensorSpec(shape=(100,), dtype=tf.int32, name=None)))

In [25]:
tf.random.uniform([4])

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0.7475685 , 0.36471248, 0.80418694, 0.2338022 ], dtype=float32)>

In [48]:
dataset4 = tf.data.Dataset.from_tensors(tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]))

dataset4.element_spec

1

In [31]:
dataset4.element_spec.value_type

tensorflow.python.framework.sparse_tensor.SparseTensor

In [47]:
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random.uniform([4, 10], minval=1, maxval=10, dtype=tf.int32))

4

In [39]:
for i in dataset1:
    print(i.numpy())

[4 6 2 2 8 5 9 4 5 8]
[2 5 3 9 7 4 3 5 6 7]
[8 6 5 4 6 4 7 9 3 4]
[3 3 9 4 5 8 1 4 4 7]


In [46]:
dataset2 = tf.data.Dataset.from_tensor_slices((tf.random.uniform([4]), tf.random.uniform([4, 100], maxval=100, dtype=tf.int32)))
dataset2.element_spec

(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(100,), dtype=tf.int32, name=None))

In [42]:
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))
dataset3

<ZipDataset shapes: ((10,), ((), (100,))), types: (tf.int32, (tf.float32, tf.int32))>

In [45]:
for a, (b, c) in dataset3:
    print(f'shapes: {a.shape}, {b.shape}, {c.shape}')

shapes: (10,), (), (100,)
shapes: (10,), (), (100,)
shapes: (10,), (), (100,)
shapes: (10,), (), (100,)


In [49]:
train, test = tf.keras.datasets.fashion_mnist.load_data()

In [76]:
images, labels = train
images = images/255

# dataset 가 한 pair로 구성됨 (image, labels) 이런 식으로 구성된다. 
dataset = tf.data.Dataset.from_tensor_slices((image, labels))
for i in dataset.take(1):
    print(i)

(<tf.Tensor: shape=(28, 28), dtype=uint8, numpy=
array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   1,
          0,   0,  13,  73,   0,   0,   1,   4,   0,   0,   0,   0,   1,
          1,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
          0,  36, 136, 127,  62,  54,   0,   0,   0,   1,   3,   4,   0,
          0,   3],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   6,
          0, 102, 204, 176, 134, 144, 123,  23,   0, 

In [64]:
def count(stop):
    i = 0
    while i < stop:
        yield i
        i += 1

In [65]:
for n in count(5):
    print(n)

0
1
2
3
4


In [66]:
ds_counter = tf.data.Dataset.from_generator(count, args=[25], output_types=tf.int32, output_shapes=(),)

In [67]:
for count_batch in ds_counter.repeat().batch(10).take(10):
    print(count_batch.numpy())

[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24  0  1  2  3  4]
[ 5  6  7  8  9 10 11 12 13 14]
[15 16 17 18 19 20 21 22 23 24]
[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24  0  1  2  3  4]
[ 5  6  7  8  9 10 11 12 13 14]
[15 16 17 18 19 20 21 22 23 24]


In [70]:
def gen_series():
    i = 0
    while True:
        size = np.random.randint(0, 10)
        yield i, np.random.normal(size=(size,))
        i += 1

In [71]:
for i, series in gen_series():
    print(i, ":", str(series))
    if i > 5:
        break

0 : [-0.23768868]
1 : [-0.40121663]
2 : [0.17292577 1.2167525  0.58788029]
3 : [-0.96367664  0.46160028 -0.05578998  0.6816848  -0.18216777 -0.4238049
 -0.07229891]
4 : [ 0.07557055 -0.42925729 -1.51269535  0.23467701  1.1918732  -1.22349038
  2.02538609  0.00522889]
5 : [-0.51314883  0.85789588 -1.37376036  0.32588881 -0.82089044 -1.42467459
 -1.4428233  -0.10254666]
6 : [-1.26779393  0.13753792]


In [74]:
np.random.randint(0, 10)

7

In [None]:
사실 시간