In [1]:
import tensorflow as tf

# TF DATASET FROM LIST

In [3]:
daily_sales_numbers = [21, 22, -108, 31, -1, 32, 34,31]

tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [7]:
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
-108
31
-1
32
34
31


In [10]:
for sales in tf_dataset.take(3): #Iterate through first n elements in tf dataset

    print(sales.numpy())


21
22
-108


# filter

In [11]:
tf_dataset = tf_dataset.filter(lambda x: x>0)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
31
32
34
31


# transformation

In [13]:
tf_dataset = tf_dataset.map(lambda x: x*72)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1512
1584
2232
2304
2448
2232


# shuffle

In [14]:
tf_dataset = tf_dataset.shuffle(2)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1512
2232
1584
2448
2232
2304


https://stackoverflow.com/questions/53514495/what-does-batch-repeat-and-shuffle-do-with-tensorflow-dataset

# Batching

In [16]:
for sales_batch in tf_dataset.batch(2):
    print(sales_batch.numpy())

[1512 1584]
[2304 2232]
[2448 2232]


# Perform all of the above operations in one shot

In [17]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)

tf_dataset = tf_dataset.filter(lambda x: x>0).map(lambda y: y*72).shuffle(2).batch(2)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

[1512 1584]
[2304 2232]
[2232 2448]


# Images

In [32]:
images_daisy = tf.data.Dataset.list_files('datasets/flower_photos/daisy/*', shuffle=False)
images_rose = tf.data.Dataset.list_files('datasets/flower_photos/roses/*', shuffle=False)
ds = images_daisy.concatenate(images_rose)

<_ConcatenateDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [62]:
for file in ds.take(3):
    print(file.numpy())

b'datasets\\flower_photos\\daisy\\100080576_f52e8ee070_n.jpg'
b'datasets\\flower_photos\\daisy\\10140303196_b88d3d6cec.jpg'
b'datasets\\flower_photos\\daisy\\10172379554_b296050f82_n.jpg'


In [36]:
type(ds)

tensorflow.python.data.ops.concatenate_op._ConcatenateDataset

In [53]:
images_ds = ds.shuffle(4000) #shuffle!!
for file in images_ds.take(3):
    print(file.numpy())

b'datasets\\flower_photos\\daisy\\14591326135_930703dbed_m.jpg'
b'datasets\\flower_photos\\roses\\3560426426_1c66cb8330.jpg'
b'datasets\\flower_photos\\roses\\2960709681_e95940c0f0_n.jpg'


In [56]:
class_names = ["roses","daisy"]
image_count = len(images_ds)
image_count

1274

# create train + test DS

In [57]:
train_size = int(image_count*0.8)
train_ds = images_ds.take(train_size)
test_ds = images_ds.skip(train_size)

In [58]:
len(train_ds)

1019

In [59]:
len(test_ds)

255

In [72]:
def get_label(file_path):
    import os
    parts = tf.strings.split(file_path, os.path.sep) ##special tf.string.split instead of .split
    print(parts)
    return parts[-2]

In [66]:
get_label(b'datasets\\flower_photos\\roses\\2960709681_e95940c0f0_n.jpg')

tf.Tensor([b'datasets' b'flower_photos' b'roses' b'2960709681_e95940c0f0_n.jpg'], shape=(4,), dtype=string)


<tf.Tensor: shape=(), dtype=string, numpy=b'roses'>

# convert jpeg into tf dataset

In [73]:
def process_image(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path) # load the raw data from the file as a string
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img, [128, 128])
    return img, label

In [76]:
img1, label1 = process_image('datasets\\flower_photos\\roses\\3560426426_1c66cb8330.jpg')
img1.numpy()[:2]

tf.Tensor([b'datasets' b'flower_photos' b'roses' b'3560426426_1c66cb8330.jpg'], shape=(4,), dtype=string)


array([[[0.00000000e+00, 0.00000000e+00, 4.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 2.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 2.39843750e+00],
        [0.00000000e+00, 0.00000000e+00, 2.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 2.00000000e+00],
        [0.00000000e+00, 1.00000000e+00, 4.96875000e+00],
        [0.00000000e+00, 0.00000000e+00, 4.00000000e+00],
        [4.04663086e-02, 0.00000000e+00, 4.04046631e+00],
        [1.99218750e-01, 0.00000000e+00, 4.19921875e+00],
        [0.00000000e+00, 0.00000000e+00, 4.00000000e+00],
        [8.00781250e-01, 9.64965820e-02, 4.99377441e+00],
        [8.84826660e-01, 0.00000000e+00, 4.88482666e+00],
        [2.62756348e-01, 6.53686523e-02, 5.00000000e+00],
        [1.00000000e+00, 0.00000000e+00, 5.00000000e+00],
        [1.00000000e+00, 0.00000000e+00, 5.00000000e+00],
        [9.90661621e-01, 9.33837891e-03, 5.00000000e+00],
        [9.53125000e-01, 0.00000000e+00, 5.00000000e+00],
        [8.007

# apply to training and testing ds

In [77]:
train_ds = train_ds.map(process_image)
test_ds = test_ds.map(process_image)

Tensor("StringSplit/RaggedGetItem/strided_slice_5:0", shape=(None,), dtype=string)
Tensor("StringSplit/RaggedGetItem/strided_slice_5:0", shape=(None,), dtype=string)


In [78]:
for image, label in train_ds.take(1):
    print("****",image)
    print("****",label)

**** tf.Tensor(
[[[217.79688  222.79688  216.79688 ]
  [217.       222.       216.      ]
  [216.4375   221.4375   215.4375  ]
  ...
  [209.46191  212.43066  203.47754 ]
  [209.4375   212.4375   203.4375  ]
  [209.4375   212.4375   203.4375  ]]

 [[217.54785  222.54785  216.54785 ]
  [217.       222.       216.      ]
  [217.       222.       216.      ]
  ...
  [210.03125  213.       204.04688 ]
  [209.6875   212.6875   203.6875  ]
  [209.6875   212.6875   203.6875  ]]

 [[217.       222.       216.      ]
  [217.       222.       216.      ]
  [217.1875   222.1875   216.1875  ]
  ...
  [210.21289  213.18164  206.19727 ]
  [209.1875   212.1875   205.1875  ]
  [209.03809  212.03809  205.03809 ]]

 ...

 [[140.       103.        76.      ]
  [140.11426  103.11426   76.11426 ]
  [140.1875   103.1875    76.1875  ]
  ...
  [129.39648   89.396484  63.396484]
  [129.44824   89.44824   63.448242]
  [128.26758   90.20996   63.5625  ]]

 [[141.       104.        77.      ]
  [140.80957  103.809

In [79]:
def scale(image, label):
    return image/255, label

In [80]:
train_ds = train_ds.map(scale)

In [81]:
for image, label in train_ds.take(5):
    print("****Image: ",image.numpy()[0][0])
    print("****Label: ",label.numpy())

****Image:  [0.27898285 0.48192403 0.80741423]
****Label:  b'roses'
****Image:  [0.15066636 0.16438419 0.03988205]
****Label:  b'roses'
****Image:  [0.5129442  0.49796262 0.45023745]
****Label:  b'daisy'
****Image:  [0.16006242 0.18308823 0.25759804]
****Label:  b'daisy'
****Image:  [0.5833946 0.615962  0.2027114]
****Label:  b'roses'
