In [1]:
import tensorflow as tf

### Create tf dataset from a list

In [2]:
# Raw daily sales figures; the negative entries are invalid readings that the
# pipeline below filters out.
daily_sales_numbers = [
    21, 22, -108, 31,
    -1, 32, 34, 31,
]

In [3]:
# Wrap the Python list in a tf.data.Dataset: each list item becomes one element.
tf_dataset = tf.data.Dataset.from_tensor_slices(tensors=daily_sales_numbers)

### Iterate through tf dataset

In [4]:
# Iterating the dataset yields tensors; .numpy() unwraps each one to a plain value.
for sales_tensor in tf_dataset:
    print(sales_tensor.numpy())

21
22
-108
31
-1
32
34
31


### Iterate through elements as numpy elements

In [5]:
# as_numpy_iterator() hands back NumPy values directly, so no .numpy() call is needed.
numpy_sales = tf_dataset.as_numpy_iterator()
for value in numpy_sales:
    print(value)

21
22
-108
31
-1
32
34
31


In [6]:
# take(4) limits the dataset to its first four elements.
first_four = tf_dataset.take(4)
for sales_tensor in first_four:
    print(sales_tensor.numpy())

21
22
-108
31


### Sales numbers can't be negative, so we need to filter them out

In [7]:
# Negative values are invalid sales figures, so drop them. A day with zero
# sales is legitimate data, hence ">= 0" rather than "> 0".
tf_dataset = tf_dataset.filter(lambda x: x >= 0)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
31
32
34
31


### Convert sales numbers from US dollars ($) to Indian rupees (INR), assuming a conversion rate of 1 USD = 72 INR

In [8]:
# map() applies a function to every element, so arithmetic works too:
# here each dollar amount is multiplied by 72 to express it in rupees.
tf_dataset = tf_dataset.map(lambda usd: usd * 72)
converted_values = tf_dataset.as_numpy_iterator()
for inr in converted_values:
    print(inr)

1512
1584
2232
2304
2448
2232


### Shuffle
**For reference**: https://stackoverflow.com/questions/53514495/what-does-batch-repeat-and-shuffle-do-with-tensorflow-dataset

In [9]:
# Rearrange the elements using a shuffle buffer of size 3.
tf_dataset = tf_dataset.shuffle(buffer_size=3)
shuffled_values = tf_dataset.as_numpy_iterator()
for value in shuffled_values:
    print(value)

1512
2232
2448
2232
1584
2304


### Batching

In [10]:
# Group the elements two at a time; every batch comes back as a single tensor.
batched_sales = tf_dataset.batch(2)
for batch in batched_sales:
    print(batch.numpy())

[1512 2304]
[1584 2448]
[2232 2232]


### Perform all of the above operations in one shot

In [11]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)

# The whole pipeline chained in one expression:
#   filter  -> drop invalid negative figures (zero-sales days are kept, hence >=)
#   map     -> convert dollars to rupees (x72)
#   shuffle -> rearrange with a 2-element buffer
#   batch   -> group into pairs
tf_dataset = (
    tf_dataset
    .filter(lambda x: x >= 0)
    .map(lambda y: y * 72)
    .shuffle(2)
    .batch(2)
)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

[1584 2232]
[1512 2304]
[2448 2232]


### Images

In [13]:
# Build a dataset of file paths matching images/<class_dir>/<file>; shuffle=False
# keeps the listing order deterministic.
images_ds = tf.data.Dataset.list_files(
    file_pattern='images/*/*',
    shuffle=False,
)

In [14]:
image_count = len(images_ds)
image_count

2000

In [18]:
# Inspect the concrete dataset class behind images_ds.
# NOTE(review): the recorded output reports a ShuffleDataset although the
# list_files call visible in this notebook passes shuffle=False, and the
# execution counts around this cell are not sequential -- the output looks
# stale; re-run from a fresh kernel to confirm.
type(images_ds)

tensorflow.python.data.ops.dataset_ops.ShuffleDataset

In [19]:
# Show the first three file paths (each element is a bytes string tensor).
first_paths = images_ds.take(3)
for path_tensor in first_paths:
    print(path_tensor.numpy())

b'images\\cats\\cat.213.jpg'
b'images\\cats\\cat.184.jpg'
b'images\\cats\\cat.154.jpg'


In [20]:
# Shuffle across the *whole* file list, and do it only once.
#  * The files are listed with shuffle=False and the recorded outputs show the
#    cats directory first, so a small buffer (e.g. 200) would leave the first
#    80% of the stream dominated by a single class.
#  * By default shuffle() reshuffles on every iteration, which would make the
#    take()/skip() train/test split below pick different files each time and
#    leak test images into training. reshuffle_each_iteration=False keeps the
#    order fixed so the two splits stay disjoint.
images_ds = images_ds.shuffle(image_count, reshuffle_each_iteration=False)
for file in images_ds.take(3):
    print(file.numpy())

b'images\\cats\\cat.246.jpg'
b'images\\cats\\cat.269.jpg'
b'images\\cats\\cat.22.jpg'


### Splitting the dataset into train and test sets

In [21]:
# NOTE(review): the labels printed elsewhere in this notebook are directory
# names such as b'cats', which do not match these singular names -- confirm
# which form downstream code expects.
class_names = ["cat", "dog"]

# 80/20 split: the first train_size elements form the training set and
# everything after them forms the test set.
train_size = int(0.8 * image_count)
train_ds, test_ds = images_ds.take(train_size), images_ds.skip(train_size)

In [22]:
len(train_ds)

1600

In [23]:
len(test_ds)

400

In [24]:
def get_label(file_path):
    """Return the name of the directory that directly contains ``file_path``.

    The images are laid out as ``images/<label>/<file>``, so the second-to-last
    path component is the label (e.g. ``images\\cats\\cat.246.jpg`` -> ``b'cats'``).

    Args:
        file_path: a path string or scalar string tensor, using either ``/`` or
            ``\\`` as the separator.

    Returns:
        A scalar string tensor holding the parent directory name.
    """
    # Normalise backslashes to "/" inside TensorFlow so the result does not
    # depend on the separator of the platform running the notebook.
    normalized = tf.strings.regex_replace(file_path, r"\\", "/")
    parts = tf.strings.split(normalized, "/")
    return parts[-2]

In [28]:
get_label("images\\cats\\cat.246.jpg")

<tf.Tensor: shape=(), dtype=string, numpy=b'cats'>

In [26]:
def process_image(file_path):
    """Load one JPEG file and return it with its label.

    Args:
        file_path: path (string or scalar string tensor) of a JPEG image stored
            under ``images/<label>/``.

    Returns:
        ``(img, label)`` where ``img`` is the decoded image resized to
        128x128 with 3 channels (pixel values are not rescaled here) and
        ``label`` is the parent directory name from ``get_label``.
    """
    label = get_label(file_path)
    img = tf.io.read_file(file_path)  # raw encoded bytes of the file
    # Force RGB output: without `channels` a grayscale JPEG decodes to a single
    # channel, giving images of differing shapes that cannot be batched together.
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [128, 128])
    return img, label

In [29]:
img, label = process_image("images\\cats\\cat.246.jpg")
img.numpy()[:2]

array([[[112.44922 , 113.44922 ,  97.44922 ],
        [114.34766 , 115.34766 ,  99.34766 ],
        [116.24609 , 117.24609 , 101.24609 ],
        [118.      , 119.      , 103.      ],
        [119.      , 120.      , 104.      ],
        [120.90831 , 121.90831 , 105.90831 ],
        [121.      , 122.      , 106.      ],
        [123.      , 124.      , 108.      ],
        [125.      , 126.      , 108.      ],
        [126.53516 , 127.53516 , 109.53516 ],
        [126.      , 127.      , 109.      ],
        [126.      , 127.      , 109.      ],
        [126.23047 , 127.23047 , 109.23047 ],
        [130.      , 131.      , 113.      ],
        [128.      , 129.      , 111.      ],
        [130.92578 , 131.92578 , 113.92578 ],
        [129.17578 , 130.17578 , 112.17578 ],
        [130.      , 131.      , 113.      ],
        [131.      , 132.      , 114.      ],
        [132.51953 , 133.51953 , 115.51953 ],
        [133.41797 , 133.58203 , 115.58203 ],
        [134.      , 133.      , 1

In [30]:
# Turn every file path into an (image, label) pair, for both splits.
train_ds, test_ds = train_ds.map(process_image), test_ds.map(process_image)

In [31]:
# Print the first training example: the image tensor, then its label tensor.
first_example = train_ds.take(1)
for image, label in first_example:
    for item in (image, label):
        print("****", item)

**** tf.Tensor(
[[[208.        199.        168.       ]
  [208.        199.        168.       ]
  [208.        199.        168.       ]
  ...
  [203.        194.        163.       ]
  [203.        194.        163.       ]
  [203.        194.        163.       ]]

 [[208.        199.        168.       ]
  [208.        199.        168.       ]
  [208.        199.        168.       ]
  ...
  [203.        194.        163.       ]
  [203.        194.        163.       ]
  [203.        194.        163.       ]]

 [[208.        199.        168.       ]
  [208.        199.        168.       ]
  [208.        199.        168.       ]
  ...
  [203.        194.        163.       ]
  [203.        194.        163.       ]
  [203.        194.        163.       ]]

 ...

 [[213.        204.        173.       ]
  [213.        204.        173.       ]
  [213.        204.        173.       ]
  ...
  [  3.          3.          3.       ]
  [  3.          3.          3.       ]
  [  3.          3.         

In [32]:
def scale(image, label):
    """Divide the image by 255 and pass the label through unchanged.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    scaled_image = image / 255
    return scaled_image, label

In [33]:
# Normalise both splits: evaluation images must go through the same scaling as
# the training images, otherwise the test set stays in the 0-255 range.
train_ds = train_ds.map(scale)
test_ds = test_ds.map(scale)

In [34]:
# Spot-check five scaled examples: show the top-left pixel and the label.
for image, label in train_ds.take(5):
    top_left_pixel = image.numpy()[0, 0]
    print("****Image: ", top_left_pixel)
    print("****Label: ", label.numpy())

****Image:  [0.4975342  0.45795086 0.19379644]
****Label:  b'cats'
****Image:  [0.66752833 0.20478325 0.18909697]
****Label:  b'cats'
****Image:  [0.7978554  0.6449142  0.34295344]
****Label:  b'cats'
****Image:  [0.44577205 0.44969362 0.43008578]
****Label:  b'cats'
****Image:  [0.55430454 0.51508886 0.47979474]
****Label:  b'cats'
