In [1]:
import tensorflow as tf

In [2]:
# Sample daily sales numbers; the list deliberately includes negative values.
daily_sales_no = [23, 54, -1, -89, 100, 10]

# Convert to a TF Dataset

<b> The tf.data.Dataset API supports writing descriptive and efficient input pipelines. Dataset usage follows a common pattern:</b>

<ul>
<li>Create a source dataset from your input data.</li>
<li>Apply dataset transformations to preprocess the data.</li>
<li>Iterate over the dataset and process the elements.</li>
</ul>

<em>Iteration happens in a streaming fashion, so the full dataset does not need to fit into memory.</em>

In [3]:
# Build a Dataset from the Python list; each list entry becomes one element.
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_no)
# Last expression of the cell: shows the concrete dataset class.
type(tf_dataset)

tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset

In [4]:
# Display the dataset's repr, which includes its element spec (shape and dtype).
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [5]:
# Iterating the dataset directly yields tensors; .numpy() unwraps each value.
for sales_tensor in tf_dataset:
    print(sales_tensor.numpy())

23
54
-1
-89
100
10


In [6]:
# as_numpy_iterator() hands back NumPy values, so no .numpy() call is needed.
numpy_values = tf_dataset.as_numpy_iterator()
for sales_value in numpy_values:
    print(sales_value)

23
54
-1
-89
100
10


In [7]:
# Transformation: double every element.
# NOTE: tf_dataset is rebound to the mapped dataset, so re-running this cell
# doubles the values again.
tf_dataset = tf_dataset.map(lambda sale: sale * 2)
list(tf_dataset.as_numpy_iterator())

[46, 108, -2, -178, 200, 20]

In [8]:
# Take only the first 3 elements
first_three = tf_dataset.take(3)
for sale in first_three:
    print(sale.numpy())

46
108
-2


In [9]:
# Filtering data: keep only the strictly positive values.
# NOTE: tf_dataset is rebound to the filtered dataset from here on.
tf_dataset = tf_dataset.filter(lambda sale: sale > 0)
list(tf_dataset.as_numpy_iterator())

[46, 108, 200, 20]

In [10]:
# Randomly rearranging the elements.
# Print the contents before and after shuffling with a buffer of 3 elements.
sales_before = list(tf_dataset.as_numpy_iterator())
print(sales_before)
tf_dataset = tf_dataset.shuffle(3)
sales_after = list(tf_dataset.as_numpy_iterator())
print(sales_after)

[46, 108, 200, 20]
[108, 20, 46, 200]


In [11]:
# Batching the dataset: group consecutive elements into batches of 2.
batched_sales = tf_dataset.batch(2)
for batch in batched_sales:
    print(batch.numpy())

[200  46]
[108  20]


# With a Pipeline: chaining all the steps

In [12]:
# Start again from the raw sales numbers so the pipeline below works on
# untransformed data.
daily_sales_no = [23, 54, -1, -89, 100, 10]
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_no)
print(list(tf_dataset.as_numpy_iterator()))

[23, 54, -1, -89, 100, 10]


In [13]:
# One chained pipeline:
# keep positive values -> multiply by 10 -> shuffle (buffer of 3) -> batches of 2
tf_dataset_new = (
    tf_dataset
    .filter(lambda sale: sale > 0)
    .map(lambda sale: sale * 10)
    .shuffle(3)
    .batch(2)
)
for sales_batch in tf_dataset_new:
    print(sales_batch.numpy())

[ 230 1000]
[100 540]


In [27]:
# Dataset of file paths matching the glob (two levels below 'Flower Dataset'),
# listed without shuffling.
image_ds = tf.data.Dataset.list_files("Flower Dataset/*/*", shuffle=False)

In [28]:
# Print the first five file paths as a sanity check on the glob pattern.
for file_path in image_ds.take(5).as_numpy_iterator():
    print(file_path)

b'Flower Dataset\\daisy\\100080576_f52e8ee070_n_jpg.rf.77fc70826524be2700a7465c39dd2663.jpg'
b'Flower Dataset\\daisy\\10140303196_b88d3d6cec_jpg.rf.2f4f52a5d9739ed87bd185a3af4904ed.jpg'
b'Flower Dataset\\daisy\\10172379554_b296050f82_n_jpg.rf.209e98415d463665863010946d22983e.jpg'
b'Flower Dataset\\daisy\\10172567486_2748826a8b_jpg.rf.b5a2975b3a809e5e38693fa863fc4e8c.jpg'
b'Flower Dataset\\daisy\\10172636503_21bededa75_n_jpg.rf.f76895eb82e0b112474335250a68378c.jpg'


In [30]:
# Expected class labels (presumably the sub-folder names under
# 'Flower Dataset' -- confirm against the directory layout).
class_names = ["daisy", "dandelion"]

# Total number of files matched by the glob.
img_count = len(image_ds)
img_count

1821

In [59]:
def splitting(ds, train_size):
    """Split a dataset into a leading train part and a trailing test part.

    Args:
        ds: Dataset that supports ``len()``, ``take()`` and ``skip()``.
        train_size: Fraction of the elements, between 0 and 1 inclusive,
            that goes into the training split.

    Returns:
        A ``(train_ds, test_ds)`` tuple: the first
        ``int(len(ds) * train_size)`` elements and the remaining elements.

    Raises:
        ValueError: If ``train_size`` is outside the range [0, 1].
    """
    if not 0 <= train_size <= 1:
        raise ValueError(
            f"train_size must be a fraction in [0, 1], got {train_size!r}"
        )

    n_train = int(len(ds) * train_size)

    # take/skip cut at the same index, so the two splits are disjoint and keep
    # the dataset's current order. No shuffling happens here.
    return ds.take(n_train), ds.skip(n_train)


In [46]:
import os

def get_label(img_path):
    """Return the name of the directory that directly contains the file.

    The path is split on the OS path separator and the second-to-last
    component (the parent folder) is returned as a string tensor.
    """
    path_parts = tf.strings.split(img_path, os.path.sep)
    return path_parts[-2]
    
def get_process_img(img_path):
    """Load one image file and pair it with its class label.

    Args:
        img_path: Scalar string tensor holding the path of a JPEG file.

    Returns:
        An ``(img, label)`` tuple: the decoded image resized to 128x128 with
        3 colour channels, and the label returned by ``get_label(img_path)``.
    """
    label = get_label(img_path)

    raw_bytes = tf.io.read_file(img_path)
    # Force 3 channels so every element has the same shape (128, 128, 3).
    # With the default (channels=0) a grayscale JPEG decodes to one channel
    # and the channel dimension is unknown when the graph is built, which
    # breaks batching and model input.
    img = tf.image.decode_jpeg(raw_bytes, channels=3)
    img = tf.image.resize(img, [128, 128])

    return img, label

In [51]:
# train_ds is not assigned in any visible cell, so this cell raised a NameError
# on a fresh kernel. Build it here with the splitting() helper.
# NOTE(review): the 0.8 train fraction is an assumption -- the value used
# originally is not recorded in the notebook; adjust as needed.
# image_ds was listed with shuffle=False, so this take/skip split follows the
# file-listing order.
train_ds, test_ds = splitting(image_ds, 0.8)

# Turn each file path into an (image, label) pair and preview the first three.
train_ds1 = train_ds.map(get_process_img)
for img, label in train_ds1.take(3):
    print(label, img)

tf.Tensor(b'daisy', shape=(), dtype=string) tf.Tensor(
[[[136.   137.   131.  ]
  [144.5  145.5  140.5 ]
  [147.5  147.5  145.5 ]
  ...
  [151.25 152.25 147.25]
  [153.   154.   149.  ]
  [151.5  152.5  147.5 ]]

 [[134.5  135.5  129.5 ]
  [143.5  144.5  139.5 ]
  [146.   146.   144.  ]
  ...
  [153.   154.   149.  ]
  [152.5  153.5  148.5 ]
  [148.75 149.75 144.75]]

 [[132.25 133.25 127.25]
  [141.   142.   137.  ]
  [144.5  144.5  142.5 ]
  ...
  [155.   156.   151.  ]
  [150.75 151.75 146.75]
  [147.   148.   143.  ]]

 ...

 [[ 42.    46.    23.  ]
  [ 42.    46.    23.  ]
  [ 43.75  47.75  24.75]
  ...
  [128.75 124.75 121.75]
  [127.   123.   120.  ]
  [129.75 125.75 122.75]]

 [[ 44.    48.    25.  ]
  [ 44.    48.    25.  ]
  [ 44.    48.    25.  ]
  ...
  [131.   127.   124.  ]
  [129.   125.   122.  ]
  [129.   125.   122.  ]]

 [[ 44.    48.    25.  ]
  [ 44.    48.    25.  ]
  [ 44.    48.    25.  ]
  ...
  [132.75 128.75 125.75]
  [130.   126.   123.  ]
  [130.   126.   1

In [54]:
# Inspect the type of the dataset object returned by .map().
type(train_ds1)

tensorflow.python.data.ops.map_op._MapDataset

In [56]:
# Scaling the image
def scale(img, label):
    """Divide the image values by 255 and pass the label through unchanged.

    Returns:
        A ``(scaled_img, label)`` tuple.
    """
    return img / 255, label

# Apply the scaling step to every (img, label) pair.
train_ds2 = train_ds1.map(scale)

# Show the label and the top-left pixel of the first three images.
scaled_preview = train_ds2.take(3)
for img, label in scaled_preview:
    print(label, img[0][0])


tf.Tensor(b'daisy', shape=(), dtype=string) tf.Tensor([0.53333336 0.5372549  0.5137255 ], shape=(3,), dtype=float32)
tf.Tensor(b'daisy', shape=(), dtype=string) tf.Tensor([0.83235294 0.8519608  0.8754902 ], shape=(3,), dtype=float32)
tf.Tensor(b'daisy', shape=(), dtype=string) tf.Tensor([0.5009804  0.5117647  0.44607842], shape=(3,), dtype=float32)


# All in one single line: Pipelining

In [57]:
# The same steps as above, chained into a single pipeline.
train_ds_3 = (
    train_ds
    .map(get_process_img)  # path -> (image, label)
    .map(scale)            # divide image values by 255
    .shuffle(3)            # shuffle with a buffer of 3 elements
)

In [58]:
# Preview the label and top-left pixel for three elements of the pipeline.
pipeline_preview = train_ds_3.take(3)
for img, label in pipeline_preview:
    print(label, img[0][0])

tf.Tensor(b'daisy', shape=(), dtype=string) tf.Tensor([0.53333336 0.5372549  0.5137255 ], shape=(3,), dtype=float32)
tf.Tensor(b'daisy', shape=(), dtype=string) tf.Tensor([0.83235294 0.8519608  0.8754902 ], shape=(3,), dtype=float32)
tf.Tensor(b'daisy', shape=(), dtype=string) tf.Tensor([0.5009804  0.5117647  0.44607842], shape=(3,), dtype=float32)
