# Implementing Tensorflow Pipeline

In [8]:
import tensorflow as tf

Create tf dataset from a list

In [9]:
daily_sales_numbers = [21, 22, -108, 31, -1, 32, 34,31]
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

Iterate through tf dataset

In [10]:
for sales in tf_dataset:
    print(sales.numpy())# print(sales) 

# or we can do like this
# or sales in tf_dataset.as_numpy_iterator():
#     print(sales)

21
22
-108
31
-1
32
34
31


Iterate through first n elements in tf dataset

In [11]:
for sales in tf_dataset.take(3):
    print(sales.numpy())

21
22
-108



Filter sales numbers that are < 0

In [None]:
tf_dataset = tf_dataset.filter(lambda x: x > 0)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
31
32
34
31


Filter sales numbers that are < 0 without lambda

In [13]:
def custom_filter(x):
    return x > 0
tf_dataset = tf_dataset.filter(custom_filter)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
31
32
34
31


Convert sales numbers from USA dollars ($) to Indian Rupees (INR) Assuming 1->72 conversation rate using map

In [14]:
tf_dataset = tf_dataset.map(lambda x: x*72)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1512
1584
2232
2304
2448
2232


Shuffe data rows using shuffle with a bucket size

In [16]:

tf_dataset = tf_dataset.shuffle(3)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

2232
2304
1584
2232
2448
1512



Batching using batch

In [18]:
for sales in tf_dataset.batch(4):
    print(sales.numpy())

[1512 2304 2448 1584]
[2232 2232]


Perform all of the above operations in one shot

In [20]:
tf_dataset_one_shot = tf.data.Dataset.from_tensor_slices(daily_sales_numbers).filter(lambda x: x > 0).map(lambda y: y * 72).shuffle(4)

for sales in tf_dataset_one_shot:
    print(sales.numpy())

2304
1584
2448
1512
2232
2232


# Lets work with some images in this PC

In [30]:
images_ds = tf.data.Dataset.list_files('../extraction_dump/images/*/*', shuffle=False)


In [31]:
image_count = len(images_ds)
image_count

130

In [32]:
type(images_ds)

tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset

In [33]:

for file in images_ds.take(3):
    print(file.numpy())

b'..\\extraction_dump\\images\\cat\\20 Reasons Why Cats Make the Best Pets....jpg'
b'..\\extraction_dump\\images\\cat\\7 Foods Your Cat Can_t Eat.jpg'
b'..\\extraction_dump\\images\\cat\\A cat appears to have caught the....jpg'


In [None]:
#some shuffling
images_ds = images_ds.shuffle(200)
for file in images_ds.take(3):
    print(file.numpy())

b'..\\extraction_dump\\images\\dog\\The US Army is testing augmented....jpg'
b'..\\extraction_dump\\images\\dog\\Aggression in dogs _ Animal Humane Society.jpg'
b'..\\extraction_dump\\images\\dog\\why dogs understand our body language....jpg'


In [35]:
class_names = ["cat","dog"]

In [37]:
train_size = int(image_count*0.8)
train_ds = images_ds.take(train_size)
test_ds = images_ds.skip(train_size)

In [38]:
print(len(train_ds),len(test_ds))

104 26


we use os split , instead of simple split , to maintain portability,Different operating systems use different separators 

In [39]:
import os
def get_label(file_path):
    parts = tf.strings.split(file_path,os.path.sep)
    return parts[-2]

In [42]:
get_label("..\\extraction_dump\\images\\dog\\The US Army is testing augmented....jpg")

<tf.Tensor: shape=(), dtype=string, numpy=b'dog'>

In [43]:
def process_image(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path) # load the raw data from the file as a string
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img, [128, 128])
    return img, label

In [45]:
img, label = process_image("..\\extraction_dump\\images\\cat\\20 Reasons Why Cats Make the Best Pets....jpg")
img.numpy()[:2]

array([[[155.      , 186.      , 215.      ],
        [156.      , 187.      , 216.      ],
        [158.      , 189.      , 218.      ],
        [160.0039  , 189.0039  , 219.0039  ],
        [161.0039  , 190.0039  , 220.0039  ],
        [162.      , 191.      , 221.      ],
        [166.      , 193.      , 222.      ],
        [167.      , 194.      , 223.      ],
        [168.      , 195.      , 224.      ],
        [169.      , 196.      , 225.      ],
        [170.      , 197.      , 224.      ],
        [170.0039  , 197.0039  , 224.0039  ],
        [172.      , 199.      , 226.      ],
        [173.0039  , 199.0039  , 224.0039  ],
        [174.0039  , 200.0039  , 225.0039  ],
        [175.0039  , 201.0039  , 226.0039  ],
        [176.      , 202.      , 227.      ],
        [177.0039  , 203.0039  , 228.0039  ],
        [177.0039  , 203.0039  , 228.0039  ],
        [179.0039  , 203.0039  , 227.0039  ],
        [180.0039  , 204.0039  , 228.0039  ],
        [180.0039  , 204.0039  , 2

In [None]:
train_ds = train_ds.map(process_image) # processing test and train datasets 
test_ds = test_ds.map(process_image)

In [None]:
def scale(image, label):
    return image/255, label #normalizing image

In [49]:

train_ds = train_ds.map(scale)
test_ds = test_ds.map(scale)

In [50]:

for image, label in train_ds.take(5):
    print("****Image: ",image.numpy()[0][0])
    print("****Label: ",label.numpy())

****Image:  [0.00246059 0.00202999 0.0016609 ]
****Label:  b'cat'
****Image:  [0.00121852 0.00121852 0.00121852]
****Label:  b'dog'
****Image:  [0.00202708 0.00165799 0.0015042 ]
****Label:  b'dog'
****Image:  [0.00197455 0.00183128 0.00099602]
****Label:  b'dog'
****Image:  [0.0035383  0.00089761 0.00010681]
****Label:  b'cat'


Hence we successfully created test and training data