# Creating a Tensorflow Input Pipeline


In [1]:
import tensorflow as tf
import numpy as np

In [2]:
daily_sales_numbers=[21,22,-108,31,-1,32,34,31] # Python List

## Creating a Tensorflow Dataset or tensors

> Individual element in the tensorflow dataset is called as a tensor.

In [3]:
tf_dataset=tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset

<TensorSliceDataset shapes: (), types: tf.int32>

In [4]:
tensors=[]
for tensor in tf_dataset:
  tensors.append(tensor)
  print(tensor)

tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor(22, shape=(), dtype=int32)
tf.Tensor(-108, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)
tf.Tensor(-1, shape=(), dtype=int32)
tf.Tensor(32, shape=(), dtype=int32)
tf.Tensor(34, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)


## Converting the individual tensor into numpy object.

In [5]:
for tensor in tf_dataset:
  print(tensor.numpy())

21
22
-108
31
-1
32
34
31


In [6]:
for tensor in tf_dataset.as_numpy_iterator():
  print(tensor)

21
22
-108
31
-1
32
34
31


In [7]:
# tensors

In [8]:
# np.array(tensors)

In [9]:
 for sale in tf_dataset.take(3):
   print(sale.numpy())

21
22
-108


## Filtering the invalid data points

> As we are building the Tensorflow pipeline, we need to get rid of the invalid data points and we can do so by 'filtering' them out. We can do so by using the `.filter()` and suppling the filter function that we have created to it.

In [10]:
tf_dataset_filtered=tf_dataset.filter(lambda x: x>0)
for data in tf_dataset_filtered.as_numpy_iterator():
  print(data)

21
22
31
32
34
31


## Applying transformation to filtered data

> The data points are in `dollars(\$)` and we want to convert them into `INR(Indian Rupees)` as we are analyzing the Indian market. `1$= 75INR`

We are going to use the `.map()`

In [11]:
tf_dataset_transformed=tf_dataset_filtered.map(lambda x: x*75)
for data in tf_dataset_transformed.as_numpy_iterator():
  print(data)

1575
1650
2325
2400
2550
2325


## Randomly Shuffle the elements

> We want to randomly shuffle the elements when we are doing image classification especially because when we are training on the elements, we have to make sure that the model doesn't learn based on the numbering or arrangements of the elements(images here) in the dataset. That's we do shuffling on the dataset.

In [12]:
tf_dataset_shuffled=tf_dataset_transformed.shuffle(3) # '3' is the buffer size of the shuffle().
for data in tf_dataset_shuffled.as_numpy_iterator():
  print(data)

1575
1650
2400
2325
2550
2325


In [13]:
tf_dataset_shuffled=tf_dataset_transformed.shuffle(3)
for data in tf_dataset_shuffled.as_numpy_iterator():
  print(data)

1650
2400
1575
2325
2550
2325


In [14]:
tf_dataset_shuffled=tf_dataset_transformed.shuffle(2)
for data in tf_dataset_shuffled.as_numpy_iterator():
  print(data)

1575
2325
1650
2400
2550
2325


In [15]:
for batch in tf_dataset_shuffled:
  print(batch.numpy())

1575
1650
2325
2400
2550
2325


## Creating batches of the dataset

> We are creating batches of the dataset for easier computation and processing of the dataset, if the dataset is huge.

In [16]:
for batch in tf_dataset_shuffled.batch(2): # '2' is the batch size
  print(batch.numpy())

[1650 2325]
[2400 2550]
[2325 1575]


In [17]:
for batch in tf_dataset_shuffled.batch(4):
  print(batch.numpy())

[1650 1575 2325 2400]
[2325 2550]


## Tensorflow Input Pipeline Code

> We are reading the data from the data source and then we are doing filtering, mapping, shuffling, and finally creating batches of the data.

In [18]:
tf_dataset=tf.data.Dataset.from_tensor_slices(daily_sales_numbers)

tf_dataset_processed=tf_dataset.filter(lambda x:x >0).map(lambda y:y*75).shuffle(2).batch(2)

for data in tf_dataset_processed.as_numpy_iterator():
  print(data)

[1650 1575]
[2325 2550]
[2400 2325]


## Importing dataset of images(Fish and Birds)

In [22]:
images__path_dataset=tf.data.Dataset.list_files('/content/drive/MyDrive/Colab Notebooks/Images/*/*',shuffle=True) # Stored the Image paths


for image in images__path_dataset.take(3):
  print(image.numpy())

b'/content/drive/MyDrive/Colab Notebooks/Images/Fishs/proxy-image (4).jpg'
b'/content/drive/MyDrive/Colab Notebooks/Images/Fishs/proxy-image (17).jpg'
b'/content/drive/MyDrive/Colab Notebooks/Images/Birds/proxy-image (2).jpg'


## Shuffling the images paths

In [23]:
images__path_dataset_shuffle=images__path_dataset.shuffle(10)

for path in images__path_dataset.take(5):
  print(path.numpy()) 

b'/content/drive/MyDrive/Colab Notebooks/Images/Birds/proxy-image.jpg'
b'/content/drive/MyDrive/Colab Notebooks/Images/Fishs/proxy-image (22).jpg'
b'/content/drive/MyDrive/Colab Notebooks/Images/Fishs/proxy-image (7).jpg'
b'/content/drive/MyDrive/Colab Notebooks/Images/Birds/proxy-image (5).jpg'
b'/content/drive/MyDrive/Colab Notebooks/Images/Birds/proxy-image (2).jpg'


In [24]:
class_names= ['Fish','Bird']

In [26]:
image_count=len(images__path_dataset_shuffle)
image_count

46

In [28]:
train_size=int(image_count*0.8)

train_ds=images__path_dataset_shuffle.take(train_size)
test_ds=images__path_dataset_shuffle.skip(train_size)

In [29]:
len(train_ds)

36

In [30]:
len(test_ds)

10

In [33]:
s='/content/drive/MyDrive/Colab Notebooks/Images/Birds/proxy-image.jpg'

s.split('/')

['',
 'content',
 'drive',
 'MyDrive',
 'Colab Notebooks',
 'Images',
 'Birds',
 'proxy-image.jpg']

In [39]:
s.split('/')[6]

'Birds'

In [40]:
s.split('/')[-2]

'Birds'

## Getting all the labels('y') of the images

`y_train`

In [41]:
def get_label(file_path):
    import os
    parts = tf.strings.split(file_path, os.path.sep) # OS separator
    return parts[-2]

In [42]:
for ele in train_ds.take(4):
  print(ele.numpy())

b'/content/drive/MyDrive/Colab Notebooks/Images/Fishs/proxy-image (3).jpg'
b'/content/drive/MyDrive/Colab Notebooks/Images/Fishs/proxy-image (5).jpg'
b'/content/drive/MyDrive/Colab Notebooks/Images/Fishs/proxy-image (27).jpg'
b'/content/drive/MyDrive/Colab Notebooks/Images/Fishs/proxy-image (8).jpg'


In [44]:
for label in train_ds.map(get_label):
  print(label) # 'y' or labels of the images

tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Birds', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Birds', shape=(), dtype=string)
tf.Tensor(b'Birds', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Birds', shape=(), dtype=string)
tf.Tensor(b'Birds', shape=(), dtype=string)
tf.Tensor(b'Birds', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dtype=string)
tf.Tensor(b'Fishs', shape=(), dt

## Getting the images('X')

`X_Train`

In [45]:
def process_image(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path) # load the raw data from the file as a string
    img = tf.image.decode_jpeg(img) # To decode the jpeg image
    img = tf.image.resize(img, [128, 128]) # Since images are of different sizes and make them of a particular size
    return img, label

In [48]:
train_ds=train_ds.map(process_image)
for img, label in train_ds.take(1):
  print('Image: ', img)
  print('Label:', label)

Image:  tf.Tensor(
[[[137.48956  141.48956  142.48956 ]
  [135.31145  139.31145  140.31145 ]
  [149.26935  153.26935  154.26935 ]
  ...
  [151.86183  151.86183  151.86183 ]
  [146.57236  146.57236  146.57236 ]
  [154.25023  154.25023  154.25023 ]]

 [[122.55214  126.55214  127.55214 ]
  [136.10118  140.10118  141.10118 ]
  [130.9139   134.9139   135.9139  ]
  ...
  [157.04028  157.04028  157.04028 ]
  [159.89548  159.89548  159.89548 ]
  [150.10751  150.10751  150.10751 ]]

 [[146.03787  150.03787  151.03787 ]
  [135.00319  139.00319  140.00319 ]
  [120.23625  124.23625  125.23625 ]
  ...
  [156.134    156.134    156.134   ]
  [169.74219  169.74219  169.74219 ]
  [165.50606  165.50606  165.50606 ]]

 ...

 [[137.59897  141.59897  142.59897 ]
  [132.61583  136.61583  137.61583 ]
  [133.05469  137.05469  138.05469 ]
  ...
  [ 94.110565  94.110565  94.110565]
  [105.31386  105.31386  105.31386 ]
  [ 92.24574   92.24574   92.24574 ]]

 [[147.51054  151.51054  152.51054 ]
  [142.27975  146.

## Scaling the images between 0-1

In [49]:
def scale(image, label):
  return image/255, label

In [51]:
train_ds=train_ds.map(scale)
for image, label in train_ds.take(5):
  print("Image: ", image.numpy()[0][0])
  print("Label: ", image.numpy())

Image:  [1.         0.99607843 1.        ]
Label:  [[[1.         0.99607843 1.        ]
  [1.         0.99607843 1.        ]
  [1.         0.99897367 1.        ]
  ...
  [1.         1.         1.        ]
  [1.         1.         1.        ]
  [1.         1.         1.        ]]

 [[1.         0.99607843 1.        ]
  [1.         0.99607843 1.        ]
  [1.         0.99897367 1.        ]
  ...
  [1.         1.         1.        ]
  [1.         1.         1.        ]
  [1.         1.         1.        ]]

 [[1.         0.99607843 1.        ]
  [1.         0.99607843 1.        ]
  [1.         0.99897367 1.        ]
  ...
  [1.         1.         1.        ]
  [1.         1.         1.        ]
  [1.         1.         1.        ]]

 ...

 [[1.         1.         1.        ]
  [1.         1.         1.        ]
  [1.         1.         1.        ]
  ...
  [1.         1.         1.        ]
  [1.         1.         1.        ]
  [1.         1.         1.        ]]

 [[1.         1.       