## Importing the required libraries

In [152]:
import tensorflow as tf
import os

## small dataset

sales numbers can't be negative this is called data errors

In [153]:
daily_sales_numbers = [21, 22, -108, 31, -1, 32, 34, 31]

## Converting in tensorflow object

In [154]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

## Printing tensorflow object in data form

In [155]:
# for sales in tf_dataset:
#     print(sales.numpy())
    
# iterate tfdataset directly using .numpy() function or .as_numpy_iterator()
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
-108
31
-1
32
34
31


## take function --> prints only first n element

see only first n elements

In [156]:
n = 3
for sales in tf_dataset.take(n):
    print(sales.numpy())

21
22
-108


## filter function --> using filter function we can filter out the data, here we are filtering negative data

using filter function we can filter out the negative data

In [157]:
tf_dataset = tf_dataset.filter(lambda x: x > 0)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
31
32
34
31


## map function

suppose this data is in us dollars and we want to convert this in indan rupees

In [158]:
tf_dataset = tf_dataset.map(lambda y: y*72)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1512
1584
2232
2304
2448
2232


## shuffle function

to shuffle the element or dataset

https://stackoverflow.com/questions/53514495/what-does-batch-repeat-and-shuffle-do-with-tensorflow-dataset

In [159]:
buffer = 3
tf_dataset = tf_dataset.shuffle(buffer)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1512
2304
2232
2448
1584
2232


## batch function

create the batch of the dataset, batching the traing samples and distributing them on multi GPU environment

In [137]:
batch_size = 3
for sales_batch in tf_dataset.batch(batch_size):
    print(sales_batch.numpy())

[1584 2304 1512]
[2232 2448 2232]


## All above function in single line

whatever we have done in above steps we can do all that thing in single line

In [160]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
buffer = 3
batch_size = 3
tf_dataset = tf_dataset.filter(lambda x: x > 0).map(lambda y: y*72).shuffle(buffer).batch(batch_size)
for sales_batch in tf_dataset:
    print(sales_batch.numpy())

[1512 2232 2304]
[2448 2232 1584]


# Tensorflow input pipline is reading the data from a datasource than doing filtering, mapping, shuffling and creating batches (all kind of transformations)


## loading the dataset in tensorflow object

In [161]:
images_ds = tf.data.Dataset.list_files('images/*/*', shuffle = False)
for files in images_ds.take(5):
    print(files.numpy())

b'images/cat/20 Reasons Why Cats Make the Best Pets....jpg'
b'images/cat/7 Foods Your Cat Can_t Eat.jpg'
b'images/cat/A cat appears to have caught the....jpg'
b'images/cat/Adopt-A-Cat Month\xc2\xae - American Humane....jpg'
b'images/cat/All About Your Cat_s Tongue.jpg'


## shuffling the dataset with buffer size of 200

In [162]:
images_ds = images_ds.shuffle(200)

for file in images_ds.take(5):
    print(file.numpy())

b'images/dog/Aggression in dogs _ Animal Humane Society.jpg'
b'images/cat/Cat Throwing Up_ Normal or Cause for....jpg'
b'images/dog/The 25 Cutest Dog Breeds - Most....jpg'
b'images/dog/Colitis in Dogs _ VCA Animal Hospital.jpg'
b'images/dog/25 Best Small Dog Breeds \xe2\x80\x94 Cute and....jpg'


## creating labels

In [163]:
class_name = ['cat', 'dog']

## counting total images

In [164]:
image_count = len(images_ds)
image_count

130

## spliting the data into testing(20%) and training(80%) samples  

.take = It will take the image accoring to the input size

.skip = skip is the opposite of take

In [165]:
training_size = int(image_count * 0.8)

train_ds = images_ds.take(training_size)
test_ds = images_ds.skip(training_size)

## varifying the split

In [166]:
print(len(train_ds))
print(len(test_ds))

104
26


## finding the image label from there folder name

In [167]:
s = 'images/cat/Giving cats food with an antibody may....jpg'

# spliting_name = s.split('/')
# spliting_name = spliting_name[-2]
# spliting_name

# or directly 
s.split('/')[-2]

'cat'

## creating the function using tensorflow method which will give the label name

In [168]:
def get_label(file_path):
    return tf.strings.split(file_path, os.path.sep)[-2]

## creating the function which will process the image like decoding it and changing the image dimension in 128*128 px

In [169]:
def process_image(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img, [128,128])
    
    return img, label

In [170]:
train_ds = train_ds.map(process_image)
for img, label in train_ds.take(3):
    print("Image: ", img)
    print("Label: ", label)

Image:  tf.Tensor(
[[[6.54062500e+01 8.14062500e+01 1.07406250e+02]
  [6.86093750e+01 8.46093750e+01 1.10609375e+02]
  [7.17968750e+01 8.77968750e+01 1.13796875e+02]
  ...
  [5.06093750e+01 7.06093750e+01 9.56093750e+01]
  [4.90317383e+01 6.90317383e+01 9.40317383e+01]
  [4.65053711e+01 6.65053711e+01 9.15053711e+01]]

 [[6.64379883e+01 8.34379883e+01 1.09437988e+02]
  [6.56093750e+01 8.26093750e+01 1.08609375e+02]
  [6.44062500e+01 8.14062500e+01 1.07406250e+02]
  ...
  [5.60864258e+01 7.60864258e+01 1.01086426e+02]
  [5.16093750e+01 7.16093750e+01 9.66093750e+01]
  [6.22968750e+01 7.92968750e+01 1.05296875e+02]]

 [[6.12812500e+01 8.32812500e+01 1.07281250e+02]
  [6.30131836e+01 8.50131836e+01 1.09013184e+02]
  [6.11782227e+01 8.31782227e+01 1.07178223e+02]
  ...
  [5.16093750e+01 7.16093750e+01 9.66093750e+01]
  [5.66562500e+01 7.66562500e+01 1.01656250e+02]
  [7.42856445e+01 8.82856445e+01 1.15285645e+02]]

 ...

 [[1.28564453e+00 1.28564453e+00 1.28564453e+00]
  [1.16943359e+00 1.

## scaling the image inbetween 0 and 1

In [171]:
def scale(image, label):
    return image/255, label

## checking all above operations

In [172]:
train_ds = train_ds.map(scale)
for image, label in train_ds.take(5):
    print("*****Image: ", image.numpy()[0][0])
    print("*****Label: ", label.numpy())

*****Image:  [0.03999694 0.07136948 0.01254596 1.        ]
*****Label:  b'dog'
*****Image:  [0.44313726 0.28627452 0.04705882]
*****Label:  b'dog'
*****Image:  [0.10364583 0.23477328 0.09779412]
*****Label:  b'cat'
*****Image:  [0.99607843 0.99607843 0.99607843]
*****Label:  b'dog'
*****Image:  [0.49607843 0.4254902  0.38627452]
*****Label:  b'cat'
