<a href="https://colab.research.google.com/github/hindia-vic/deep_learning/blob/main/input_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

In [3]:
daily_sales_numbers = [21, 22, -108, 31, -1, 32, 34, 31]

In [6]:
tf_dataset=tf.data.Dataset.from_tensor_slices(daily_sales_numbers)

In [7]:
#view data in tf_dataset
for sales in tf_dataset:
    print(sales.numpy())

21
22
-108
31
-1
32
34
31


In [8]:
#filtering negative number
tf_dataset=tf_dataset.filter(lambda x:x>0)

In [9]:
for sales in tf_dataset:
    print(sales.numpy())

21
22
31
32
34
31


In [10]:
#apply transformation
tf_dataset=tf_dataset.map(lambda x:x*72)

In [12]:
for sales in tf_dataset:
    print(sales.numpy())

1512
1584
2232
2304
2448
2232


In [13]:
#shuffling element
tf_dataset=tf_dataset.shuffle(2)
for sales in tf_dataset:
    print(sales.numpy())

1584
1512
2304
2232
2232
2448


In [14]:
#creating batches
for sales_batch in tf_dataset.batch(2):
    print(sales_batch.numpy())

[1584 1512]
[2232 2448]
[2304 2232]


In [16]:
#doing the above operation in a single line
tf_dataset=tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset=tf_dataset.filter(lambda x:x>0).map(lambda y:y*72).shuffle(2).batch(2)
for sales in tf_dataset:
    print(sales.numpy())

[1584 1512]
[2232 2448]
[2304 2232]


In [17]:
#downloading cats and dogs dataset from kaggle
import kagglehub

# Download latest version
path = kagglehub.dataset_download("karakaggle/kaggle-cat-vs-dog-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/karakaggle/kaggle-cat-vs-dog-dataset?dataset_version_number=1...


100%|██████████| 787M/787M [00:13<00:00, 61.3MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1


In [29]:
list(path.iterdir())

[PosixPath('/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a')]

In [25]:
import pandas as pd
import pathlib
path = pathlib.Path(path)


In [33]:
full=tf.data.Dataset.list_files(str(path /'kagglecatsanddogs_3367a'/ 'PetImages' / '*'/'*.jpg'),shuffle=False)

In [30]:
cats = tf.data.Dataset.list_files(str(path /'kagglecatsanddogs_3367a'/ 'PetImages' / 'Cat' / '*.jpg'))
dogs = tf.data.Dataset.list_files(str(path /'kagglecatsanddogs_3367a' / 'PetImages' / 'Dog' / '*.jpg'))

In [35]:
for file in full.take(5):
    print(file.numpy())

b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Cat/0.jpg'
b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Cat/1.jpg'
b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Cat/10.jpg'
b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Cat/100.jpg'
b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Cat/1000.jpg'


In [41]:
#shuffling
full=full.shuffle(24000)
for file in full.take(5):
    print(file.numpy())

b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Cat/10091.jpg'
b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Dog/4392.jpg'
b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Dog/3026.jpg'
b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Cat/11595.jpg'
b'/root/.cache/kagglehub/datasets/karakaggle/kaggle-cat-vs-dog-dataset/versions/1/kagglecatsanddogs_3367a/PetImages/Cat/9611.jpg'


In [38]:
image_count=len(full)
print(image_count)

24959


In [42]:
trainsize=int(image_count*0.8)
print(trainsize)

19967


In [44]:
#dividing train and test
train_ds=full.take(trainsize)
test_ds=full.skip(trainsize)

In [45]:
len(test_ds)

4992

In [48]:
def get_label(file_path):
    import os
    return tf.strings.split(file_path,os.path.sep)[-2]


In [51]:
def process_image(file_path):
  label=get_label(file_path)
  img=tf.io.read_file(file_path)
  img=tf.image.decode_jpeg(img)
  img=tf.image.resize(img,[128,128])
  return img , label

In [58]:
train_ds=train_ds.map(process_image)
for img, label in train_ds.take(3):
  print('image:',img)
  print('label:',label)

image: tf.Tensor(
[[[ 49.10437   43.10437   54.01062 ]
  [ 43.356934  39.326904  53.68628 ]
  [ 20.53125   16.265625  34.0625  ]
  ...
  [128.86841  120.82153   97.72778 ]
  [166.91272  160.88928  136.79553 ]
  [152.54309  141.51965  119.4259  ]]

 [[ 64.28064   64.17847   73.11658 ]
  [ 51.36902   50.894287  63.65808 ]
  [ 24.612427  23.471802  40.073364]
  ...
  [219.63074  217.43408  217.74963 ]
  [207.27576  207.03955  206.40674 ]
  [231.00415  229.5907   229.50476 ]]

 [[ 85.453     95.98425  101.12488 ]
  [ 50.68445   58.78125   68.69226 ]
  [ 19.78894   25.437378  40.085815]
  ...
  [211.0697   220.39026  229.3042  ]
  [235.177    246.67224  254.59717 ]
  [222.61108  237.01416  243.96106 ]]

 ...

 [[191.8081   184.71436  148.71436 ]
  [164.7782   159.13757  125.85632 ]
  [195.17175  190.43738  161.23425 ]
  ...
  [182.78943  177.78943  147.78943 ]
  [195.57642  190.57642  160.57642 ]
  [169.79224  169.46411  138.06409 ]]

 [[195.13965  188.0459   152.0459  ]
  [194.2937   188.6

In [59]:
def scale(image,label):
  return image/255, label

In [60]:
train_ds=train_ds.map(scale)
for image,label in train_ds.take(5):
  print('image:',image.numpy()[0][0])
  print('label:',label.numpy())


image: [0.52009803 0.6247549  0.4060049 ]
label: b'Dog'
image: [0.2780637  0.30729166 0.3647059 ]
label: b'Cat'
image: [0.29411766 0.28235295 0.24705882]
label: b'Cat'
image: [0.354542   0.40589002 0.40589002]
label: b'Cat'
image: [0.71066177 0.7224265  0.78909314]
label: b'Cat'


In [47]:
train_ds=train_ds.map(lambda x: (x,get_label(x)))
test_ds=test_ds.map(lambda x: (x,get_label(x)))

NameError: name 'train' is not defined