In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [2]:
import tensorflow as tf

In [3]:
arr = [32,45,12,34,78,65,76,-23,-108,-8,89,10]
tf_dataset = tf.data.Dataset.from_tensor_slices(arr)

In [4]:
for i in tf_dataset:
    print(i.numpy())

32
45
12
34
78
65
76
-23
-108
-8
89
10


In [5]:
for i in tf_dataset.as_numpy_iterator():
    print(i)

32
45
12
34
78
65
76
-23
-108
-8
89
10


In [6]:
len(tf_dataset)

12

In [7]:
def dollar_into_rupee(x):
    return x*83

In [8]:
#filter function
tf_dataset = tf_dataset.filter(lambda x:x>0)

In [9]:
for i in tf_dataset.as_numpy_iterator():
    print(i)

32
45
12
34
78
65
76
89
10


In [10]:
tf_dataset = tf_dataset.map(dollar_into_rupee)#same as pandas .apply functionality

In [11]:
for i in tf_dataset.as_numpy_iterator():
    print(i)

2656
3735
996
2822
6474
5395
6308
7387
830


In [12]:
for i in tf_dataset.take(4):#this will take only 4 values from beginning
    print(i)

tf.Tensor(2656, shape=(), dtype=int32)
tf.Tensor(3735, shape=(), dtype=int32)
tf.Tensor(996, shape=(), dtype=int32)
tf.Tensor(2822, shape=(), dtype=int32)


# Shuffle in tf.data.Dataset

In [13]:
for i in tf_dataset.shuffle(3).as_numpy_iterator():
    print(i)

2656
2822
3735
6474
6308
996
7387
830
5395


# Batch

In [15]:
for i in tf_dataset.batch(3).as_numpy_iterator():
    print(i,type(i))

[2656 3735  996] <class 'numpy.ndarray'>
[2822 6474 5395] <class 'numpy.ndarray'>
[6308 7387  830] <class 'numpy.ndarray'>


# Files - .list_files()

In [60]:
image_ds = tf.data.Dataset.list_files("data/PlantVillage/*/*",shuffle=True)# * is a placeholder for dynamic path elements

In [61]:
for i in image_ds.take(5):
    print(i.numpy())

b'data/PlantVillage/Potato___Late_blight/0d8346e1-4624-4979-84e6-1353ca59007a___RS_LB 4079.JPG'
b'data/PlantVillage/Potato___Early_blight/46dc77f0-be88-4887-b1f7-82adff9aaa39___RS_Early.B 7750.JPG'
b'data/PlantVillage/Potato___Late_blight/33b849d5-ec92-4acb-88e6-530accf22734___RS_LB 4994.JPG'
b'data/PlantVillage/Potato___Late_blight/80bdc2a3-66b8-4d95-abe7-65c7806f7cbf___RS_LB 3073.JPG'
b'data/PlantVillage/Potato___healthy/9a6eb7c4-6b43-477a-89e0-69f62ef67991___RS_HL 1846.JPG'


In [62]:
# beacuse s will be a Tensor and not a python string, We will have to use tf.strings.split(Tensor,delimiter)
def get_label(filepath):
    return tf.strings.split(tf.strings.split(filepath,"/")[-2],"___")[-1]

In [63]:
def process_img(filepath):
    label = get_label(filepath)
    img = tf.io.read_file(filepath)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img,[128,128])
    
    return img,label

In [67]:
int(len(image_ds)*0.8)

1721

In [68]:
train_size = int(len(image_ds)*0.8)
train_ds = image_ds.take(train_size)
test_ds = image_ds.skip(train_size)

In [71]:
train_ds = train_ds.map(process_img)

In [73]:
test_ds = test_ds.map(process_img)

In [74]:
for img,label in test_ds.take(3):
    print("***Image",img[0][0])
    print("***label",label)

***Image tf.Tensor([194.25 189.25 195.25], shape=(3,), dtype=float32)
***label tf.Tensor(b'Early_blight', shape=(), dtype=string)
***Image tf.Tensor([172.5  165.75 172.  ], shape=(3,), dtype=float32)
***label tf.Tensor(b'Early_blight', shape=(), dtype=string)
***Image tf.Tensor([128.75 117.75 125.75], shape=(3,), dtype=float32)
***label tf.Tensor(b'Late_blight', shape=(), dtype=string)


In [75]:
def scale(img,label):
    return img/255,label

In [78]:
train_ds = train_ds.map(scale)
test_ds = test_ds.map(scale)

In [79]:
for img,label in test_ds.take(3):
    print("***Image",img[0][0])
    print("***label",label)

***Image tf.Tensor([0.6735294 0.6460784 0.6735294], shape=(3,), dtype=float32)
***label tf.Tensor(b'Late_blight', shape=(), dtype=string)
***Image tf.Tensor([0.46764705 0.45196077 0.45588234], shape=(3,), dtype=float32)
***label tf.Tensor(b'Early_blight', shape=(), dtype=string)
***Image tf.Tensor([0.57058823 0.5470588  0.55490196], shape=(3,), dtype=float32)
***label tf.Tensor(b'Late_blight', shape=(), dtype=string)
