In [80]:
# import required libraries
import tensorflow as tf
import matplotlib.pyplot as plt
import os

In [17]:
# declare a list of random values
daily_sales = [23, 45, 11, -1, -34, 0, 12, 33]

In [18]:
# make a pipeline using tensorflow
dataset = tf.data.Dataset.from_tensor_slices(daily_sales)

In [19]:
dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [20]:
# iterate through dataset and show elements
for element in dataset:
    print(element.numpy())

23
45
11
-1
-34
0
12
33


In [21]:
# if want to show selected no of elements
for i in dataset.take(3):
    print(i.numpy())

23
45
11


In [22]:
# now remove negative values from dataset
dataset = dataset.filter(lambda x: x >= 0)

In [23]:
# iterate again and check
for j in dataset.as_numpy_iterator():
    print(j)

23
45
11
0
12
33


In [24]:
# convert above data to pkr as it is in usd
dataset = dataset.map(lambda y: y*279)

In [25]:
for k in dataset.as_numpy_iterator():
    print(k)

6417
12555
3069
0
3348
9207


In [28]:
# shuffle data elements
dataset = dataset.shuffle(2)

for m in dataset.as_numpy_iterator():
    print(m)

0
9207
3348
6417
12555
3069


In [31]:
# we can create batches 

for j in dataset.batch(3):
    print(j.numpy())

[3069 6417 3348]
[ 9207     0 12555]


In [36]:
# combine all the above operations in just one line and make a pipeline
dataset_pipe = tf.data.Dataset.from_tensor_slices(daily_sales)
dataset_pipe = dataset_pipe.filter(lambda x: x >= 0).map(lambda y: y*279).shuffle(2).batch(3)

In [37]:
dataset_pipe

<_BatchDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int32, name=None)>

In [39]:
for i in dataset_pipe:
    print(i.numpy())

[ 6417 12555  3069]
[3348 9207    0]


# Pipeline To Read Image Files

In [110]:
# read images from the directory
images_dataset = tf.data.Dataset.list_files(r"C:\Users\Arfan Shah\Train\*\*", shuffle = True)

# for loop to iterate over directory and then read images
for i in images_dataset.take(5):
    print(i.numpy())

b'C:\\Users\\Arfan Shah\\Train\\Potato___Late_blight\\0085ef03-aec3-431a-99a1-de286e10c0cf___RS_LB 2949.JPG'
b'C:\\Users\\Arfan Shah\\Train\\Potato___Healthy\\762672f4-17e6-4fd6-84f1-3bcb091b3ca5___RS_HL 5417_newGRR.JPG'
b'C:\\Users\\Arfan Shah\\Train\\Potato___Early_blight\\c9d5a9b6-4e5c-42df-8f9b-5f57e13000df___RS_Early.B 6933.JPG'
b'C:\\Users\\Arfan Shah\\Train\\Potato___Early_blight\\cfd3112d-e376-4b82-bc06-de0bfba0ab41___RS_Early.B 8389_180deg.JPG'
b'C:\\Users\\Arfan Shah\\Train\\Potato___Early_blight\\70b89c8b-2850-4ec8-8da1-a4b9de5351c7___RS_Early.B 6889_flipTB.JPG'


In [111]:
# declare classes in directory
num_classes = ['Early_blight', 'Healthy', 'Late_blight']

In [112]:
# number of images in directory
image_count = len(images_dataset)
image_count

5702

In [113]:
# training size from the actual dataset
train_size = int(image_count*0.8)
train_size

4561

In [114]:
# training dataset
train_dataset = images_dataset.take(train_size)

# test dataset
test_dataset = images_dataset.skip(train_size)

In [115]:
# legth of training and testing dataset
print(len(train_dataset))
print(len(test_dataset))

4561
1141


In [116]:
# file path
string = 'C:\\Users\\Arfan Shah\\Train\\Potato___Late_blight\\d438f9b3-2952-425e-8c5f-a92a21b35f5f___RS_LB 4470_180deg.JPG'
string.split('\\')[-2]

'Potato___Late_blight'

In [117]:
# find labels in train dataset
for i in train_dataset.take(3):
    print(i.numpy())

b'C:\\Users\\Arfan Shah\\Train\\Potato___Early_blight\\275c3337-11ee-487c-9093-bcd5e310c72c___RS_Early.B 7671_flipTB.JPG'
b'C:\\Users\\Arfan Shah\\Train\\Potato___Early_blight\\cfd3112d-e376-4b82-bc06-de0bfba0ab41___RS_Early.B 8389_flipTB.JPG'
b'C:\\Users\\Arfan Shah\\Train\\Potato___Early_blight\\c43a5fd7-c60c-4a04-9835-bd35b551ec44___RS_Early.B 6701_180deg.JPG'


In [118]:
# function to find label path

def get_label(file):
    return tf.strings.split(file, os.path.sep)[-2]

In [120]:
# function to find both x and y parts
def process_dataset(file):
    label = get_label(file)
    image = tf.io.read_file(file)

    # decode jpeg image
    image = tf.image.decode_jpeg(image)

    # resize images
    image = tf.image.resize(image, [256, 256])
    return image, label

In [125]:
# now find label for whole training dataset
train_dataset = train_dataset.map(process_dataset)
for image, label in train_dataset.take(1):
    print("Image: ", image)
    print("Label: ", label)

Image:  tf.Tensor(
[[[189. 193. 204.]
  [201. 205. 216.]
  [205. 209. 220.]
  ...
  [141. 143. 156.]
  [157. 159. 172.]
  [137. 139. 152.]]

 [[199. 203. 214.]
  [198. 202. 213.]
  [188. 192. 203.]
  ...
  [127. 129. 142.]
  [142. 144. 157.]
  [138. 140. 153.]]

 [[193. 197. 208.]
  [188. 192. 203.]
  [184. 188. 199.]
  ...
  [122. 124. 137.]
  [133. 135. 148.]
  [137. 139. 152.]]

 ...

 [[152. 154. 166.]
  [149. 151. 163.]
  [146. 148. 160.]
  ...
  [ 69.  71.  83.]
  [102. 104. 116.]
  [159. 161. 173.]]

 [[150. 152. 164.]
  [146. 148. 160.]
  [144. 146. 158.]
  ...
  [133. 135. 148.]
  [129. 131. 144.]
  [ 93.  95. 108.]]

 [[158. 160. 172.]
  [154. 156. 168.]
  [151. 153. 165.]
  ...
  [103. 105. 118.]
  [105. 107. 120.]
  [107. 109. 122.]]], shape=(256, 256, 3), dtype=float32)
Label:  tf.Tensor(b'Potato___Early_blight', shape=(), dtype=string)


In [126]:
# now scale the image data
def scale(image, label):
    return image/255, label

In [127]:
train_dataset = train_dataset.map(scale)

In [128]:
for image, label in train_dataset.take(2):
    print("Image: ", image.numpy()[0][0])
    print("Label: ", label.numpy())

Image:  [0.69803923 0.6862745  0.7137255 ]
Label:  b'Potato___Late_blight'
Image:  [0.49803922 0.48235294 0.5411765 ]
Label:  b'Potato___Early_blight'
