In [1]:
import tensorflow as tf
import time


tf.__version__

'2.6.0'

In [2]:
# Sample sales figures, including zero and negative values that a later cell filters out.
sales_num = [8, 3, -1, 0, 8, 2, -32, 1]
# Build a tf.data pipeline with one scalar element per list entry.
dataset = tf.data.Dataset.from_tensor_slices(sales_num)
dataset

<TensorSliceDataset shapes: (), types: tf.int32>

In [3]:
# Iterating the dataset directly yields tensors; .numpy() converts each one for printing.
for sales in dataset:
    print(sales.numpy())

8
3
-1
0
8
2
-32
1


In [4]:
# as_numpy_iterator() yields already-converted values, so no .numpy() call is needed.
for sales in dataset.as_numpy_iterator():
    print(sales)

8
3
-1
0
8
2
-32
1


In [5]:
# Keep only the strictly positive sales values (drops 0 and the negatives).
tf_data = dataset.filter(lambda x: x > 0)
for sales in tf_data.as_numpy_iterator():
    print(sales)

8
3
8
2
1


In [6]:
# Multiply every remaining value by 74 — presumably a currency conversion rate; TODO confirm.
# NOTE: tf_data is reassigned in place, so re-running this cell applies the factor again.
tf_data = tf_data.map(lambda x: x * 74)
list(tf_data.as_numpy_iterator())

[592, 222, 592, 148, 74]

In [7]:
# Shuffle using a buffer of 2 elements.
# NOTE(review): the buffer is smaller than the dataset, so this is presumably only a
# partial (local) shuffle — confirm against the tf.data shuffle documentation.
tf_data = tf_data.shuffle(2)
list(tf_data.as_numpy_iterator())

[222, 592, 148, 74, 592]

In [8]:
# Group the elements into batches of 2; with an odd number of elements the last
# batch in the displayed result holds a single value.
tf_data = tf_data.batch(2)
list(tf_data)

[<tf.Tensor: shape=(2,), dtype=int32, numpy=array([592, 592])>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([148,  74])>,
 <tf.Tensor: shape=(1,), dtype=int32, numpy=array([222])>]

In [9]:
# Dataset of file paths matching images/<sub-directory>/<file>; shuffle=False turns off
# list_files' own shuffling of the matched paths.
images_ds = tf.data.Dataset.list_files('images/*/*', shuffle=False)

In [10]:
# Number of matched image files; reused below to size the train/test split.
images_count = len(images_ds)
images_count

16

In [11]:
type(images_ds)

tensorflow.python.data.ops.dataset_ops.TensorSliceDataset

In [12]:
# Peek at the first three file paths (printed as raw bytes).
for img in images_ds.take(3):
    print(img.numpy())

b'images\\cats\\10 Fascinating Facts About Cats _ Purina.jpg'
b'images\\cats\\20 Cat Facts That_ll Blow Your Kitty....jpg'
b'images\\cats\\8 Signs of Separation Anxiety in Cats....jpg'


In [13]:
# Class names used as labels in this notebook.
# NOTE(review): not referenced by any visible cell — presumably intended for a later
# label-to-index mapping; confirm or remove.
class_nums = ["cats", "dogs"]

In [20]:
# Shuffle once and freeze the order. With the default reshuffle_each_iteration=True the
# files are re-shuffled on every pass, so take()/skip() would pick different files each
# time and training images would leak into the test split.
# The buffer covers the whole file list so the shuffle is uniform.
images_ds = images_ds.shuffle(images_count, reshuffle_each_iteration=False)
# 80/20 train/test split over the (now fixed) shuffled order.
train_size = int(images_count * 0.8)
train_ds = images_ds.take(train_size)
test_ds = images_ds.skip(train_size)

In [21]:
len(test_ds)

4

In [22]:
len(train_ds)

12

In [23]:
def get_label(file_path):
    """Return the class label of an image: the name of its parent directory.

    Args:
        file_path: Scalar string (Python str or tf.string tensor) whose path
            components are separated by forward slashes or backslashes.

    Returns:
        Scalar tf.string tensor holding the second-to-last path component.
    """
    # Normalise Windows separators first so the same code handles paths written
    # with either separator, regardless of the platform the notebook runs on.
    normalized = tf.strings.regex_replace(file_path, r"\\", "/")
    parts = tf.strings.split(normalized, "/")
    return parts[-2]

In [24]:
get_label("images\\cats\\10 Fascinating Facts About Cats _ Purina.jpg")

<tf.Tensor: shape=(), dtype=string, numpy=b'cats'>

In [29]:
def process_images(file_path, image_size=(400, 400)):
    """Load one JPEG file and return it, resized, together with its label.

    Args:
        file_path: Scalar string path to the image file.
        image_size: Target (height, width) handed to tf.image.resize.
            Defaults to the 400x400 size this notebook uses.

    Returns:
        Tuple ``(img, label)``: the resized image produced by tf.image.resize
        and the label returned by get_label for the same path.
    """
    label = get_label(file_path)
    raw = tf.io.read_file(file_path)
    # channels=3 forces RGB output so grayscale JPEGs do not yield a different
    # channel count, which would break batching further down the pipeline.
    img = tf.image.decode_jpeg(raw, channels=3)
    img = tf.image.resize(img, image_size)
    return img, label

In [30]:
# Eager smoke test of process_images on a single hard-coded file path.
img, label = process_images("images\\cats\\10 Fascinating Facts About Cats _ Purina.jpg")

In [31]:
# Turn both path datasets into (image, label) datasets.
# NOTE: both names are reassigned in place, so this cell must run exactly once after
# the split cell; a second run would feed (image, label) pairs to process_images.
train_ds = train_ds.map(process_images)
test_ds = test_ds.map(process_images)

In [32]:
# Print the image tensor and label of one training sample.
for image, label in train_ds.take(1):
    print("****",image)
    print("****",label)

**** tf.Tensor(
[[[179.       206.       223.      ]
  [179.       206.       223.      ]
  [179.       206.       223.      ]
  ...
  [162.47116  172.4721   171.72026 ]
  [178.27979  184.49565  184.43982 ]
  [180.00961  186.0471   184.06464 ]]

 [[179.0375   206.0375   223.0375  ]
  [179.0375   206.0375   223.0375  ]
  [179.0375   206.0375   223.0375  ]
  ...
  [162.2583   172.4027   173.44301 ]
  [178.3906   187.4748   186.63477 ]
  [182.14133  191.17883  188.28386 ]]

 [[180.       207.       224.      ]
  [180.       207.       224.      ]
  [180.       207.       224.      ]
  ...
  [163.6921   175.74677  175.94992 ]
  [180.18634  190.52074  191.41885 ]
  [186.04712  196.29712  195.38776 ]]

 ...

 [[119.985    101.93656   92.12406 ]
  [119.18     102.055     92.32687 ]
  [123.07968  106.10468   98.2375  ]
  ...
  [185.29999  190.29999  210.29999 ]
  [187.22528  192.22528  212.22528 ]
  [189.22092  194.22092  214.22092 ]]

 [[124.82634  108.73746   95.89329 ]
  [125.91746  108.917

In [33]:
def scale(image, label):
    """Divide the image values by 255 and pass the label through unchanged.

    Args:
        image: Numeric value or array/tensor supporting division by an int.
        label: Any object; returned as-is.

    Returns:
        Tuple ``(image / 255, label)``.
    """
    scaled = image / 255
    return scaled, label

In [36]:
# Scale both splits so the test images get the same preprocessing as the training images.
# NOTE: each run of this cell stacks another scale() onto the pipelines (values end up
# divided by 255 more than once); run it exactly once after the process_images mapping cell.
train_ds = train_ds.map(scale)
test_ds = test_ds.map(scale)

In [37]:
# Sanity check: print the first pixel (row 0, column 0) and the label of five training samples.
for image, label in train_ds.take(5):
    print("****Image: ",image.numpy()[0][0])
    print("****Label: ",label.numpy())

****Image:  [0.00118868 0.00108103 0.00098875]
****Label:  b'cats'
****Image:  [0.00077201 0.00066436 0.00057209]
****Label:  b'cats'
****Image:  [0.00066128 0.00044598 0.00024606]
****Label:  b'cats'
****Image:  [0.00390619 0.00390619 0.00390619]
****Label:  b'dogs'
****Image:  [0.00172241 0.00138408 0.00120723]
****Label:  b'cats'


In [41]:
# Dataset of file paths matching Reviews/<sub-directory>/<file>; shuffle=False turns off
# list_files' own shuffling of the matched paths.
txt_ds = tf.data.Dataset.list_files("Reviews/*/*", shuffle=False)

In [44]:
rev_len = len(txt_ds)
rev_len

6

In [43]:
type(txt_ds)

tensorflow.python.data.ops.dataset_ops.TensorSliceDataset

In [45]:
list(txt_ds.as_numpy_iterator())

[b'Reviews\\negative\\neg_1.txt',
 b'Reviews\\negative\\neg_2.txt',
 b'Reviews\\negative\\neg_3.txt',
 b'Reviews\\positive\\pos_1.txt',
 b'Reviews\\positive\\pos_2.txt',
 b'Reviews\\positive\\pos_3.txt']

In [46]:
def get_review_type(file_path):
    """Return the review type: the name of the directory that holds the file.

    Args:
        file_path: Scalar string (Python str or tf.string tensor) whose path
            components are separated by forward slashes or backslashes.

    Returns:
        Scalar tf.string tensor holding the second-to-last path component.
    """
    # Normalise Windows separators so the lookup does not depend on the platform.
    normalized = tf.strings.regex_replace(file_path, r"\\", "/")
    parts = tf.strings.split(normalized, "/")
    # Index from the end (as get_label does) so that extra leading directories,
    # e.g. an absolute path, do not change which component is returned.
    return parts[-2]

In [47]:
get_review_type('Reviews\\negative\\neg_1.txt')

<tf.Tensor: shape=(), dtype=string, numpy=b'negative'>

In [89]:
def read_file(file_path):
    """Load one review file and pair its raw contents with its review type.

    Args:
        file_path: Scalar string path to the review file.

    Returns:
        Tuple ``(contents, review_type)``: the result of tf.io.read_file and
        the result of get_review_type for the same path.
    """
    review_type = get_review_type(file_path)
    contents = tf.io.read_file(file_path)
    return contents, review_type

In [90]:
read_file('Reviews\\negative\\neg_1.txt')

(<tf.Tensor: shape=(), dtype=string, numpy=b'Bad'>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'negative'>)

In [108]:
def filter_func(file_path):
    """Tell whether the review file at ``file_path`` has any content.

    Args:
        file_path: Scalar string path to the review file.

    Returns:
        Scalar tf.bool tensor: True when the file holds at least one byte,
        False when it is empty. Usable as a Dataset.filter predicate on a
        dataset of file paths.
    """
    txt, _ = read_file(file_path)
    # len() is not defined for a scalar string tensor; tf.strings.length counts
    # the bytes and returns a tensor, which is what a filter predicate must yield.
    return tf.strings.length(txt) > 0

In [109]:
# Turn every review path into a (text, label) pair. read_file already takes a single
# path argument, so it is passed to map() directly instead of through a lambda.
final_txt_ds = txt_ds.map(read_file)

In [None]:
# Drop reviews whose text is empty. The elements here are already (text, label) pairs,
# so the text is tested directly; filter_func expects a file path and would try to
# open a file named after the review text.
final_txt_ds2 = final_txt_ds.filter(lambda txt, label: tf.strings.length(txt) > 0)