# Hello TensorFlow Dataset Object

In [2]:
# Prerequisites
import sys
import numpy as np
import tensorflow as tf

# Report the interpreter and library versions this notebook was run with,
# so readers can reproduce the recorded outputs in a matching environment.
for label, version in (
    ("Python Version: ", sys.version),
    ("Numpy Version: ", np.__version__),
    ("Tensorflow Version: ", tf.__version__),
):
    print(label, version)

Python Version:  3.12.7 (tags/v3.12.7:0b05ead, Oct  1 2024, 03:06:41) [MSC v.1941 64 bit (AMD64)]
Numpy Version:  2.0.2
Tensorflow Version:  2.18.0


### Create a dummy dataset

In [4]:
# Fixed seed so that "Restart Kernel -> Run All" regenerates the same samples.
SEED = 42
rng = np.random.default_rng(SEED)

# 1000 rows of 16 values each, drawn from a standard normal distribution.
random_nums = rng.normal(size=(1000, 16))

# from_tensor_slices() slices along the first axis, so every row of
# random_nums becomes one dataset element of shape (16,).
dataset = tf.data.Dataset.from_tensor_slices(random_nums)

- A Dataset object is an iterable: it can be used directly in a `for` loop.
- It will typically yield batches of input data and labels.
- A Dataset object can be passed directly to the `fit()` method of a Keras model.

### Get individual elements from the dataset

In [7]:
# take(6) limits iteration to the first six elements (indices 0-5),
# so no manual counter check / break is needed inside the loop.
for i, element in enumerate(dataset.take(6)):
    print(f"element {i},  shape {element.shape}")
    print(element)

element 0,  shape (16,)
tf.Tensor(
[ 2.32482854 -0.97628267 -2.1444387  -0.96721843 -0.82478339  0.78164702
  0.46795164  0.02952116  0.53249777  0.372321   -0.41217289  0.89604719
 -1.15221669 -1.5592622  -0.76829803 -2.32069679], shape=(16,), dtype=float64)
element 1,  shape (16,)
tf.Tensor(
[ 0.01179703  1.81788228  0.30046066 -1.08725603  1.32618044 -0.23054567
  1.48306061 -0.08806151 -1.08739926  0.33466622 -0.09316201  0.83398946
 -2.31712005  0.43850538  0.90371852  0.57565794], shape=(16,), dtype=float64)
element 2,  shape (16,)
tf.Tensor(
[ 0.44171207 -0.47125307 -0.45895097  0.32425092  0.46424824  0.50946807
 -1.01871641 -0.52369184  0.28022186  1.55879441  1.29484276  0.18634811
  0.14721995 -0.84382738  2.27866848  0.58625029], shape=(16,), dtype=float64)
element 3,  shape (16,)
tf.Tensor(
[-0.46456532  0.22453846  1.0026874   0.87769787  0.21589293  1.30149012
 -0.07568179  0.53798744 -0.09044611 -0.39137423 -0.03003235  0.90897996
 -1.02395886  0.03057993 -0.30969018 -1

### Reshape with map

In [11]:
# map() applies the function to every element: each 16-value row is
# reshaped into a 4x4 matrix.
ds_reshaped = dataset.map(lambda row: tf.reshape(row, shape=(4, 4)))

# Inspect only the first six reshaped elements (indices 0-5).
for i, element in enumerate(ds_reshaped.take(6)):
    print(f"element {i},  shape {element.shape}")

element 0,  shape (4, 4)
element 1,  shape (4, 4)
element 2,  shape (4, 4)
element 3,  shape (4, 4)
element 4,  shape (4, 4)
element 5,  shape (4, 4)


### Get batches of data

In [10]:
# Group consecutive rows into batches of 32; every element of ds_batches
# therefore carries a leading batch axis.
ds_batches = dataset.batch(batch_size=32)

# Inspect only the first six batches (indices 0-5).
for i, element in enumerate(ds_batches.take(6)):
    print(f"element {i},  shape {element.shape}")

element 0,  shape (32, 16)
element 1,  shape (32, 16)
element 2,  shape (32, 16)
element 3,  shape (32, 16)
element 4,  shape (32, 16)
element 5,  shape (32, 16)


### Use dataset for time series

In [4]:
# Toy series 0..9.
my_series = list(range(10))

# targets[i] is paired with the window that starts at index i. Offsetting the
# targets by the window length (3) makes each target the value that comes
# right after its 3-step input window.
ds_series = tf.keras.utils.timeseries_dataset_from_array(
    data=my_series,
    targets=my_series[3:],
    sequence_length=3,
    batch_size=2,
)

# Materialise the dataset so the (inputs, targets) batches are displayed.
list(ds_series)

[(<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
  array([[0, 1, 2],
         [1, 2, 3]], dtype=int32)>,
  <tf.Tensor: shape=(2,), dtype=int32, numpy=array([3, 4], dtype=int32)>),
 (<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
  array([[2, 3, 4],
         [3, 4, 5]], dtype=int32)>,
  <tf.Tensor: shape=(2,), dtype=int32, numpy=array([5, 6], dtype=int32)>),
 (<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
  array([[4, 5, 6],
         [5, 6, 7]], dtype=int32)>,
  <tf.Tensor: shape=(2,), dtype=int32, numpy=array([7, 8], dtype=int32)>),
 (<tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[6, 7, 8]], dtype=int32)>,
  <tf.Tensor: shape=(1,), dtype=int32, numpy=array([9], dtype=int32)>)]

Build the same kind of sliding windows with the `window()` method:

In [5]:
# range(9) yields 0..8; window() groups them into sliding windows of up to
# 4 elements, advancing one element at a time.
ds_windows = tf.data.Dataset.range(9).window(size=4, shift=1)

for ds_window in ds_windows:
    # Each window is itself a Dataset, hence the inner loop.
    for elem in ds_window:
        print(f"{elem}", end=" ")
    print()  # finish the line after each window

0 1 2 3 
1 2 3 4 
2 3 4 5 
3 4 5 6 
4 5 6 7 
5 6 7 8 
6 7 8 
7 8 
8 


`window()` returns a nested dataset (a dataset whose elements are themselves datasets). Flatten it with `flat_map()`:

In [7]:
# Build the windows and flatten them in one chain:
#   - drop_remainder=True discards the trailing windows shorter than 4;
#   - batching each window by its full size turns it into a single tensor,
#     and flat_map() concatenates those into one flat dataset of tensors.
ds = (
    tf.data.Dataset.range(9)
    .window(size=4, shift=1, drop_remainder=True)
    .flat_map(lambda window: window.batch(4))
)

for window_tensor in ds:
    print(window_tensor)

tf.Tensor([0 1 2 3], shape=(4,), dtype=int64)
tf.Tensor([1 2 3 4], shape=(4,), dtype=int64)
tf.Tensor([2 3 4 5], shape=(4,), dtype=int64)
tf.Tensor([3 4 5 6], shape=(4,), dtype=int64)
tf.Tensor([4 5 6 7], shape=(4,), dtype=int64)
tf.Tensor([5 6 7 8], shape=(4,), dtype=int64)
