In [1]:
import tensorflow as tf
import numpy as np

### Using TensorFlow

In [2]:
# Build a tf.data pipeline that yields the integers 0..9.
dataset = tf.data.Dataset.range(10)

# Printing the dataset object only shows its element spec, not its contents ...
print(dataset)

# ... so iterate it eagerly and collect every element as a NumPy value.
print(list(dataset.as_numpy_iterator()))

<RangeDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


2022-05-08 16:34:02.087651: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2022-05-08 16:34:02.087676: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: acer-Predator-PT516-51s
2022-05-08 16:34:02.087679: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: acer-Predator-PT516-51s
2022-05-08 16:34:02.087719: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.103.1
2022-05-08 16:34:02.087730: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.103.1
2022-05-08 16:34:02.087734: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 470.103.1
2022-05-08 16:34:02.088214: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instru

### Using Numpy

In [3]:
# The same 0..9 sequence as a NumPy array; start/stop/step are spelled out
# to mirror the arguments of Python's range().
dataset_np = np.arange(start=0, stop=10, step=1)
print(dataset_np)

[0 1 2 3 4 5 6 7 8 9]


### Using Python List

In [4]:
# Plain-Python version of the same data: unpack the range into a list.
dataset_py_list = [*range(10)]
print(dataset_py_list)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


### Understanding sliding window

In [5]:
# window_size: number of consecutive elements taken for each window.
# stride: how far the window start advances between consecutive windows.
window_size, stride = 5, 1

In [6]:
"""
This output is not exactly what we want when we scan the dataset using a window.
Instead, it only gives us the start_idx from which each scan begins.
Each slice should end window_size elements after start_idx, but here it runs up to the end of the dataset itself.
"""
# Walk the candidate start positions, but slice with an open end: every
# printed list therefore runs from start_idx to the end of the data.
start, step = 0, stride
stop = len(dataset_py_list) - window_size

for start_idx in range(start, stop, step):
    tail = dataset_py_list[start_idx:]
    print(tail)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
[2, 3, 4, 5, 6, 7, 8, 9]
[3, 4, 5, 6, 7, 8, 9]
[4, 5, 6, 7, 8, 9]


**Expected Output:**

```python

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|-->

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [1, 2, 3, 4, 5, 6, 7, 8, 9]
    |-->

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [2, 3, 4, 5, 6, 7, 8, 9]
       |-->

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [3, 4, 5, 6, 7, 8, 9]
          |-->

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [4, 5, 6, 7, 8, 9]
             |-->

```

In [7]:
# Bound every slice at start_idx + window_size. The start positions still cover
# the whole list, so slices near the end come out shorter than window_size
# (list slicing simply stops at the end of the list).
start, stop, step = 0, len(dataset_py_list), stride

for start_idx in range(start, stop, step):
    end_idx = start_idx + window_size
    window = dataset_py_list[start_idx:end_idx]
    print(window)

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]
[6, 7, 8, 9]
[7, 8, 9]
[8, 9]
[9]


**Expected Output:**

```python

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [0, 1, 2, 3, 4]
 |___________|

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [1, 2, 3, 4, 5]
    |___________|

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [2, 3, 4, 5, 6]
       |___________|

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [3, 4, 5, 6, 7]
          |___________|

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [4, 5, 6, 7, 8]
             |___________|

    
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [5, 6, 7, 8, 9]
                |___________|
    

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [6, 7, 8, 9]
                   |________|-->
    
    
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [7, 8, 9]
                      |______|-->

    
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [8, 9]
                         |__|-->
    
    
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [9]
                            ||-->
    
```

In [8]:
# Only start a window where a full window_size slice still fits.
start = 0
step = stride
# +1 because range() excludes `stop`, and the last valid start index is
# len(dataset_py_list) - window_size.
stop = len(dataset_py_list) - window_size + 1

for start_idx in range(start, stop, step):
    end_idx = start_idx + window_size
    bounds = slice(start_idx, end_idx)
    print(dataset_py_list[bounds])

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]


**Expected Output:**

```python

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [0, 1, 2, 3, 4]
 |___________|

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [1, 2, 3, 4, 5]
    |___________|

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [2, 3, 4, 5, 6]
       |___________|

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [3, 4, 5, 6, 7]
          |___________|

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [4, 5, 6, 7, 8]
             |___________|
    
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] ------> [5, 6, 7, 8, 9]
                |___________|
    
```

### Creating the same output using TensorFlow

In [9]:
# window() groups the range into windows of up to window_size elements, each
# starting `stride` elements after the previous one. Every window is itself a
# small dataset, so it has to be iterated to see its values.
dataset = tf.data.Dataset.range(10).window(size=window_size, shift=stride)

for window_ds in dataset:
    # Without drop_remainder the trailing windows are shorter than window_size.
    print(list(window_ds.as_numpy_iterator()))

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]
[6, 7, 8, 9]
[7, 8, 9]
[8, 9]
[9]


2022-05-08 16:34:02.189018: W tensorflow/core/framework/dataset.cc:768] Input of Window will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


In [10]:
# Same windowing as before, but drop_remainder=True discards the trailing
# windows that hold fewer than window_size elements.
dataset = tf.data.Dataset.range(10).window(
    size=window_size,
    shift=stride,
    drop_remainder=True,
)

for window_ds in dataset:
    # Each window is a nested dataset; collect its elements into a list to print.
    print(list(window_ds.as_numpy_iterator()))

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]


In [11]:
# Turn every window (a nested dataset) into a single tensor.
dataset = tf.data.Dataset.range(10)

# Keep only full windows of exactly window_size elements.
dataset = dataset.window(size=window_size, shift=stride, drop_remainder=True)

# Batch by window_size rather than a hard-coded 5: each window then always
# collapses into exactly one tensor, even if window_size is changed above.
dataset = dataset.flat_map(lambda window: window.batch(window_size))

for window_tensor in dataset:
    print(window_tensor.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


In [12]:
# Build (features, label) pairs from sliding windows over 0..9.
dataset = tf.data.Dataset.range(10)

# Keep only full windows of exactly window_size elements.
dataset = dataset.window(size=window_size, shift=stride, drop_remainder=True)

# Batch by window_size rather than a hard-coded 5: each window then always
# collapses into exactly one tensor, so the split below sees the whole window.
dataset = dataset.flat_map(lambda window: window.batch(window_size))

# Everything except the last element is the input; the last element is the label.
dataset = dataset.map(lambda window: (window[:-1], window[-1]))

for x, y in dataset:
    print(x.numpy(), y.numpy())

[0 1 2 3] 4
[1 2 3 4] 5
[2 3 4 5] 6
[3 4 5 6] 7
[4 5 6 7] 8
[5 6 7 8] 9


In [13]:
# Full pipeline: window -> tensor -> (features, label) -> shuffle -> batch.
dataset = tf.data.Dataset.range(10)

# Keep only full windows of exactly window_size elements.
dataset = dataset.window(size=window_size, shift=stride, drop_remainder=True)

# Batch by window_size rather than a hard-coded 5: each window then always
# collapses into exactly one tensor, so the split below sees the whole window.
dataset = dataset.flat_map(lambda window: window.batch(window_size))

# Everything except the last element is the input; the last element is the label.
dataset = dataset.map(lambda window: (window[:-1], window[-1]))

# No seed is passed to shuffle(), so the printed order can differ between runs.
dataset = dataset.shuffle(buffer_size=10)

# Group the pairs two at a time and prefetch one batch ahead.
dataset = dataset.batch(2).prefetch(1)

# Print the results
for x, y in dataset:
    print("x = ", x.numpy())
    print("y = ", y.numpy())
    print()


x =  [[3 4 5 6]
 [1 2 3 4]]
y =  [7 5]

x =  [[0 1 2 3]
 [5 6 7 8]]
y =  [4 9]

x =  [[2 3 4 5]
 [4 5 6 7]]
y =  [6 8]

