In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from IPython.display import display

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, GRU, Bidirectional

# Bidirectional Layer

**SHAPE OF THE INPUT LAYER**

When defining the shape of the Input layer, the batch size is not included in the shape because it can vary depending on the batch of data being processed. Instead, only the shape of a single data instance is specified.

For example, consider a batch of input data consisting of 32 images, each of size 28x28 with 3 color channels. The shape of the Input layer would be defined as follows:

```python
from tensorflow.keras.layers import Input

input_shape = (28, 28, 3)
inputs = Input(shape=input_shape)
```

In this case, the shape of the Input layer is (28, 28, 3), which corresponds to the shape of a single image. The batch size of 32 is not included in the shape definition, and will be automatically determined based on the batch of data being fed into the network during training or inference.

By omitting the batch size from the shape definition, the same model architecture can be used to process data with varying batch sizes, making the model more flexible and easier to reuse.

**SHAPE OF TEXT**

```python
BATCH_SIZE = 32              # ["My cat is black.", "I wear glasses.", ...]
MAX_SEQUENCE_LENGTH = 10     # "My cat is black. NULL NULL ..."
EMBEDDING_LENGTH = 5         # "my -> [254, 3, 5, 2, 87], cat -> [...], ..."
```

### Using input layer

In [17]:
# Symbolic input: every sample is 10 timesteps of 5 features; the batch axis
# is left out of `shape` and shows up as `None` in the result.
input_layer = Input(shape=(10, 5))

# One 20-unit LSTM wrapped so that it is applied in both directions.
lstm_layer = LSTM(units=20)
bidirectional_layer = Bidirectional(layer=lstm_layer)
output = bidirectional_layer(input_layer)

# Calling the layer on a symbolic input yields a KerasTensor: it carries
# only shape and dtype, no values yet.
output

<KerasTensor: shape=(None, 40) dtype=float32 (created by layer 'bidirectional_10')>

### Using input values

In [19]:
# Seed TensorFlow (layer weight initialisation) and NumPy (input data) so that
# a re-run of this cell starts from the same weights and the same inputs.
SEED = 42
tf.random.set_seed(SEED)
rng = np.random.default_rng(SEED)

# A concrete batch: 32 sequences x 10 timesteps x 5 features, float32 in [0, 1).
input_values = rng.random((32, 10, 5), dtype=np.float32)

lstm_layer = LSTM(20)
bidirectional_layer = Bidirectional(lstm_layer)
output = bidirectional_layer(input_values)

# Called on real data, the layer returns an eager tensor that holds values.
# The default `merge_mode` is `concat`, hence 2 x 20 = 40 features per sample.
output

<tf.Tensor: shape=(32, 40), dtype=float32, numpy=
array([[-0.18838741,  0.04260767, -0.07224035, ...,  0.04364341,
        -0.00166474, -0.19681224],
       [-0.15869983,  0.00556247, -0.06004506, ...,  0.0692396 ,
        -0.03293317, -0.25512323],
       [-0.15612747,  0.02972779, -0.07140367, ...,  0.05190247,
        -0.03081059, -0.24716586],
       ...,
       [-0.1240718 , -0.01141947, -0.05324303, ...,  0.06523241,
         0.04116708, -0.23248076],
       [-0.15993057, -0.09914766, -0.05532391, ...,  0.05644035,
        -0.05249197, -0.23391026],
       [-0.1850729 ,  0.02952537, -0.02644309, ...,  0.08815773,
        -0.04372393, -0.2554341 ]], dtype=float32)>

### Sum the outputs

In [20]:
# Seed TensorFlow (layer weight initialisation) and NumPy (input data) so that
# a re-run of this cell starts from the same weights and the same inputs.
SEED = 42
tf.random.set_seed(SEED)
rng = np.random.default_rng(SEED)

# A concrete batch: 32 sequences x 10 timesteps x 5 features, float32 in [0, 1).
input_values = rng.random((32, 10, 5), dtype=np.float32)

lstm_layer = LSTM(20)
bidirectional_layer = Bidirectional(lstm_layer, merge_mode='sum')
output = bidirectional_layer(input_values)

# With `merge_mode='sum'` the forward and backward outputs are added
# element-wise instead of concatenated, so each sample keeps 20 features.
output.shape

TensorShape([32, 20])

## Merge mode 'concat'

Първият `Bidirectional` слой връща вектори с дължина 40, защото конкатенира („залепя“) резултатите на двата LSTM подслоя (2×20): 20 за forward и 20 за backward.

Създаваме 2 слоя, които работят едновременно: единият чете думите стандартно (отляво надясно), а другият – наобратно (отзад напред). Всеки слой дава n активации – колкото неврона има (в примера долу: 20). Тъй като слоевете са 2, резултатите им може да обединим, като ги „залепим“ (конкатенираме, `merge_mode='concat'`), съберем (`merge_mode='sum'`) или вземем средното им (`merge_mode='ave'`).

In [18]:
# Two stacked bidirectional LSTMs, both with the default merge_mode ('concat').
model = Sequential()
model.add(Input(shape=(50, 5)))  # 50 timesteps x 5 features per sample

# return_sequences=True keeps the output of every timestep, so the next
# recurrent layer still receives a sequence.
model.add(Bidirectional(LSTM(20, return_sequences=True)))
model.add(Bidirectional(LSTM(20)))

In [19]:
# Both layers concatenate their forward and backward outputs, so each one
# reports 2 x 20 = 40 features.
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_4 (Bidirectio  (None, 50, 40)           4160      
 nal)                                                            
                                                                 
 bidirectional_5 (Bidirectio  (None, 40)               9760      
 nal)                                                            
                                                                 
Total params: 13,920
Trainable params: 13,920
Non-trainable params: 0
_________________________________________________________________


## Merge mode 'sum'

In [20]:
# Same stack as before, but the first layer adds the two directions together
# instead of concatenating them. The second layer keeps the default 'concat'.
model = Sequential()
model.add(Input(shape=(50, 5)))  # 50 timesteps x 5 features per sample

model.add(Bidirectional(LSTM(20, return_sequences=True), merge_mode='sum'))
model.add(Bidirectional(LSTM(20)))

In [21]:
# The first layer now emits 20 features per timestep (summed) instead of 40,
# which shrinks the second layer's input and therefore its parameter count.
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_6 (Bidirectio  (None, 50, 20)           4160      
 nal)                                                            
                                                                 
 bidirectional_7 (Bidirectio  (None, 40)               6560      
 nal)                                                            
                                                                 
Total params: 10,720
Trainable params: 10,720
Non-trainable params: 0
_________________________________________________________________


## Backward layer

In [24]:
# The second Bidirectional layer gets an explicit backward layer, so the two
# directions may differ in cell type and width (LSTM with 20 units forward,
# GRU with 15 units backward).
model = Sequential()
model.add(Input(shape=(50, 5)))  # 50 timesteps x 5 features per sample

model.add(Bidirectional(LSTM(20, return_sequences=True), merge_mode='sum'))

# A custom backward layer has to be created with go_backwards=True, i.e. the
# opposite reading direction of the forward layer.
model.add(Bidirectional(LSTM(20), backward_layer=GRU(15, go_backwards=True)))

In [25]:
# The last layer concatenates a 20-unit forward LSTM with a 15-unit backward
# GRU, hence 20 + 15 = 35 output features.
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_9 (Bidirectio  (None, 50, 20)           4160      
 nal)                                                            
                                                                 
 bidirectional_10 (Bidirecti  (None, 35)               4945      
 onal)                                                           
                                                                 
Total params: 9,105
Trainable params: 9,105
Non-trainable params: 0
_________________________________________________________________
