# 1) Importations

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 2) Preprocessing input et output embedding

In [2]:
input_embedding = [["Salut", "comment", "ca", "va", "?"]] #1 batch de 1 sequence
output_embedding = [["<START>", "Hi", "how", "are", "you", "?"]]

In [3]:
def get_vocabulary(sequences):
    token_to_info = {}
    for sequence in sequences:
        for word in sequence:
            if word not in token_to_info:                #Pas de doublons dans les tokens
                token_to_info[word] = len(token_to_info) #On donne un ID au token, qui sera la longueur de la liste de token
    return token_to_info

In [4]:
input_voc = get_vocabulary(input_embedding)
output_voc = get_vocabulary(output_embedding)
print(input_voc)
print(output_voc)

{'Salut': 0, 'comment': 1, 'ca': 2, 'va': 3, '?': 4}
{'<START>': 0, 'Hi': 1, 'how': 2, 'are': 3, 'you': 4, '?': 5}


### --> On ajoute les tokens spécifiques

In [5]:
input_voc["<START>"] = len(input_voc)
input_voc["<END>"] = len(input_voc)
input_voc["<PAD >"] = len(input_voc) #Le padding est utile pr remplir les sequences n'étant pas de même taille que d'autres

output_voc["<END>"] = len(input_voc) #Attention à ne pas ajouter <START> ici si déjà fait dans le output_embedding !!!
output_voc["<PAD >"] = len(input_voc)

### --> Transformation des mots en int pour notre modèle

In [6]:
def sequences_to_int(sequences, voc):
    for sequence in sequences:
        for index, word in enumerate(sequence):
            sequence[index] = voc[word]
    return np.array(sequences)

In [7]:
input_seq = sequences_to_int(input_embedding, input_voc)
output_seq = sequences_to_int(output_embedding, output_voc)
print(input_seq)
print(output_seq)

[[0 1 2 3 4]]
[[0 1 2 3 4 5]]


# 3) Layers

### --> Input embedding layer

In [8]:
class InputEmbedding(tf.keras.layers.Layer):
    def __init__(self, nb_token, **kwargs):
        self.nb_token = nb_token
        super(**kwargs).__init__()
        
    def build(self, input_shape):
        self.word_embedding = tf.keras.layers.Embedding(self.nb_token, 256)
        super().build(input_shape)
        
    def call(self, x):
        embed = self.word_embedding(x)
        return embed

### --> Scaled Dot-Product Attention

In [9]:
class ScaledDotProductAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(**kwargs).__init__()
        
    def build(self, input_shape):
        self.query_layer = tf.keras.layers.Dense(256)
        self.key_layer   = tf.keras.layers.Dense(256)
        self.value_layer = tf.keras.layers.Dense(256)
        super().build(input_shape)
        
    def call(self, x):
        Q = self.query_layer(x)
        K = self.key_layer(x)
        V = self.value_layer(x)
        QK = tf.matmul(Q, K, transpose_b=True)
        QK = QK / tf.math.sqrt(256.0)           #Normalise  les valeurs
        softmax_QK = tf.nn.softmax(QK, axis=-1) #Attention pour chaque mots de la sequence
        attention = tf.matmul(softmax_QK, V)    #Applique notre attention aux V
        return attention

In [11]:
def test_ScaledDotProductAttention():
    layer_input = tf.keras.Input(shape=(5)) #Taille sequence : 5 (On peut gerer les autres tailles avec de PAD)
    input_embedding = InputEmbedding(nb_token=5)(layer_input)
    attention = ScaledDotProductAttention()(input_embedding)
    model = tf.keras.Model(layer_input, attention)
    model.summary()
    return model
    
model_test = test_ScaledDotProductAttention()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 5)]               0         
_________________________________________________________________
input_embedding_1 (InputEmbe (None, 5, 256)            1280      
_________________________________________________________________
scaled_dot_product_attention (None, 5, 256)            197376    
Total params: 198,656
Trainable params: 198,656
Non-trainable params: 0
_________________________________________________________________


### --> Encoder layer

In [30]:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(**kwargs).__init__()
        
    def build(self, input_shape):
        self.scaled_dot_product_attention = ScaledDotProductAttention()
        self.norm = tf.keras.layers.LayerNormalization()
        self.feed_forward = tf.keras.layers.Dense(256)
        super().build(input_shape)
        
    def call(self, x):
        attention = self.scaled_dot_product_attention(x) #Multi-Head Attention
        post_attention = self.norm(attention + x)        #Add & Norm
        feed_forward = self.feed_forward(post_attention) #Feed Forward
        enc_output = self.norm(x + post_attention)       #2nd Add & Norm
        print(post_attention.shape)
        return enc_output

In [32]:
def test_EncoderLayer():
    layer_input = tf.keras.Input(shape=(5)) #Taille sequence : 5 (On peut gerer les autres tailles avec de PAD)
    input_embedding = InputEmbedding(nb_token=5)(layer_input)
    encoder_output = EncoderLayer()(input_embedding)
    model = tf.keras.Model(layer_input, encoder_output)
    model.summary()
    return model
    
model_test = test_EncoderLayer()

(None, 5, 256)
Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        [(None, 5)]               0         
_________________________________________________________________
input_embedding_11 (InputEmb (None, 5, 256)            1280      
_________________________________________________________________
encoder_layer_8 (EncoderLaye (None, 5, 256)            263680    
Total params: 264,960
Trainable params: 264,960
Non-trainable params: 0
_________________________________________________________________
