In [2]:
import tensorflow as tf
import numpy as np

In [3]:
# 4 dimensions for each word
import math
def positionalEncodings(sequence, d_model=4):
    """Build sinusoidal positional encodings.

    Each pair of dimensions (2k, 2k+1) shares one frequency:

        PE[pos][2k]   = sin(pos / 10000 ** (2k / d_model))
        PE[pos][2k+1] = cos(pos / 10000 ** (2k / d_model))

    Args:
        sequence: Number of positions to encode (an int; positions run
            from 0 to ``sequence - 1``).
        d_model: Number of encoding dimensions per position. Defaults to 4.

    Returns:
        A list of ``sequence`` rows, each a list of ``d_model`` floats.
    """
    PE = []
    for pos in range(sequence):
        EncodingsOfEach = []
        for i in range(d_model):
            # ``i`` is the dimension index itself, so the exponent is
            # (even dimension of the pair) / d_model. ``i - i % 2`` maps both
            # members of a pair (2k, 2k+1) onto 2k.
            angle = pos / math.pow(10000, (i - i % 2) / d_model)
            if i % 2 == 0:
                EncodingsOfEach.append(math.sin(angle))
            else:
                EncodingsOfEach.append(math.cos(angle))
        PE.append(EncodingsOfEach)
    return PE

In [4]:
def selfAttention(embeddings, W_q, W_k, W_v):
    """Single-head scaled dot-product self-attention over one sequence.

    Computes ``softmax(Q K^T / sqrt(d_k)) V`` with ``Q = X W_q``,
    ``K = X W_k`` and ``V = X W_v``, where ``d_k`` is the number of columns
    of ``W_k``. All rows are processed with batched matrix products rather
    than per-row Python loops, so the function does not rely on iterating
    over a tensor-valued length.

    Args:
        embeddings: 2-D float tensor with one row per word.
        W_q: Query projection matrix, shape (embedding width, d_k).
        W_k: Key projection matrix, shape (embedding width, d_k).
        W_v: Value projection matrix, shape (embedding width, d_v).

    Returns:
        2-D tensor with one attended row per input word and d_v columns.
    """
    # Project every word at once.
    Q = tf.matmul(embeddings, W_q)
    K = tf.matmul(embeddings, W_k)
    V = tf.matmul(embeddings, W_v)

    # Scaling factor sqrt(d_k); cast to the activations' dtype so the
    # division below never mixes float types.
    scale = tf.math.sqrt(tf.cast(tf.shape(K)[-1], K.dtype))

    # scores[i, j] = <Q[i], K[j]> / sqrt(d_k)
    scores = tf.matmul(Q, K, transpose_b=True) / scale

    # Normalise each query's scores over all keys (row-wise softmax).
    weights = tf.nn.softmax(scores, axis=-1)

    # Row i is the weights[i, :]-weighted sum of the value rows.
    return tf.matmul(weights, V)


In [5]:
def Add_and_normalize(output_of_previous,input_of_previous):
    """Residual connection followed by layer normalization.

    Adds a sub-layer's output to that sub-layer's input and passes the sum
    through a freshly constructed ``LayerNormalization`` layer
    (``epsilon=1e-6``). A new layer object is created on every call.
    """
    residual_sum = output_of_previous + input_of_previous
    normalizer = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    return normalizer(residual_sum)

In [6]:
def FeedForwardNN(x, W1, b1, W2, b2):
    """Two-layer feed-forward network: ``relu(x @ W1 + b1) @ W2 + b2``.

    Args:
        x: Input tensor, multiplied on the right by ``W1``.
        W1, b1: Weight matrix and bias of the hidden (ReLU) layer.
        W2, b2: Weight matrix and bias of the linear output layer.

    Returns:
        The output-layer activations (no non-linearity applied).
    """
    pre_activation = tf.matmul(x, W1) + b1
    activated = tf.nn.relu(pre_activation)
    return tf.matmul(activated, W2) + b2


In [7]:
def Encoder(positional_encodings_plus_embeddings,W_q,W_k,W_v,W1,b1,W2,b2):
    """Run one encoder layer and return its output.

    The layer consists of two sub-layers, each wrapped by
    ``Add_and_normalize`` (residual add + layer norm):

    1. ``selfAttention`` using ``W_q``, ``W_k``, ``W_v``.
    2. ``FeedForwardNN`` using ``W1``, ``b1``, ``W2``, ``b2``.
    """
    layer_input = positional_encodings_plus_embeddings

    # Sub-layer 1: self-attention, with the layer input as the residual.
    attended = Add_and_normalize(
        selfAttention(layer_input, W_q, W_k, W_v),
        layer_input,
    )

    # Sub-layer 2: feed-forward network, with the attention result as the residual.
    return Add_and_normalize(
        FeedForwardNN(attended, W1, b1, W2, b2),
        attended,
    )

In [8]:
# Toy input: three words, each represented by a 4-dimensional float32 vector.
embeddings=tf.constant([[1,0,1,0],[0,1,0,1],[1,1,1,1]],dtype=tf.float32)
# One positional-encoding row per word; positionalEncodings returns a nested Python list.
positional_encodings=positionalEncodings(len(embeddings))
# Element-wise sum of embeddings and encodings (the nested list is presumably
# converted to a tensor by TensorFlow's overloaded `+` -- confirm if dtypes change).
embeddings_plus_PE=embeddings+positional_encodings

In [11]:
# Model width, read from the length of the first input row.
d_model=embeddings_plus_PE[0].shape[0]
# Query/key/value projections are hard-coded 4x4 identity matrices, so the
# projected Q, K and V rows equal the input rows themselves.
W_q=tf.constant([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],dtype=tf.float32)
W_k=tf.constant([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],dtype=tf.float32)
W_v=tf.constant([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],dtype=tf.float32)
# Feed-forward parameters: d_model -> 256 hidden units -> d_model.
# NOTE(review): these are random draws and no seed is set in the visible cells,
# so the values (and any output computed from them) differ between runs.
W1 = tf.random.normal(shape=(d_model,256))
b1 = tf.random.normal(shape=(256,))
W2 = tf.random.normal(shape=(256,d_model))
b2 = tf.random.normal(shape=(d_model,))

In [12]:
# Run a single encoder layer on the position-augmented embeddings and print the result.
print(Encoder(embeddings_plus_PE,W_q,W_k,W_v,W1,b1,W2,b2))

tf.Tensor(
[[-0.7105796   1.3845652  -1.1588589   0.48487327]
 [-0.83903754  1.3066386  -1.0938545   0.6262537 ]
 [-0.6768905   1.4612029  -1.1334834   0.34917092]], shape=(3, 4), dtype=float32)


In [13]:
# Model width, read from the length of the first input row.
d_model=embeddings_plus_PE[0].shape[0]
# One parameter dict per encoder layer; six layers in total.
encoder_params = []
for _ in range(6):
    # Every matrix and bias is an independent random-normal draw.
    # NOTE(review): no seed is set in the visible cells, so these parameters
    # (and the stacked-encoder output) differ between runs.
    params = {
        # Attention projections: square, d_model x d_model.
        "W_q": tf.random.normal(shape=(d_model,d_model)),
        "W_k": tf.random.normal(shape=(d_model,d_model)),
        "W_v": tf.random.normal(shape=(d_model,d_model)),
        # Feed-forward parameters: d_model -> 256 hidden units -> d_model.
        "W1": tf.random.normal(shape=(d_model,256)),
        "b1": tf.random.normal(shape=(256,)),
        "W2": tf.random.normal(shape=(256,d_model)),
        "b2": tf.random.normal(shape=(d_model,))
    }
    encoder_params.append(params)


In [14]:
def encoderStack(x, encoder_params):
    """Feed ``x`` through a sequence of encoder layers.

    Args:
        x: Input to the first layer.
        encoder_params: Iterable of dicts, one per layer, each holding the
            keys ``W_q``, ``W_k``, ``W_v``, ``W1``, ``b1``, ``W2`` and ``b2``.

    Returns:
        The output of the last layer, or ``x`` unchanged when
        ``encoder_params`` is empty.
    """
    # Order matches the positional parameters of Encoder after its input.
    weight_names = ("W_q", "W_k", "W_v", "W1", "b1", "W2", "b2")

    hidden = x
    for layer_weights in encoder_params:
        # Each layer consumes the previous layer's output.
        hidden = Encoder(hidden, *[layer_weights[name] for name in weight_names])
    return hidden


In [15]:
# Run the position-augmented embeddings through every layer in encoder_params and print the result.
print(encoderStack(embeddings_plus_PE,encoder_params))

tf.Tensor(
[[ 0.42652076 -0.81188786 -1.0492232   1.4345901 ]
 [ 0.38801178 -0.8178501  -1.0276964   1.4575348 ]
 [ 0.44736645 -0.8075728  -1.0614312   1.4216375 ]], shape=(3, 4), dtype=float32)
