In [None]:
Implement Self-Attention Mechanism  

In [1]:
import numpy as np   

In [2]:
def compute_qkv(X, W_q, W_k, W_v):
    Q=np.dot(X,W_q)  
    K=np.dot(X,W_k) 
    V=np.dot(X,W_v)    
    return Q,K,V 

In [3]:
def self_attention(Q,K,V): 
  dim=Q.shape[-1]   
  sim= np.dot(Q,K.T) /np.sqrt(dim)  # this measure similarity between words.
  normalised_sim= np.exp(sim)/np.sum(np.exp(sim),axis=1,keepdims=True)  # The similarity scores are exponentiated and normalized using the softmax function.
  scores= np.dot(normalised_sim,V) 
  return scores  

In [6]:
def take_matrix_input(name, n):
    print(f"Please start entering values for {name}:") 
    matrix= np.zeros((n, n))
    for i in range(n):
      for j in range(n):
         x = float(input(f"{name}[{i}][{j}] - number: "))  
         matrix[i][j] = x   
    return matrix

In [None]:

n = int(input("Please enter number of words in a sentence: "))    


X = take_matrix_input("x" , n)  
W_q = take_matrix_input("W_q", n)  
W_k = take_matrix_input("W_k", n) 
W_v =take_matrix_input("W_v", n)  

print(f"X:\n{X}\nW_q:\n{W_q}\nW_k:\n{W_k}\nW_v:\n{W_v}") 

Please enter number of words in a sentence:  2


Please start entering values for x:


x[0][0] - number:  1
x[0][1] - number:  0
x[1][0] - number:  0
x[1][1] - number:  1


Please start entering values for W_q:


In [35]:
# Compute Q, K, V
Q, K, V = compute_qkv(X, W_q, W_k, W_v)

# Compute Self-Attention  
output = self_attention(Q, K, V)  
print(output)  

[[1.6604769 2.6604769]
 [2.3395231 3.3395231]]


In [37]:
X = np.array([[1, 0], [0, 1]])  # Input matrix
W_q = np.array([[1, 0], [0, 1]])  # Query weights
W_k = np.array([[1, 0], [0, 1]])  # Key weights
W_v = np.array([[1, 2], [3, 4]])  # Value weights 

# Compute Q, K, V
Q, K, V = compute_qkv(X, W_q, W_k, W_v) 

# Compute Self-Attention   
output = self_attention(Q, K, V)   
print(output) 

[[1.6604769 2.6604769]
 [2.3395231 3.3395231]]


In [None]:
Implement Multi-Head Attention 

In [38]:
def self_attention(Q,K,V):
  dim=Q.shape[-1] 
  sim= np.dot(Q,K.T) /np.sqrt(dim) 
  normalised_sim= np.exp(sim)/np.sum(np.exp(sim),axis=1,keepdims=True) 
  scores= np.dot(normalised_sim,V) 
  return scores   


def multi_head_attention(Q,K,V,n_heads): 
    d=Q.shape[-1] 
    assert d % n_heads==0   
    Q_= np.array(np.split(Q,n_heads,axis=-1))  
    K_= np.array(np.split(K,n_heads,axis=-1)) 
    V_= np.array(np.split(V,n_heads,axis=-1))  
    attention_scores= [self_attention(Q_[i],K_[i],V_[i]) for i in range(n_heads)] 
    multi_head_output=np.concatenate(attention_scores,axis=-1)   
    return multi_head_output 

In [44]:
def take_matrix_input(name, n,dim):
    print(f"Please start entering values for {name}:") 
    matrix= np.zeros((n, dim))
    for i in range(n): 
      for j in range(dim): 
         x = float(input(f"{name}[{i}][{j}] - number: "))  
         matrix[i][j] = x   
    return matrix

In [47]:
n=int(input("Enter no.of words: ")) 
dim=int(input("Enter dimension of word embeddings of a words of input sentence: "))  
Q = take_matrix_input("Q",n,dim)
K = take_matrix_input("K",n,dim)
V = take_matrix_input("V",n,dim)
  
n_heads = int(input("Enter number of heads: "))   
output = multi_head_attention(Q, K, V, n_heads) 
print("multi_head_attention",output)
print("\nBinary_output of multi_head_attention")
output= (output > 0.5).astype(int)  
print(output) 

Enter no.of words:  2
Enter dimension of word embeddings of a word:  2


Please start entering values for Q:


Q[0][0] - number:  1
Q[0][1] - number:  0
Q[1][0] - number:  0
Q[1][1] - number:  1


Please start entering values for K:


K[0][0] - number:  1
K[0][1] - number:  0
K[1][0] - number:  0
K[1][1] - number:  1


Please start entering values for V:


V[0][0] - number:  1
V[0][1] - number:  0
V[1][0] - number:  0
V[1][1] - number:  1
Enter number of heads:  2


[[0.73105858 0.5       ]
 [0.5        0.73105858]]

Binary_output
[[1 0]
 [0 1]]


In [24]:
Q = np.array([[1, 0], [0, 1]])  # (2x2)
K = np.array([[1, 0], [0, 1]])  # (2x2)
V = np.array([[1, 0], [0, 1]])  # (2x2) 
n_heads = 2

output = multi_head_attention(Q, K, V, n_heads)
output= (output > 0.5).astype(int) 
print(output)  

[[1 0]
 [0 1]]


In [None]:
Done!! 

In [None]:
Implementation of self attention without Q,K,V vectors 

In [7]:
import spacy
nlp = spacy.load("en_core_web_sm") 
import gensim.downloader as api 

# Load Word2Vec embeddings (pretrained on Google News)
word2vec = api.load("word2vec-google-news-300") 
import numpy as np 

In [38]:
import spacy
import numpy as np

# Load Spacy Model
nlp = spacy.load("en_core_web_sm")

# Example sentence
sentence = "Money bank grows"
doc = nlp(sentence)

# Extract tokens
tokens = [token.text for token in doc]

# Placeholder Word2Vec (Use pre-trained word embeddings instead)
word2vec = {word: np.random.rand(300) for word in tokens}  # Simulating word embeddings

# Initialize matrices
similarity_matrix = []
word_embeddings = []

# Compute word embeddings
for word in tokens: 
    i = word2vec[word]  # Get embedding
    word_embeddings.append(i) 

# Compute similarity matrix using dot product
for i in word_embeddings:
    sim = [np.dot(i, j) for j in word_embeddings] 
    sim = np.exp(sim) / np.sum(np.exp(sim))  # Softmax normalization
    similarity_matrix.append(sim) 

# Compute new word embeddings (weighted sum)
new_word_embeddings = []
for i in range(len(tokens)):
    new_embedding = np.dot(similarity_matrix[i], word_embeddings)  # Weighted sum
    new_word_embeddings.append(new_embedding)

# Print result
print( f"similarity_matrix,{ similarity_matrix}") 
new_word_embeddings=np.array(new_word_embeddings)
print(f"new_word_embeddings{new_word_embeddings}") 
word_embeddings=np.array(word_embeddings) 
print(new_word_embeddings-word_embeddings) 


similarity_matrix,[array([1.00000000e+00, 2.60429739e-13, 3.81359627e-13]), array([1.14932845e-10, 1.00000000e+00, 1.37662739e-11]), array([3.38483326e-11, 2.76863300e-12, 1.00000000e+00])]
new_word_embeddings[[3.76803737e-01 4.38170781e-01 2.97348834e-01 5.70211461e-01
  9.38437560e-01 5.49421216e-01 6.86058582e-02 4.90253162e-01
  4.11021457e-01 2.42561796e-01 9.37988239e-01 4.01833660e-01
  4.91251842e-01 1.89366354e-01 8.28348094e-01 3.69757869e-01
  6.98111294e-01 1.52308571e-01 1.92980183e-01 1.26226955e-01
  9.91246296e-02 9.34043416e-01 5.44222501e-01 7.87633672e-01
  6.46101692e-01 7.21890764e-01 4.52739681e-01 1.43640304e-01
  6.36249107e-01 3.34877450e-01 2.61920835e-01 3.33372232e-01
  1.15213817e-02 8.23909637e-01 6.95558886e-01 1.12503678e-01
  8.85672300e-01 2.62598850e-01 9.32897168e-01 8.78921716e-01
  3.22122371e-01 4.96436857e-01 3.88477962e-01 7.46420709e-01
  6.54576164e-01 9.08837706e-01 6.36778265e-01 8.22957623e-01
  5.25680381e-01 3.67044874e-01 4.30979789e-01 