In [None]:
"""
На основе курса "Нейронные сети и обработка текста" со Stepic
Basic functions for a transformer implementation
"""

import sys
import ast
import numpy as np


def parse_array(s):
    """
    returns NumPy array built from the parsed literal

    s - string holding a Python literal (e.g. a nested list of numbers)
    """
    # literal_eval only evaluates literals, never arbitrary expressions.
    literal = ast.literal_eval(s)
    return np.array(literal)

def read_array():
    """
    returns array parsed from the next line of standard input
    """
    line = sys.stdin.readline()
    return parse_array(line)

def write_array(arr):
    """
    prints arr to standard output as the repr of its nested-list form

    arr - object with a tolist() method (e.g. a NumPy array)
    """
    as_list = arr.tolist()
    print(f"{as_list!r}")

def softmax(x, axis=None):
    """
    returns array of the same shape as x - softmax output

    x - vector of n elements (any array-like is accepted) - input
    axis - axis along which to normalize; None (default) normalizes over
           all elements, which for a vector is the ordinary softmax
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.inexact):
        # Integer/bool inputs: convert first so the subtraction below cannot
        # wrap around (unsigned ints) or be rejected (booleans).
        x = x.astype(float)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty array.
        return np.exp(x)
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps np.exp from overflowing to inf (which would turn the ratio into nan).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    numerator = np.exp(shifted)
    return numerator / np.sum(numerator, axis=axis, keepdims=True)

def attention(features, query):
    """
    returns vector of size EmbSize - features, aggregated according to the query

    features - InLen x EmbSize - features of elements of input sequence
    query - EmbSize - features of query object
    """
    # Relevance of every input element to the query: one dot product per row.
    unnorm_scores = features @ query
    # Turn the relevances into weights over the input elements.
    att_scores = softmax(unnorm_scores)
    # A 1-D @ 2-D product is the weighted sum of the rows and always has shape
    # (EmbSize,). A reshape(1, -1) followed by squeeze() would instead collapse
    # the result to a 0-d scalar when EmbSize == 1.
    return att_scores @ features

def self_attention(features, proj_k, bias_k, proj_q, bias_q, proj_v, bias_v):
    """
    returns array with shape InLen x EmbSize - for every input element, the
    values of all elements averaged with that element's attention weights

    features - InLen x EmbSize - features of elements of input sequence
    proj_k - EmbSize x EmbSize - projection matrix to make keys from features
    bias_k - EmbSize - bias vector to make keys from features
    proj_q - EmbSize x EmbSize - projection matrix to make queries from features
    bias_q - EmbSize - bias vector to make queries from features
    proj_v - EmbSize x EmbSize - projection matrix to make values from features
    bias_v - EmbSize - bias vector to make values from features
    """
    # Affine projections of the same features into query and key spaces.
    queries = features @ proj_q + bias_q
    keys = features @ proj_k + bias_k

    # logits[i, j] is the dot product of query i with key j.
    logits = queries @ keys.T
    # Softmax is applied to each row separately (axis 1).
    weights = np.apply_along_axis(softmax, 1, logits)

    values = features @ proj_v + bias_v
    return weights @ values