# In [None]:
"""
Based on the Stepik course "Neural Networks and Text Processing".
Convolution functions for text
"""

import sys
import ast
import numpy as np

def parse_array(s):
    """
    returns numpy array built from the Python literal written in s

    s - string holding a Python literal (e.g. a nested list of numbers)
    """
    literal = ast.literal_eval(s)
    return np.array(literal)

def read_array():
    """
    returns numpy array parsed from the next line of standard input
    """
    line = sys.stdin.readline()
    return parse_array(line)

def write_array(arr):
    """
    prints arr to standard output as a (nested) Python list literal on one line

    arr - numpy array
    """
    nested = arr.tolist()
    print(repr(nested))


def apply_convolution(data, kernel, bias):
    """
    returns array with shape OutLen x OutChannels after applying the kernel

    Every window of KernelSize consecutive rows of data is flattened
    channel-by-channel and multiplied with the equally flattened weights of
    each output channel; the bias is then added per output channel.

    data - InLen x InChannels
    kernel - OutChannels x InChannels x KernelSize
    bias - OutChannels
    """
    seq_len, _ = data.shape
    n_out, _, width = kernel.shape
    n_steps = seq_len - width + 1

    # One row per window position; transposing first makes the flattened
    # order (channel, offset), which matches the kernel's memory layout.
    windows = np.stack(
        [data[start:start + width].T.flatten() for start in range(n_steps)]
    )
    # Column vector of weights per output channel: OutChannels x (InChannels*KernelSize) x 1.
    weights = kernel.reshape(n_out, -1, 1)

    # Broadcasted matmul gives OutChannels x OutLen x 1; drop the trailing
    # axis explicitly and put the sequence axis first.
    per_channel = np.matmul(windows, weights)[:, :, 0]
    shifted = per_channel.T + bias.reshape(1, -1)
    return shifted.reshape(n_steps, n_out)

def calculate_kernel_grad(x, y, kernel, bias):
    """
    returns gradient for kernel with shape OutChannels x InChannels x KernelSize

    The entry for kernel weight [o, c, j] is the sum of x[i + j, c] over all
    window positions i, i.e. the derivative of the sum of all convolution
    outputs with respect to that weight. It does not depend on o, so the same
    InChannels x KernelSize slab is repeated for every output channel.

    x - InLen x InChannels
    y - OutLen x OutChannels (not used by the computation)
    kernel - OutChannels x InChannels x KernelSize (only its shape is used)
    bias - OutChannels (not used by the computation)
    """
    in_len, _ = x.shape
    out_c, _, k = kernel.shape
    out_len = in_len - k + 1

    # window_idx[i, j] = i + j: the input row seen by kernel offset j at
    # window position i.
    window_idx = np.arange(out_len)[:, None] + np.arange(k)[None, :]
    # x[window_idx] is OutLen x KernelSize x InChannels; summing over window
    # positions and transposing gives InChannels x KernelSize.
    grad = np.sum(x[window_idx], axis=0).T
    # Stack a real list: np.stack needs a sequence, not an iterator.
    return np.stack([grad] * out_c)

def calculate_conv_x_grad(x, y, kernel, bias):
    """
    returns gradient for x with shape InLen x InChannels

    The entry for x[i, c] is the sum of kernel[o, c, j] over every output
    channel o and every kernel offset j for which the window starting at
    i - j exists, i.e. the derivative of the sum of all convolution outputs
    with respect to that input element. Works for any kernel size, including
    KernelSize == 1 and inputs shorter than 2 * (KernelSize - 1).

    x - InLen x InChannels (only its length is used)
    y - OutLen x OutChannels (not used by the computation)
    kernel - OutChannels x InChannels x KernelSize
    bias - OutChannels (not used by the computation)
    """
    in_len, _ = x.shape
    _, in_c, k = kernel.shape
    # Clamp at zero so that a kernel longer than the input yields an all-zero
    # gradient instead of a negative slice bound.
    out_len = max(in_len - k + 1, 0)

    # Output channels all contribute equally to the sum of outputs, so
    # collapse them first: KernelSize x InChannels.
    summed = np.sum(kernel, axis=0).T

    grad = np.zeros((in_len, in_c), dtype=summed.dtype)
    # Kernel offset j touches input rows j .. j + OutLen - 1 (one per window).
    for offset in range(k):
        grad[offset:offset + out_len] += summed[offset]
    return grad

def calculate_receptive_field(layers):
    """
    returns int - receptive field size

    Computed as 1 + sum((kernel_size - 1) * dilation) over all layers; with
    no layers the receptive field is a single element.

    layers - list of LayerInfo; the first field of each entry is read as the
             kernel size and the second as the dilation
    """
    # Without this guard an empty list produces an empty array and the column
    # lookup below raises IndexError.
    if len(layers) == 0:
        return 1

    columns = np.array(layers).T
    kernels = np.array(columns[0])
    dilations = np.array(columns[1])
    return 1 + np.sum((kernels - 1) * dilations)

def max_pooling(features, kernel_size):
    """
    returns tuple of two matrices of shape OutLen x EmbSize:
         - output features (main result)
         - relative indices of maximum elements for each position of sliding window

    When a window contains its maximum more than once, the index of the first
    occurrence is reported.

    features - InLen x EmbSize - features of elements of input sequence
    kernel_size - positive integer - size of sliding window

    raises ValueError if kernel_size is not in the range 1..InLen
    """
    in_len, emb_size = features.shape
    k = kernel_size
    if k < 1 or k > in_len:
        raise ValueError(
            "kernel_size must be between 1 and the input length "
            "({}), got {}".format(in_len, k)
        )
    out_len = in_len - k + 1

    # window_idx[i, j] = i + j, so features[window_idx] gathers every sliding
    # window at once: OutLen x KernelSize x EmbSize.
    window_idx = np.arange(out_len)[:, None] + np.arange(k)[None, :]
    windows = features[window_idx]

    # argmax returns exactly one index per window (the first maximum), so
    # tied maxima cannot change the output shape.
    indices = np.argmax(windows, axis=1)
    result = np.max(windows, axis=1)
    return result, indices

def max_pooling_dldfeatures(features, kernel_size, indices, dldout):
    """
    returns gradient for max pooling layer with shape InLen x EmbSize

    Each output gradient is routed back to the input position that produced
    the window maximum; positions selected by several windows accumulate all
    of their contributions.

    features - InLen x EmbSize - features of elements of input sequence
    kernel_size - positive integer - size of sliding window
    indices - OutLen x EmbSize - relative indices of maximum elements for each window position
    dldout - OutLen x EmbSize - partial derivative of loss function with respect to outputs of max_pooling layer
    """
    _, emb_size = features.shape
    out_len, _ = dldout.shape
    grad = np.zeros(features.shape)

    # Absolute input row of every window maximum: window start + relative index.
    target_rows = np.arange(out_len)[:, None] + indices
    target_cols = np.arange(emb_size)[None, :]

    # Unbuffered add, so repeated (row, col) targets are summed rather than
    # overwritten.
    np.add.at(grad, (target_rows, target_cols), dldout)
    return grad

