In [3]:
import numpy as np

# Demo: weighted sum of T hidden vectors (each of size H) using one weight per time step.
T, H = 5, 4
hs = np.random.randn(T, H)
print(hs[0])
a  = np.array([0.8, 0.1, 0.03, 0.05, 0.02])  # one weight per time step; length must equal T

# Copy each weight across the hidden axis: (T,) -> (T, 1) -> (T, H).
# H is used instead of a literal 4 so the cell keeps working if H is changed above.
ar = a.reshape(T, 1).repeat(H, axis=1)
print(ar.shape)
print(ar)

# Element-wise product scales every row of hs by its weight.
t = hs * ar
print(t.shape)

# Summing over axis 0 (time) leaves a single vector of size H.
c = np.sum(t, axis=0)
print(c.shape)
print(c)

[-0.55957285 -0.8255792   0.72702834 -1.05505906]
(5, 4)
[[0.8  0.8  0.8  0.8 ]
 [0.1  0.1  0.1  0.1 ]
 [0.03 0.03 0.03 0.03]
 [0.05 0.05 0.05 0.05]
 [0.02 0.02 0.02 0.02]]
(5, 4)
(4,)
[-0.58791536 -0.60066715  0.48828332 -0.85496717]


In [4]:
import numpy as np

# Demo: the same weighted sum, now for a mini-batch of N samples.
N, T, H = 2, 3, 4
hs = np.random.randn(N, T, H)
a  = np.random.randn(N, T)

# Give the weights a trailing axis and tile it H times: (N, T) -> (N, T, 1) -> (N, T, H).
ar = np.repeat(a.reshape(N, T, 1), H, axis=2)

# Scale each hidden vector by its weight.
t = np.multiply(hs, ar)
print(t.shape)

# Reduce over the time axis (axis 1), leaving one vector of size H per sample.
c = t.sum(axis=1)
print(c.shape)

(2, 3, 4)
(2, 4)


In [6]:
# Demo: which entries np.sum(axis=1) adds together on a (2, 3, 4) tensor.
t = np.arange(2 * 3 * 4).reshape(2, 3, 4)
print(t)
# Axis 1 is collapsed: the three rows inside each of the two outer slices are summed.
c = t.sum(axis=1)
print(c)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
[[12 15 18 21]
 [48 51 54 57]]


In [7]:
import sys
sys.path.append("..")
from common.layers import Softmax
import numpy as np

# Demo: score every time step of hs against a query vector h, then pass the scores to Softmax.
N, T, H = 10, 5, 4
hs = np.random.randn(N, T, H)
h  = np.random.randn(N, H)

# Tile the query along a new time axis: (N, H) -> (N, 1, H) -> (N, T, H).
hr = np.repeat(h.reshape(N, 1, H), T, axis=1)

# Element-wise product; summing it over the hidden axis below gives a dot product per time step.
t = np.multiply(hs, hr)
print(t.shape)

s = t.sum(axis=2)
print(s.shape)

# Softmax is the layer imported from common.layers (not shown here);
# presumably it normalises each row of s -- verify against that module.
softmax = Softmax()
a = softmax.forward(s)
print(a.shape)
print(a)

(10, 5, 4)
(10, 5)
(10, 5)
[[0.38809027 0.29545037 0.13657334 0.10224911 0.07763691]
 [0.51091018 0.08868446 0.17822433 0.17300609 0.04917495]
 [0.24238267 0.05862754 0.05094108 0.53089014 0.11715856]
 [0.00377341 0.96800507 0.02205353 0.00200039 0.0041676 ]
 [0.05907465 0.05474874 0.27672388 0.13818032 0.47127241]
 [0.01885712 0.08517914 0.00535828 0.61814501 0.27246045]
 [0.03145756 0.34765569 0.00379522 0.61127636 0.00581516]
 [0.08010931 0.0677117  0.378384   0.00153163 0.47226337]
 [0.02808342 0.21716891 0.70133504 0.02927912 0.0241335 ]
 [0.03559259 0.04929676 0.51320484 0.00899499 0.39291081]]


In [None]:
# coding: utf-8
import sys
sys.path.append('..')
from common.np import *  # import numpy as np
from common.layers import Softmax


class WeightSum:
    """Weighted sum of a sequence of hidden vectors.

    ``forward`` multiplies every time step of ``hs`` by its weight in ``a``
    and sums the result over the time axis. ``backward`` returns the
    gradients with respect to both inputs.
    """

    def __init__(self):
        # Both lists stay empty; they are kept so the layer exposes the same
        # attributes as the other layers in this file.
        self.params, self.grads = [], []
        # Filled by forward() with (hs, ar) for use in backward().
        self.cache = None

    def forward(self, hs, a):
        """Return the weighted sum over the time axis.

        Args:
            hs: array of shape (N, T, H).
            a: array reshapeable to (N, T, 1), i.e. one weight per time step.

        Returns:
            Array of shape (N, H).
        """
        N, T, H = hs.shape

        # Tile each weight across the hidden axis: (N, T) -> (N, T, 1) -> (N, T, H).
        # The repeat must be H times along axis 2; repeating T times along
        # axis 1 gives (N, T*T, 1), which cannot be multiplied with hs.
        ar = a.reshape(N, T, 1).repeat(H, axis=2)
        t = hs * ar
        c = np.sum(t, axis=1)

        self.cache = (hs, ar)
        return c

    def backward(self, dc):
        """Back-propagate through the weighted sum.

        Args:
            dc: upstream gradient, reshapeable to (N, 1, H).

        Returns:
            Tuple ``(dhs, da)`` with shapes (N, T, H) and (N, T).
        """
        hs, ar = self.cache
        N, T, H = hs.shape
        # The forward sum over time becomes a copy to every time step.
        dt = dc.reshape(N, 1, H).repeat(T, axis=1)
        dar = dt * hs
        dhs = dt * ar
        # The forward repeat over the hidden axis becomes a sum over that axis.
        da = np.sum(dar, axis=2)

        return dhs, da


class AttentionWeight:
    """Turns a sequence of hidden vectors and a query vector into per-step weights.

    For every time step, the hidden vector in ``hs`` is dotted with the query
    ``h``; the resulting scores are passed through the ``Softmax`` layer.
    """

    def __init__(self):
        self.params = []
        self.grads = []
        self.softmax = Softmax()
        # Filled by forward() with (hs, hr) for use in backward().
        self.cache = None

    def forward(self, hs, h):
        """Compute the weights for ``hs`` (N, T, H) given ``h`` (reshapeable to (N, 1, H)).

        Returns whatever ``self.softmax.forward`` yields for the (N, T) score matrix.
        """
        batch, steps, hidden = hs.shape

        # Tile the query over the time axis so it lines up with hs element-wise.
        hr = np.repeat(h.reshape(batch, 1, hidden), steps, axis=1)
        # Product followed by a sum over the hidden axis: one dot product per time step.
        scores = np.sum(hs * hr, axis=2)
        weights = self.softmax.forward(scores)

        self.cache = (hs, hr)
        return weights

    def backward(self, da):
        """Back-propagate ``da`` through the softmax and the dot product.

        Returns ``(dhs, dh)`` with shapes (N, T, H) and (N, H).
        """
        hs, hr = self.cache
        batch, steps, hidden = hs.shape

        dscores = self.softmax.backward(da)
        # The forward sum over the hidden axis becomes a copy along that axis.
        dprod = np.repeat(dscores.reshape(batch, steps, 1), hidden, axis=2)
        grad_hs = dprod * hr
        # The forward tiling over time becomes a sum over the time axis.
        grad_h = np.sum(dprod * hs, axis=1)

        return grad_hs, grad_h


class Attention:
    """Chains an ``AttentionWeight`` layer and a ``WeightSum`` layer.

    The weights produced for ``(hs, h)`` are used to form a weighted sum of
    ``hs``; the most recent weights are kept in ``attention_weight``.
    """

    def __init__(self):
        self.params = []
        self.grads = []
        self.attention_weight_layer = AttentionWeight()
        self.weight_sum_layer = WeightSum()
        # Weights from the latest forward() call; None until then.
        self.attention_weight = None

    def forward(self, hs, h):
        """Return the output of the weight-sum layer for ``hs`` weighted by its match with ``h``."""
        weights = self.attention_weight_layer.forward(hs, h)
        context = self.weight_sum_layer.forward(hs, weights)
        self.attention_weight = weights
        return context

    def backward(self, dout):
        """Back-propagate ``dout``; returns ``(dhs, dh)``."""
        grad_hs_from_sum, grad_weights = self.weight_sum_layer.backward(dout)
        grad_hs_from_weights, grad_h = self.attention_weight_layer.backward(grad_weights)
        # hs feeds both sub-layers, so its two gradient contributions are added.
        return grad_hs_from_sum + grad_hs_from_weights, grad_h


class TimeAttention:
    """Applies a fresh ``Attention`` layer to every time step of a decoder sequence.

    ``forward`` builds one ``Attention`` instance per decoder step and stores
    them (and their weights) so ``backward`` can replay them.
    """

    def __init__(self):
        self.params = []
        self.grads = []
        # Both are rebuilt as lists on every forward() call.
        self.layers = None
        self.attention_weights = None

    def forward(self, hs_enc, hs_dec):
        """Run attention over ``hs_enc`` once per time step of ``hs_dec`` (N, T, H).

        Returns an array with the same shape and dtype as ``hs_dec``.
        """
        _batch, steps, _hidden = hs_dec.shape
        self.layers = []
        self.attention_weights = []
        out = np.empty_like(hs_dec)

        for step in range(steps):
            attn = Attention()
            out[:, step, :] = attn.forward(hs_enc, hs_dec[:, step, :])
            self.layers.append(attn)
            self.attention_weights.append(attn.attention_weight)

        return out

    def backward(self, dout):
        """Back-propagate ``dout`` (N, T, H) through the stored per-step layers.

        Returns ``(dhs_enc, dhs_dec)``; ``dhs_dec`` has the shape of ``dout``.
        """
        _batch, steps, _hidden = dout.shape
        dhs_dec = np.empty_like(dout)
        # Starts as the scalar 0 and becomes an array on the first addition;
        # every step reads the same hs_enc, so its gradients accumulate.
        dhs_enc = 0

        for step in range(steps):
            grad_enc, grad_query = self.layers[step].backward(dout[:, step, :])
            dhs_enc += grad_enc
            dhs_dec[:, step, :] = grad_query

        return dhs_enc, dhs_dec
