In [1]:
import numpy as np

## 重み付き和の実装

In [2]:
T,H = 5,4
hs = np.random.randn(T,H)
a = np.array([0.8, 0.1, 0.03, 0.05, 0.02])

In [3]:
ar = a.reshape(5,1).repeat(4, axis=1)
print(ar)

[[0.8  0.8  0.8  0.8 ]
 [0.1  0.1  0.1  0.1 ]
 [0.03 0.03 0.03 0.03]
 [0.05 0.05 0.05 0.05]
 [0.02 0.02 0.02 0.02]]


In [4]:
print(hs)

[[ 1.06992892  0.62738399 -0.53252558 -0.60762027]
 [ 0.32112509 -0.82707874  0.84872165 -1.29333919]
 [ 0.6938465   0.93974623 -0.17513435 -0.17430116]
 [ 0.59237411  0.58243641 -0.31713789 -0.03758297]
 [-0.69728927  1.50770685 -1.38284813  1.27152078]]


In [6]:
t = hs * ar
print(t)

[[ 0.85594314  0.5019072  -0.42602046 -0.48609622]
 [ 0.03211251 -0.08270787  0.08487216 -0.12933392]
 [ 0.0208154   0.02819239 -0.00525403 -0.00522903]
 [ 0.02961871  0.02912182 -0.01585689 -0.00187915]
 [-0.01394579  0.03015414 -0.02765696  0.02543042]]


In [7]:
c = np.sum(t,axis=0)
print(c.shape)

(4,)


In [8]:
c

array([ 0.92454396,  0.50666767, -0.38991619, -0.5971079 ])

## バッチ処理版の重み付き和の実装

In [10]:
N,T,H = 10,5,4
hs = np.random.randn(N,T,H)
a = np.random.randn(N, T)
ar = a.reshape(N, T, 1).repeat(H, axis=2)

In [13]:
t = hs * ar
print(t.shape)
print(t)

(10, 5, 4)
[[[-0.19025786 -0.11997848 -0.4957948  -1.59924724]
  [-0.26039053  0.11733221 -0.0269754  -0.01947011]
  [ 0.4219718  -0.44518118 -1.01047478 -1.16769086]
  [ 0.4841305  -0.38349248 -0.57181502  0.32332853]
  [ 0.60387006 -0.9004709   0.97014867  2.02644627]]

 [[-0.99564643  1.03087795  0.05290311 -0.10630611]
  [-0.42517884  0.05696828 -0.01838178 -0.76505821]
  [-3.00747041 -1.92982765 -2.63655472 -3.31525939]
  [-1.67526501 -0.61896389  0.45833206  0.67625585]
  [ 0.09674549  0.06729912 -0.03427757 -0.03994594]]

 [[-0.05503972  0.09711847 -0.11397793  0.24814371]
  [-0.23292985  0.07042208  0.23403724  0.597125  ]
  [ 1.98423667 -0.52499108  0.25916379  0.52784523]
  [-0.54476073 -1.39611787 -0.33992325  2.13439525]
  [-0.503456    0.73270677 -2.13531233  1.73765503]]

 [[-0.68977969  0.74586367 -0.14258513 -0.89123897]
  [ 0.33395245 -0.05838972 -0.40897456  0.11760739]
  [-0.18340476  0.23432085  0.13809048 -0.28498888]
  [-0.23868981  0.28637863 -0.25594758  0.27037

In [14]:
c = np.sum(t, axis=1)
print(c)
print(c.shape)

[[ 1.05932397 -1.73179083 -1.13491133 -0.43663342]
 [-6.0068152  -1.3936462  -2.17797891 -3.55031381]
 [ 0.64805037 -1.02086163 -2.09601248  5.24516422]
 [-0.6951742   1.2730926  -0.58822182 -0.88169682]
 [ 1.82256177 -1.52974077  0.05627837  0.5128269 ]
 [-0.79722362  2.56947175  0.406497    2.34372694]
 [ 2.20842362  2.77162879 -0.84333604 -1.23686647]
 [ 3.22719999  0.08898756  6.04932358  2.96105821]
 [-0.71780457  0.26838033 -1.13839857 -0.25848596]
 [ 2.31646871 -2.57952533  0.21455538 -1.56804985]]
(10, 4)


## Decoderの改良2

In [26]:
class Softmax:
    def __init__(self):
        self.params, self.grads = [], []
        self.out = None

    def forward(self, x):
        self.out = softmax(x)
        return self.out

    def backward(self, dout):
        dx = self.out * dout
        sumdx = np.sum(dx, axis=1, keepdims=True)
        dx -= self.out * sumdx
        return dx
    

In [15]:
N,T,H = 10,5,4
hs = np.random.randn(N,T,H)
h = np.random.randn(N,H)
hr = h.reshape(N, 1, H).repeat(T, axis=1)

In [32]:
print(h)
print(hr)

[[-0.85386222 -0.58518992 -0.386579    0.16597095]
 [ 0.8526899  -0.26718498 -0.50848301 -0.1112106 ]
 [-0.61686415  0.00546739 -0.16589723 -0.73253401]
 [ 1.54651466 -0.90535487  0.09876434 -0.17141466]
 [ 0.10855371  0.77223873 -0.34953658 -1.95457674]
 [ 2.06400619 -0.38483342  0.18107398  0.56127436]
 [-0.07884966 -0.52197907  0.71598072  1.16867855]
 [ 1.24227519 -0.69847367  0.39527321  0.5172277 ]
 [ 0.85078539  0.82703497 -0.65955595  0.25908555]
 [ 0.30118319  0.78394688  0.70029006 -0.07935971]]
[[[-0.85386222 -0.58518992 -0.386579    0.16597095]
  [-0.85386222 -0.58518992 -0.386579    0.16597095]
  [-0.85386222 -0.58518992 -0.386579    0.16597095]
  [-0.85386222 -0.58518992 -0.386579    0.16597095]
  [-0.85386222 -0.58518992 -0.386579    0.16597095]]

 [[ 0.8526899  -0.26718498 -0.50848301 -0.1112106 ]
  [ 0.8526899  -0.26718498 -0.50848301 -0.1112106 ]
  [ 0.8526899  -0.26718498 -0.50848301 -0.1112106 ]
  [ 0.8526899  -0.26718498 -0.50848301 -0.1112106 ]
  [ 0.8526899  -0.2

In [17]:
t = hs * hr
print(t.shape)
print(t)

(10, 5, 4)
[[[ 1.50184884e+00  7.14609672e-02 -1.05163159e-01 -6.22511696e-02]
  [-1.34234305e+00 -9.47338227e-01  3.54300927e-01 -3.93230640e-03]
  [ 1.34565095e+00 -3.21272326e-01 -3.81282864e-01  1.15700983e-01]
  [-3.64663924e-01  6.35569686e-01  6.59902455e-02  2.85587135e-01]
  [-4.55629576e-01 -2.27171032e-01  4.21360131e-01  1.62196069e-01]]

 [[ 2.18080682e-01  1.88206330e-01 -2.46753675e-01  2.10740566e-02]
  [ 1.84459045e+00 -6.97303127e-02  2.75937079e-01 -9.86432668e-02]
  [-7.22048645e-01  7.66804365e-01  1.88136962e-01 -1.21942492e-01]
  [-1.75817111e+00  6.09540124e-02 -2.38059175e-01 -6.21294938e-02]
  [ 7.41944147e-01 -4.90024124e-03 -1.02657015e+00  9.71720206e-02]]

 [[ 4.32148853e-02 -5.25570083e-03  6.49988624e-02 -2.81672431e-01]
  [-1.47750843e-01 -4.05575213e-04  1.65686662e-01  1.69557280e+00]
  [-1.10741266e+00  7.05697707e-03  1.05090442e-01  9.13314671e-03]
  [ 2.89241769e-01  8.23823793e-03  5.28772198e-03  4.04163643e-01]
  [ 4.21331097e-01  3.53764482e-0

In [18]:
s = np.sum(t, axis=2)
print(s)
print(s.shape)

[[ 1.40589548 -1.93931266  0.75879674  0.62248314 -0.09924441]
 [ 0.18060739  1.95215395  0.11095019 -1.99740577 -0.19235422]
 [-0.17871438  1.71310305 -0.98613209  0.70693137  0.64045571]
 [ 0.83580324 -0.21169621  0.8685206   0.68333313 -3.67361477]
 [-0.91307415  1.63602419 -2.87704809 -4.37643352  1.45640775]
 [-1.84702208  3.1374687   0.54660473  1.36173536  0.93112309]
 [-1.20579733 -0.67734404  3.37289633 -0.21578874  1.42396307]
 [-1.50107434  3.7726471   1.5730188  -0.13931786  2.62020517]
 [-0.7887932  -1.92660373 -1.23671633  1.42502485  0.66610387]
 [-0.56361627 -0.28378556 -0.61621783  0.17301351  1.31721957]]
(10, 5)


In [29]:
def softmax(x):
        if x.ndim == 2:
            x = x - x.max(axis=1, keepdims=True)
            x = np.exp(x)
            x /= x.sum(axis=1, keepdims=True)
        elif x.ndim == 1:
            x = x - np.max(x)
            x = np.exp(x) / np.sum(np.exp(x))

        return x

In [30]:
a = softmax(a)

In [31]:
print(a)
print(a.shape)

[[0.03856311 0.10150135 0.28082104 0.27401623 0.30509827]
 [0.07325551 0.10237723 0.03089353 0.58394967 0.20952406]
 [0.41915726 0.23582193 0.11238277 0.09703463 0.13560341]
 [0.1223794  0.16615574 0.32739612 0.12436486 0.25970387]
 [0.17131366 0.39903373 0.12327527 0.23102027 0.07535707]
 [0.2660093  0.08035062 0.53091833 0.09246175 0.03025999]
 [0.08290365 0.18363585 0.63747982 0.06501717 0.03096351]
 [0.14369117 0.4950766  0.01725454 0.04817126 0.29580643]
 [0.04562043 0.1978669  0.3049885  0.08363192 0.36789225]
 [0.26015365 0.06974554 0.37318971 0.17793211 0.11897899]]
(10, 5)
