In [1]:
import numpy as np
import pandas as pd

In [21]:
def initialize_ash_prior(k, scale = 2, sparsity = None):
    """
    Build initial mixture weights and the matching prior grid for k components.

    Parameters
    ----------
    k : int
        Number of mixture components; must be at least 1.
    scale : float, optional
        Base of the geometric grid. Component i gets |scale**(i / k) - 1|,
        so the first grid value is always exactly zero.
    sparsity : float or None, optional
        Weight given to the first component. ``None`` means ``1 / k``.
        Ignored when ``k == 1``, because the only weight has to be 1.

    Returns
    -------
    w : ndarray of shape (k,)
        Mixture weights that sum to 1.
    prior_grid : ndarray of shape (k,)
        Non-negative grid values, one per component.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got {}".format(k))
    w = np.zeros(k)
    if k == 1:
        # A single component carries all the mass; the general formula below
        # would divide by (k - 1) == 0.
        w[0] = 1.0
    else:
        w[0] = 1 / k if sparsity is None else sparsity
        # The middle components each receive an equal share of the remaining mass ...
        w[1:(k-1)] = (1 - w[0]) / (k - 1)
        # ... and the last one absorbs whatever is left so the weights sum to 1.
        w[k-1] = 1 - np.sum(w)
    # sqrt(square(x)) is |x|; taking the absolute value directly avoids the
    # intermediate square (and its possible underflow for tiny values).
    prior_grid = np.abs(np.power(scale, np.arange(k) / k) - 1)
    return w, prior_grid

def softmax(x, base = 10):
    """
    Softmax along axis 0 using an arbitrary base for the exponential.

    Computes ``base**x / sum(base**x)`` along the first axis. For a 2-D input
    every column is normalised independently.

    Parameters
    ----------
    x : array_like
        Input values. Lists are accepted and converted to an ndarray.
    base : float or None, optional
        Base of the exponential. ``None`` skips the rescaling and uses the
        natural exponential, which is equivalent to ``base = e``.

    Returns
    -------
    ndarray
        Array with the same shape as ``x`` whose entries along axis 0 sum to 1.
    """
    x = np.asarray(x)
    if base is not None:
        # base**x == exp(x * ln(base))
        beta = np.log(base)
        x = x * beta
    # Subtract the maximum along the SAME axis that is normalised. Using the
    # global maximum instead can underflow an entire column to zero and give 0/0.
    shifted = x - np.max(x, axis = 0, keepdims = True)
    e_x = np.exp(shifted)
    return e_x / np.sum(e_x, axis = 0, keepdims = True)

In [47]:
# Seed the global NumPy RNG so the example inputs are identical on every
# Restart & Run All. NOTE: the outputs recorded in this notebook were produced
# by an unseeded run, so their digits will differ until the cells are re-run.
SEED = 42
np.random.seed(SEED)

k = 4                        # number of softmax inputs / weights
softmax_base = 92            # base of the exponential passed to softmax
w_grad = np.random.rand(k)   # random vector later contracted with the softmax Jacobian
ak = np.random.rand(k)       # softmax inputs
ak[2:] = 1e-2                # overwrite the trailing inputs with a small common value

In [48]:
# Show the softmax inputs; entries 2 and 3 were overwritten with 1e-2.
ak

array([0.73880469, 0.22536756, 0.01      , 0.01      ])

In [49]:
# Turn the inputs into weights with the base-`softmax_base` softmax. The inputs
# are multiplied by log(base) before exponentiating, so a larger base puts more
# of the weight on the largest input.
wk = softmax(ak, base = softmax_base)
wk

array([0.85308936, 0.08369759, 0.03160652, 0.03160652])

In [68]:
# base=None skips the log(base) rescaling, i.e. the plain natural-exponential softmax.
softmax(ak, base = None)

array([0.390106  , 0.23345314, 0.18822043, 0.18822043])

In [67]:
# log(e) = 1, so base e leaves the inputs unscaled and reproduces the base=None result.
softmax(ak, base = np.exp(1))

array([0.390106  , 0.23345314, 0.18822043, 0.18822043])

In [51]:
# Reference Jacobian of the softmax weights, filled one entry at a time:
#   wk_jacobian[i, j] = log(softmax_base) * wk[i] * (delta_ij - wk[j])
wk_jacobian = np.zeros((k, k))
for i, j in np.ndindex(k, k):
    dij = int(i == j)  # Kronecker delta
    wk_jacobian[i, j] = np.log(softmax_base) * wk[i] * (dij - wk[j])

In [52]:
# Show the weights again, for comparison with the Jacobian entries.
wk

array([0.85308936, 0.08369759, 0.03160652, 0.03160652])

In [53]:
# Loop-built Jacobian: entry [i, j] is log(softmax_base) * wk[i] * (delta_ij - wk[j]).
wk_jacobian

array([[ 0.56670627, -0.32286258, -0.12192185, -0.12192185],
       [-0.32286258,  0.34678637, -0.01196189, -0.01196189],
       [-0.12192185, -0.01196189,  0.13840088, -0.00451714],
       [-0.12192185, -0.01196189, -0.00451714,  0.13840088]])

In [54]:
# Symmetry check: wk[i] * (delta_ij - wk[j]) is unchanged when i and j are swapped,
# so the transpose should display the same matrix as wk_jacobian.
wk_jacobian.T

array([[ 0.56670627, -0.32286258, -0.12192185, -0.12192185],
       [-0.32286258,  0.34678637, -0.01196189, -0.01196189],
       [-0.12192185, -0.01196189,  0.13840088, -0.00451714],
       [-0.12192185, -0.01196189, -0.00451714,  0.13840088]])

In [55]:
# Broadcasting subtracts wk from every row of the identity,
# so entry [i, j] is delta_ij - wk[j].
np.eye(k) - wk

array([[ 0.14691064, -0.08369759, -0.03160652, -0.03160652],
       [-0.85308936,  0.91630241, -0.03160652, -0.03160652],
       [-0.85308936, -0.08369759,  0.96839348, -0.03160652],
       [-0.85308936, -0.08369759, -0.03160652,  0.96839348]])

In [57]:
# Vectorised Jacobian: wk as a column scales row i of (I - wk) by
# log(softmax_base) * wk[i], giving the same matrix as the loop-built wk_jacobian.
np.log(softmax_base) * wk[:, np.newaxis] * (np.eye(k) - wk)

array([[ 0.56670627, -0.32286258, -0.12192185, -0.12192185],
       [-0.32286258,  0.34678637, -0.01196189, -0.01196189],
       [-0.12192185, -0.01196189,  0.13840088, -0.00451714],
       [-0.12192185, -0.01196189, -0.00451714,  0.13840088]])

In [58]:
# Show the random vector that the following cells contract with the Jacobian.
w_grad

array([0.53952567, 0.97399717, 0.80661183, 0.12173589])

In [59]:
# Walk through the contraction one Jacobian row at a time, printing the inputs,
# the elementwise products and their sum for each row.
for i in range(k):
    jac_row = wk_jacobian[i, :]
    row_terms = w_grad * jac_row
    print(w_grad)
    print(jac_row)
    print(i, ":", row_terms)
    print(np.sum(row_terms))

[0.53952567 0.97399717 0.80661183 0.12173589]
[ 0.56670627 -0.32286258 -0.12192185 -0.12192185]
0 : [ 0.30575258 -0.31446724 -0.0983436  -0.01484226]
-0.12190052727354968
[0.53952567 0.97399717 0.80661183 0.12173589]
[-0.32286258  0.34678637 -0.01196189 -0.01196189]
1 : [-0.17419265  0.33776894 -0.0096486  -0.00145619]
0.15247149501288654
[0.53952567 0.97399717 0.80661183 0.12173589]
[-0.12192185 -0.01196189  0.13840088 -0.00451714]
2 : [-0.06577997 -0.01165085  0.11163579 -0.0005499 ]
0.03365507426879429
[0.53952567 0.97399717 0.80661183 0.12173589]
[-0.12192185 -0.01196189 -0.00451714  0.13840088]
3 : [-0.06577997 -0.01165085 -0.00364358  0.01684835]
-0.06422604200813084


In [61]:
# The per-row loop in one call: out[i] = sum_j w_grad[j] * wk_jacobian[i, j].
np.sum(w_grad * wk_jacobian, axis = 1)

array([-0.12190053,  0.1524715 ,  0.03365507, -0.06422604])

In [62]:
# Keep the elementwise product: w_grad broadcasts across the rows,
# so A[i, j] = w_grad[j] * wk_jacobian[i, j].
A = w_grad * wk_jacobian

In [63]:
# Row sums of A: entry i is sum_j A[i, j].
np.sum(A, axis = 1)

array([-0.12190053,  0.1524715 ,  0.03365507, -0.06422604])

In [65]:
# out[j] = sum_i w_grad[i] * wk_jacobian[i, j], i.e. wk_jacobian.T @ w_grad.
# NOTE(review): this sums over the FIRST Jacobian index, whereas
# np.sum(w_grad * wk_jacobian, axis = 1) sums over the second. The two agree here
# only because this softmax Jacobian is symmetric; use 'j,ij->i' for the row-sum form.
np.einsum('i,ij->j', w_grad, wk_jacobian)

array([-0.12190053,  0.1524715 ,  0.03365507, -0.06422604])

In [9]:
# Elementwise wk * (1 - wk): the diagonal of the softmax Jacobian without the
# log(softmax_base) factor.
# NOTE(review): this cell carries execution count 9 and its recorded output has 20
# equal entries, so it was run against an earlier, different wk (not the k = 4 one
# defined in this notebook) — re-run to refresh.
wk * (1 - wk)

array([0.0475, 0.0475, 0.0475, 0.0475, 0.0475, 0.0475, 0.0475, 0.0475,
       0.0475, 0.0475, 0.0475, 0.0475, 0.0475, 0.0475, 0.0475, 0.0475,
       0.0475, 0.0475, 0.0475, 0.0475])