In [1]:
import numpy as np
import pandas as pd

In [2]:
def initialize_ash_prior(k, scale = 2, sparsity = None):
    """
    Build an initial weight vector and a grid of scales, each of length k.

    Parameters
    ----------
    k : int
        Number of components.
    scale : float, optional
        Base of the geometric grid; grid entry i is |scale**(i/k) - 1|,
        so the first entry is always 0.
    sparsity : float or None, optional
        Weight placed on component 0. When None, component 0 gets 1/k.

    Returns
    -------
    (w, prior_grid) : tuple of np.ndarray
        `w` sums to 1: w[0] is set as described above, the interior
        components each get (1 - w[0]) / (k - 1), and the last component
        absorbs whatever is left. `prior_grid` is the grid of scales.
    """
    weights = np.zeros(k)
    if sparsity is None:
        weights[0] = 1 / k
    else:
        weights[0] = sparsity
    last = k - 1
    # Interior components share the remaining mass equally ...
    weights[1:last] = np.repeat((1 - weights[0]) / last, k - 2)
    # ... and the final one is set by difference so the total is exactly 1.
    weights[last] = 1 - np.sum(weights)
    exponents = np.arange(k) / k
    grid_squared = np.square(np.power(scale, exponents) - 1)
    return weights, np.sqrt(grid_squared)

def softmax(x, base = 10):
    """
    Softmax of `x` along axis 0 in an arbitrary base.

    Computes base**x / sum(base**x) by rescaling x with log(base) and then
    applying the usual exponential softmax. `base=None` skips the rescaling
    (equivalent to base e).

    Parameters
    ----------
    x : array_like
        Input values. For inputs with more than one dimension the
        normalization runs along axis 0 (each column sums to 1).
    base : float or None, optional
        Base of the exponential; None means the natural exponential.

    Returns
    -------
    np.ndarray
        Array with the same shape as `x`, summing to 1 along axis 0.
    """
    # Accept lists/tuples as well as arrays (list * float would raise).
    x = np.asarray(x)
    if base is not None:
        beta = np.log(base)
        x = x * beta
    # Shift by the maximum along the normalization axis. Using the global
    # maximum here would leave columns that sit far below it underflowing
    # to all zeros, and the division below would then yield NaN.
    shifted = x - np.max(x, axis = 0, keepdims = True)
    e_x = np.exp(shifted)
    return e_x / np.sum(e_x, axis = 0, keepdims = True)

In [3]:
k = 4
softmax_base = 92
# Seeded generator so that Restart & Run All reproduces the same draws.
# Re-run the notebook to refresh any outputs recorded from an unseeded run.
SEED = 0
rng = np.random.default_rng(SEED)
w_grad = rng.random(k)
ak = rng.random(k)
# Pin the last two logits to a small common value so they tie in the softmax.
ak[2:] = 1e-2

In [4]:
# Display the softmax inputs: two random entries followed by two fixed at 1e-2.
ak

array([0.90123647, 0.12764001, 0.01      , 0.01      ])

In [5]:
# Turn ak into weights with a softmax in base `softmax_base`,
# i.e. softmax_base**ak normalized to sum to 1.
wk = softmax(ak, base = softmax_base)
wk

array([0.93825714, 0.02838851, 0.01667718, 0.01667718])

In [6]:
# base=None skips the log(base) rescaling, giving the ordinary exponential softmax.
softmax(ak, base = None)

array([0.4382798 , 0.20220076, 0.17975972, 0.17975972])

In [7]:
# With base e the rescaling factor log(base) is 1, so this should match base=None.
softmax(ak, base = np.exp(1))

array([0.4382798 , 0.20220076, 0.17975972, 0.17975972])

In [8]:
# Element-by-element Jacobian of wk with respect to ak for the base-`softmax_base`
# softmax: entry (i, j) = log(base) * wk[i] * (delta_ij - wk[j]).
wk_jacobian = np.zeros((k, k))
for i, j in np.ndindex(k, k):
    dij = int(i == j)  # Kronecker delta
    wk_jacobian[i, j] = np.log(softmax_base) * wk[i] * (dij - wk[j])

In [9]:
# Display the Jacobian assembled entry by entry in the explicit double loop.
wk_jacobian

array([[ 0.26195029, -0.12044109, -0.0707546 , -0.0707546 ],
       [-0.12044109,  0.12472269, -0.0021408 , -0.0021408 ],
       [-0.0707546 , -0.0021408 ,  0.07415303, -0.00125764],
       [-0.0707546 , -0.0021408 , -0.00125764,  0.07415303]])

In [10]:
# Broadcasting subtracts wk from every row of the identity,
# so entry (i, j) is delta_ij - wk[j].
np.eye(k) - wk

array([[ 0.06174286, -0.02838851, -0.01667718, -0.01667718],
       [-0.93825714,  0.97161149, -0.01667718, -0.01667718],
       [-0.93825714, -0.02838851,  0.98332282, -0.01667718],
       [-0.93825714, -0.02838851, -0.01667718,  0.98332282]])

In [11]:
# Vectorized Jacobian: the column vector wk.reshape(-1, 1) scales row i by wk[i],
# giving log(base) * wk[i] * (delta_ij - wk[j]) -- the same formula as the double loop.
np.log(softmax_base) * wk.reshape(-1, 1) * (np.eye(k) - wk)

array([[ 0.26195029, -0.12044109, -0.0707546 , -0.0707546 ],
       [-0.12044109,  0.12472269, -0.0021408 , -0.0021408 ],
       [-0.0707546 , -0.0021408 ,  0.07415303, -0.00125764],
       [-0.0707546 , -0.0021408 , -0.00125764,  0.07415303]])

In [12]:
# Display the random vector that gets contracted with the Jacobian in the cells below.
w_grad

array([0.11916247, 0.74824826, 0.42262751, 0.7381763 ])

In [13]:
# Step through the Jacobian one row at a time, printing the inputs, the
# element-wise products and their sum for each row.
# NOTE(review): wk_jacobian already carries a log(softmax_base) factor from its
# construction, so the expression below applies that factor a second time --
# confirm this is intended.
for i in range(k):
    jac_row = wk_jacobian[i, :]
    scaled_terms = np.log(softmax_base) * w_grad * jac_row
    print(w_grad)
    print(jac_row)
    print(i, ":", scaled_terms)
    print(np.sum(scaled_terms))

[0.11916247 0.74824826 0.42262751 0.7381763 ]
[ 0.26195029 -0.12044109 -0.0707546  -0.0707546 ]
0 : [ 0.14114602 -0.40750286 -0.13521432 -0.23617016]
-0.6377413212655045
[0.11916247 0.74824826 0.42262751 0.7381763 ]
[-0.12044109  0.12472269 -0.0021408  -0.0021408 ]
1 : [-0.06489697  0.42198928 -0.00409113 -0.00714572]
0.3458554673918965
[0.11916247 0.74824826 0.42262751 0.7381763 ]
[-0.0707546  -0.0021408   0.07415303 -0.00125764]
2 : [-0.03812452 -0.00724321  0.14170884 -0.00419784]
0.09214326138303476
[0.11916247 0.74824826 0.42262751 0.7381763 ]
[-0.0707546  -0.0021408  -0.00125764  0.07415303]
3 : [-0.03812452 -0.00724321 -0.00240339  0.24751371]
0.19974259249057302


In [14]:
# Same computation as the row-by-row loop, in one expression: w_grad broadcasts
# across the columns and each row is summed, so
#   result[i] = log(base) * sum_j w_grad[j] * wk_jacobian[i, j].
# NOTE(review): wk_jacobian already includes log(softmax_base); the factor is
# applied again here -- confirm this is intended.
np.sum(np.log(softmax_base) * w_grad * wk_jacobian, axis = 1)

array([-0.63774132,  0.34585547,  0.09214326,  0.19974259])

In [15]:
# Matrix of element-wise products: A[i, j] = log(base) * w_grad[j] * wk_jacobian[i, j].
# NOTE(review): wk_jacobian already includes log(softmax_base); the factor is
# applied again here -- confirm this is intended.
A = np.log(softmax_base) * w_grad * wk_jacobian

In [16]:
# Row sums of A: entry i is sum_j A[i, j].
np.sum(A, axis = 1)

array([-0.63774132,  0.34585547,  0.09214326,  0.19974259])

In [17]:
# einsum contracts over the first index: result[j] = sum_i w_grad[i] * wk_jacobian[i, j].
# This agrees with the row sums (contraction over the second index) because
# wk_jacobian is symmetric.
# NOTE(review): wk_jacobian already includes log(softmax_base); the factor is
# applied again here -- confirm this is intended.
np.einsum('i,ij->j', w_grad, wk_jacobian) * np.log(softmax_base)

array([-0.63774132,  0.34585547,  0.09214326,  0.19974259])