## Discrete entropy 

In [1]:
import autograd.numpy as np
from scipy.stats import entropy as entropy_scipy

def expectation(f, x, p):
    """Exact expectation of ``f`` over a discrete distribution.

    Args:
        f: Callable applied to the whole support ``x`` at once; its result
            is multiplied elementwise by ``p``.
        x: Support points handed to ``f``.
        p: Weights (probabilities) paired with the entries of ``f(x)``.

    Returns:
        The sum over all entries of ``p * f(x)``.
    """
    values = f(x)
    return np.sum(p * values)

p = np.array([0.07, 0.08, 0.31, 0.21, 0.09, 0.24])


def entropy_f(x):
    # Surprisal -log p[x]. This reads the module-level `p`, not an argument.
    return -np.log(p[x])


def entropy(p):
    # The argument `p` only supplies the weights of the expectation; the
    # integrand `entropy_f` still looks up the module-level `p`.
    return expectation(entropy_f, np.arange(len(p)), p)


# Compare the hand-rolled value against SciPy's reference implementation.
(entropy(p), entropy_scipy(p))

(1.6382322756637515, 1.6382322756637515)

In [2]:
import autograd

# Gradient of the exact expectation with respect to the argument of `entropy`.
# `entropy_f` reads the module-level `p`, so only the weights are differentiated
# and the integrand is treated as fixed.
autograd.grad(entropy)(p)

array([2.65926004, 2.52572864, 1.17118298, 1.56064775, 2.40794561,
       1.42711636])

In [3]:
def mc_expectation(f, x, p, n_samples=1):
    """Monte Carlo estimate of the expectation of ``f`` under ``p``.

    Args:
        f: Callable applied to the whole array of drawn samples at once.
        x: Support to draw from (passed directly to ``np.random.choice``).
        p: Probability of drawing each entry of ``x``.
        n_samples: Number of draws; defaults to a single sample.

    Returns:
        The mean of ``f`` evaluated on the drawn samples.
    """
    draws = np.random.choice(x, size=n_samples, p=p)
    evaluated = f(draws)
    return np.mean(evaluated)

# Sampling-based estimate of the same expectation computed exactly by `entropy`.
mc_expectation(
    entropy_f,
    np.arange(len(p)),
    p,
    n_samples=100000,
)

1.6389947556635918

In [4]:
def score_function_estimator(f, x, p, n_samples=1):
    """Score-function estimate of the gradient of E_p[f] with respect to ``p``.

    Indices are drawn according to ``p``; each draw contributes
    ``f(sample) * d log p[index] / d p`` and the contributions are averaged.
    ``f`` is evaluated on the samples only, so it is treated as a fixed
    integrand rather than differentiated.

    Args:
        f: Callable applied to the whole array of drawn samples at once.
        x: Array of support points, indexed by the drawn indices.
        p: Probabilities used both for sampling and as the differentiation
            variable.
        n_samples: Number of draws; defaults to a single sample.

    Returns:
        The per-sample contributions averaged over the sample axis.
    """

    def log_prob(indices, probs):
        return np.log(probs[indices])

    drawn_indices = np.random.choice(len(x), p=p, size=n_samples)
    drawn_values = x[drawn_indices]
    # One Jacobian row per draw: the derivative of log p[index] w.r.t. all of p.
    score_rows = autograd.jacobian(log_prob, argnum=1)(drawn_indices, p)
    # Turn f's values into a column so each row is scaled by its own sample.
    f_column = f(drawn_values)[:, np.newaxis]
    return np.mean(score_rows * f_column, axis=0)

# Sampling-based counterpart of `autograd.grad(entropy)(p)`.
score_function_estimator(
    entropy_f,
    np.arange(len(p)),
    p,
    n_samples=10000,
)

array([2.69345052, 2.52888581, 1.17496099, 1.51457148, 2.33838274,
       1.46755132])

In [43]:
# Uniform reference distribution on the same support as `p`.
q = np.ones(len(p)) / len(p)


def kl_pq_f(x):
    # -entropy_f(x) is log p[x], so this is the log-ratio log p[x] - log q[x].
    # Both lookups go through the module-level arrays.
    return -entropy_f(x) - np.log(q[x])


def kl_pq(p):
    # Expectation of the log-ratio, weighted by the argument `p`.
    return expectation(kl_pq_f, np.arange(len(p)), p)


kl_pq(p)

0.15352719356430322

In [22]:
# Sampling-based estimate of the same expectation computed exactly by `kl_pq`.
mc_expectation(
    kl_pq_f,
    np.arange(len(p)),
    p,
    n_samples=100000,
)

0.15270361327364357

In [25]:
# Gradient of the exact expectation with respect to the argument of `kl_pq`.
# `kl_pq_f` reads the module-level `p` and `q`, so only the weights are
# differentiated and the integrand is treated as fixed.
autograd.grad(kl_pq)(p)

array([-0.86750057, -0.73396918,  0.62057649,  0.23111172, -0.61618614,
        0.36464311])

In [30]:
# Sampling-based counterpart of `autograd.grad(kl_pq)(p)`.
score_function_estimator(
    kl_pq_f,
    np.arange(len(p)),
    p,
    n_samples=10000,
)

array([-0.84271484, -0.74589617,  0.63018541,  0.21603443, -0.6209787 ,
        0.3781653 ])