In [1]:
from typing import List, Tuple, Union

import ndd
import numpy as np
import pandas as pd
from scipy import stats, special, optimize, integrate

from info_hdp import InfoHDP

In [2]:
# Seed NumPy's legacy global RNG before any data is generated so that
# re-running the notebook from a fresh kernel repeats the same draws
# (presumably InfoHDP samples through np.random -- TODO confirm).
np.random.seed(0)

# Generator settings, one per line. Their meaning is only known from the
# names and the call sites below -- check InfoHDP before relying on it.
alpha = 10.0
beta = 0.5
Ns = 1000
M = 200

# pij is indexed with [0] everywhere it is used; the literal 1 passed here
# presumably asks for a single draw from the prior -- TODO confirm.
pij = InfoHDP.gen_prior_pij(alpha, beta, 1, Ns)
samples = InfoHDP.gen_samples_prior(pij[0], M, Ns)

# Entropy statistics: reference value, plug-in estimate, the pieces of the
# alpha normalisation integral, and three NSB-labelled estimates.
# The first print evaluates InfoHDP.strue on the generating vector pij[0];
# the second evaluates InfoHDP.smaxlik on the samples alone.
# NOTE(review): the recorded output of this cell carries two warning lines
# quoting `p * np.log(p)`; that expression is not in this notebook, so it
# presumably comes from inside InfoHDP when some p == 0 -- confirm.
print(f"True entropy: {InfoHDP.strue(pij[0])}")
print(f"Naive entropy: {InfoHDP.smaxlik(samples)}")

dkmz = InfoHDP.dkm2(samples)  # not used again in this cell
kz = len(np.unique(samples))  # number of distinct sample values
az = InfoHDP.asol(M, kz)
# ead/eau are used below as the quad limits of a function of log_x, so they
# live on the log scale.
# NOTE(review): in the recorded output they equal log(az) -/+ 3 * sigea.
ead, eau = InfoHDP.intEa(az, M, kz)
# NOTE(review): the recorded output (sigea ~ 4.97, norm_const ~ 0.505) has
# norm_const ~ sqrt(2*pi) / sigea, i.e. the peak width in log(alpha) looks
# like 1 / sigea rather than sigea. Confirm whether this quantity is meant to
# be a standard deviation or its inverse.
sigea = np.sqrt(-InfoHDP.D2expalogL(np.log(az), M, kz))

# Cache log(az) and logLa at az; logLa_az is the offset subtracted inside the
# integrand below.
log_az = np.log(az)
logLa_az = InfoHDP.logLa(az, M, kz)

def integrand_normalization(log_x):
    """Return exp(logLa(exp(log_x), M, kz) - logLa_az).

    The argument is exponentiated before being handed to InfoHDP.logLa, so it
    is on a log scale. M, kz and logLa_az are read from the notebook namespace
    at call time. At log_x == log(az) the exponent is zero, so the value is 1.
    """
    return np.exp(InfoHDP.logLa(np.exp(log_x), M, kz)-logLa_az)

# quad returns (integral, absolute-error estimate); norm_error is not used
# afterwards in this cell.
norm_const, norm_error = integrate.quad(integrand_normalization, ead, eau)

# The last printed value is the integrand at log(az), which is 1 by
# construction (see the docstring above).
print(az, ead, eau, sigea, norm_const, integrand_normalization(log_az))

# Sanity checks: logLa re-evaluated at az, and at exp(log(az)), printed next
# to the cached logLa_az. Each line should show the same number twice.
print(InfoHDP.logLa(az, M, kz),logLa_az)
print(InfoHDP.logLa(np.exp(log_az), M, kz),logLa_az)

# Prints whatever InfoHDP.Sint returns (a 2-tuple in the recorded output --
# presumably (estimate, std), TODO confirm).
print(f"NSB entropy (infoHDP): {InfoHDP.Sint(samples)}")

# Cross-check with ndd on the per-value counts, once with k=Ns supplied and
# once without; return_std=True makes ndd return (estimate, std).
# `unique` is not used afterwards in this cell.
unique, counts = np.unique(samples, return_counts=True)
print(f"NSB entropy (ndd with K): {ndd.entropy(counts, k=Ns, return_std=True)}")
print(f"NSB entropy (ndd w/o K): {ndd.entropy(counts, return_std=True)}")


True entropy: 3.3204714078171147
Naive entropy: 2.9751708857148014
13.639064600766394 -12.303280880213899 17.529157025028574 4.972072984207079 0.5050364320838264 1.0
-809.71039405756 -809.71039405756
-809.71039405756 -809.71039405756
NSB entropy (infoHDP): (3.1490662442712365, 0.04909423715244971)
NSB entropy (ndd with K): (3.1479469057964735, 0.0972250188630044)
NSB entropy (ndd w/o K): (3.1493099989395783, 0.09780774166330879)


  return -np.sum(np.where(p > 0, p * np.log(p), 0))
  return -np.sum(np.where(p > 0, p * np.log(p), 0))


In [3]:
# Export the generating vector pij[0] and the drawn samples as CSV files in
# the current working directory, written without an index column and without
# a header row.
# pandas (`pd`) is imported together with the other dependencies in the
# notebook's first cell, so every dependency is visible in one place.
dfp = pd.DataFrame(pij[0])
dfs = pd.DataFrame(samples)

# Existing files with these names are overwritten.
dfp.to_csv('pij0.csv', index=False, header=False)
dfs.to_csv('samples0.csv', index=False, header=False)

In [4]:
# Step-by-step intermediate quantities, described by the original note as the
# internal calculations of Sint. Here the sample count is taken from the data
# (len(samples)) rather than from the generator setting.
# NOTE(review): the statistics cell above passes M where this cell passes
# len(samples), and the recorded outputs of the two cells disagree (az,
# logLa). Either len(samples) != M or the stored outputs come from different
# runs -- re-run from a fresh kernel to check.
nn = len(samples)
dkmz = InfoHDP.dkm2(samples)
kz = np.unique(samples).size  # number of distinct sample values
az = InfoHDP.asol(nn, kz)
logLaz = InfoHDP.logLa(az, nn, kz)
spostz = InfoHDP.Spost(az, nn, dkmz)

# One value per output line, in the order they were computed.
for intermediate in (nn, dkmz, kz, az, logLaz, spostz):
    print(intermediate)

50
[(1, 9), (2, 4), (3, 1), (4, 3), (5, 2), (8, 1)]
20
11.859890348309674
-125.53792181471931
3.157022884436927


In [5]:
# Mutual-information comparison: the reference value from the generating
# vector first, then each estimator applied to the same samples. Every value
# is computed and printed before the next estimator runs.
mi_true = InfoHDP.itrue(pij[0])
print(f"True mutual information: {mi_true:.4f}")

mi_naive = InfoHDP.inaive(samples)
print(f"Naive mutual information: {mi_naive:.4f}")

# Insb returns four values; only the first is reported here. sx, sy and sxy
# are kept under their original names (presumably the entropy terms behind
# the estimate -- TODO confirm).
nsb_mi, sx, sy, sxy = InfoHDP.Insb(samples)
print(f"NSB Mutual Information estimate: {nsb_mi:.4f}")

mi_hdp_map = InfoHDP.IhdpMAP(samples)
print(f"InfoHDP MAP Mutual Information estimate: {mi_hdp_map:.4f}")

# Printed unformatted: IhdpIntb returns a tuple (three numbers in the
# recorded output), not a single float.
mi_hdp_intb = InfoHDP.IhdpIntb(samples)
print(f"InfoHDP (integration in beta) Mutual Information estimate: {mi_hdp_intb}")


True mutual information: 0.2156
Naive mutual information: 0.6399
NSB Mutual Information estimate: 0.6230
InfoHDP MAP Mutual Information estimate: 0.5635
InfoHDP (integration in beta) Mutual Information estimate: (0.5603993755722314, 0.06238233797579687, 0.1295443828861681)
