In [1]:
from typing import List, Tuple, Union

import ndd
import numpy as np
import pandas as pd
from scipy import stats, special, optimize, integrate

from info_hdp import InfoHDP

In [9]:
# Seed NumPy's global RNG so the random draws below are repeatable
# (assumes InfoHDP samples through np.random — TODO confirm).
np.random.seed(1)

# Generate sample data: one distribution from the prior, then samples from it.
# alpha and beta are the prior hyperparameters handed to gen_prior_pij;
# Ns and M are forwarded to the generator/sampler as-is.
alpha, beta, Ns, M = 10.0, 0.5, 1000, 50
pij = InfoHDP.gen_prior_pij(alpha, beta, 1, Ns)
samples = InfoHDP.gen_samples_prior(pij[0], M, Ns)

# Reference value computed from the distribution itself, followed by the
# estimate computed from the samples only.
print(f"True entropy: {InfoHDP.strue(pij[0])}")
print(f"Naive entropy: {InfoHDP.smaxlik(samples)}")

print(f"NSB entropy (infoHDP): {InfoHDP.Sint(samples)}")

# Cross-check with the ndd package, which takes per-state occurrence counts.
unique, counts = np.unique(samples, return_counts=True)
# The alphabet size K is read off the distribution the samples were drawn from
# rather than assumed equal to Ns, so it stays correct whatever number of
# states gen_prior_pij actually returns.
print(f"NSB entropy (ndd with K): {ndd.entropy(counts, k=np.size(pij[0]), return_std=True)}")
print(f"NSB entropy (ndd w/o K): {ndd.entropy(counts, return_std=True)}")


True entropy: 3.624322762399706
Naive entropy: 3.052878602840477
NSB entropy (infoHDP): (3.692469718104957, 0.19932029874217316)
NSB entropy (ndd with K): (3.6859154845267246, 0.22881575701530027)
NSB entropy (ndd w/o K): (3.6937754160107374, 0.23280549445093177)


In [3]:
# Export the generated distribution and the drawn samples to CSV files in the
# current working directory.
# Assuming pij[0] is the vector to be exported.
dfp = pd.DataFrame(pij[0])
dfs = pd.DataFrame(samples)

# Values only: neither the index column nor a header row is written.
dfp.to_csv('pij0.csv', index=False, header=False)
dfs.to_csv('samples0.csv', index=False, header=False)

In [4]:
# Step-by-step evaluation of the intermediate quantities behind Sint.
nn = len(samples)                # total number of samples
kz = len(np.unique(samples))     # number of distinct values observed
dkmz = InfoHDP.dkm2(samples)
az = InfoHDP.asol(nn, kz)
logLaz = InfoHDP.logLa(az, nn, kz)
spostz = InfoHDP.Spost(az, nn, dkmz)

# One value per line, in the order nn, dkmz, kz, az, logLaz, spostz.
print(nn, dkmz, kz, az, logLaz, spostz, sep="\n")

200
[(1, 14), (2, 6), (3, 4), (4, 5), (5, 1), (6, 2), (11, 1), (14, 1), (21, 1), (25, 2), (29, 1)]
38
13.639064600766394
-809.71039405756
3.1472457205710556


In [5]:
# Reference value from the generating distribution, then the plug-in estimate
# computed from the samples.
print(
    f"True mutual information: {InfoHDP.itrue(pij[0]):.4f}",
    f"Naive mutual information: {InfoHDP.inaive(samples):.4f}",
    sep="\n",
)

# Insb returns four values; only the first one is reported here.
nsb_mi, sx, sy, sxy = InfoHDP.Insb(samples)

# Remaining estimators. The last result is printed unformatted, exactly as
# returned by IhdpIntb.
print(
    f"NSB Mutual Information estimate: {nsb_mi:.4f}",
    f"InfoHDP MAP Mutual Information estimate: {InfoHDP.IhdpMAP(samples):.4f}",
    f"InfoHDP (integration in beta) Mutual Information estimate: {InfoHDP.IhdpIntb(samples)}",
    sep="\n",
)


True mutual information: 0.3372
Naive mutual information: 0.5332
NSB Mutual Information estimate: 0.5368
InfoHDP MAP Mutual Information estimate: 0.5223
InfoHDP (integration in beta) Mutual Information estimate: (0.5217095836422757, 0.011535389637468096, 0.011454257095022851)
