In [21]:
from typing import List, Tuple, Union

import ndd
import numpy as np
import pandas as pd
from scipy import stats, special, optimize, integrate

from info_hdp import InfoHDP

In [27]:
# Synthetic data set-up: prior hyperparameters and problem sizes.
# NOTE(review): no RNG seed is set anywhere in the visible cells, so the
# numbers printed below may differ between runs -- confirm whether the
# InfoHDP generators draw from a seedable random source.
alpha = 10.0
beta = 0.5
Ns = 200
M = 50

# Draw one probability table from the prior, then sample from it.
# Only the first entry of `pij` is used from here on.
pij = InfoHDP.gen_prior_pij(alpha, beta, 1, Ns)
samples = InfoHDP.gen_samples_prior(pij[0], M, Ns)

# Plug-in estimates computed directly from the samples
print(f"Sample entropy: {InfoHDP.smaxlik(samples)}")
print(f"Naive mutual information: {InfoHDP.inaive(samples)}")

# Reference value computed from the generating table rather than the samples
true_mi = InfoHDP.itrue(pij[0])
print(f"True mutual information: {true_mi}")

Sample entropy: 2.9654518590350656
Naive mutual information: 0.6268694575724263
True mutual information: 0.17950024407836906


In [28]:
# Export the generating table and the drawn samples so they can be reused
# outside this notebook. `pd` comes from the notebook's imports cell.
# pij[0] is assumed to be the vector to export.
dfp = pd.DataFrame(pij[0])
dfs = pd.DataFrame(samples)

# Write values only: no index column and no header row.
# The files land in the current working directory.
dfp.to_csv('pij0.csv', index=False, header=False)
dfs.to_csv('samples0.csv', index=False, header=False)

In [29]:
# Step-by-step evaluation of the intermediate quantities behind Sint
nn = len(samples)                  # number of samples
# In the saved run dkmz printed as a list of integer pairs;
# presumably (count, number of states with that count) -- TODO confirm.
dkmz = InfoHDP.dkm2(samples)
kz = np.unique(samples).size       # number of distinct observed values
az = InfoHDP.asol(nn, kz)
logLaz = InfoHDP.logLa(az, nn, kz)
spostz = InfoHDP.Spost(az, nn, dkmz)

# Echo every intermediate on its own line, in the order it was computed
for intermediate in (nn, dkmz, kz, az, logLaz, spostz):
    print(intermediate)

50
[(1, 12), (2, 5), (3, 4), (4, 1), (5, 1), (7, 1)]
24
17.502439252246685
-114.85878600911136
3.359743408622875


In [30]:
# Compute NSB entropy estimate
# Sint returns a pair, unpacked here as (estimate, spread) and printed as
# "estimate ± spread".
# NOTE(review): in the saved run both values printed as nan and an
# integration warning was emitted -- the NSB numbers from this cell should
# not be relied on until that is investigated.
nsb_entropy, nsb_std = InfoHDP.Sint(samples)
print(f"NSB Entropy estimate: {nsb_entropy:.4f} ± {nsb_std:.4f}")

# Compute NSB mutual information estimate
# Insb returns four values; only the first (nsb_mi) is printed here.
# sx, sy, sxy are kept in the namespace but not used in this cell.
nsb_mi, sx, sy, sxy = InfoHDP.Insb(samples)
print(f"NSB Mutual Information estimate: {nsb_mi:.4f}")

# Compute InfoHDP estimates
# n10 printed as a list of two-element integer lists in the saved run;
# kx is the number of such entries.
n10 = InfoHDP.n10sam(samples)
kx = len(n10)
bb = InfoHDP.bsol(kx, n10)
# NOTE(review): `alpha` is the value assigned in the data-generation cell
# (10.0), not something estimated from `samples` -- confirm that passing the
# generating value here is intended.
sy_cond_x = InfoHDP.SYconX(alpha, bb, len(samples), n10)
print(n10, bb)
print(f"InfoHDP S(Y|X) estimate: {sy_cond_x:.4f}")

NSB Entropy estimate: nan ± nan
NSB Mutual Information estimate: nan
[[2, 0], [1, 0], [0, 1], [0, 7], [2, 0], [0, 1], [0, 1], [0, 3], [1, 0], [0, 1], [1, 0], [0, 5], [0, 1], [1, 0], [0, 3], [0, 1], [0, 2], [0, 3], [0, 2], [1, 0], [1, 0], [2, 0], [4, 0], [0, 3]] 0.051642668286065715
InfoHDP S(Y|X) estimate: 0.0539


  return (k - 1) * np.log(x) + special.gammaln(1 + x) - special.gammaln(n + x)
  the requested tolerance from being achieved.  The error may be 
  underestimated.
  norm_const, norm_error = integrate.quad(integrand_normalization, lower_bound, upper_bound)
  dsint = np.sqrt(sint2 - sint**2)


In [31]:
# Side-by-side comparison of the mutual information estimators.
# True mutual information
# Computed from the generating table pij[0], not from the samples.
print(f"True mutual information: {InfoHDP.itrue(pij[0]):.4f}")

# Compute various statistics
print(f"Naive mutual information: {InfoHDP.inaive(samples):.4f}")
# Same Insb call as in the earlier NSB cell, repeated so this cell can run
# without it.
# NOTE(review): the saved run printed nan for this value.
nsb_mi, sx, sy, sxy = InfoHDP.Insb(samples)
print(f"NSB Mutual Information estimate: {nsb_mi:.4f}")
print(f"InfoHDP MAP Mutual Information estimate: {InfoHDP.IhdpMAP(samples):.4f}")
# IhdpIntb's result is printed with its default repr (no :.4f); the saved run
# shows a 3-tuple of floats.
# NOTE(review): the meaning of the three components is not visible from this
# notebook -- confirm against the InfoHDP source.
print(f"InfoHDP (integration in beta) Mutual Information estimate: {InfoHDP.IhdpIntb(samples)}")


True mutual information: 0.1795
Naive mutual information: 0.6269
NSB Mutual Information estimate: nan
InfoHDP MAP Mutual Information estimate: 0.5705
InfoHDP (integration in beta) Mutual Information estimate: (0.5655693703510217, 0.049775247127986574, 0.061300087221404606)


In [34]:
# Independent cross-check of the entropy estimate using the `ndd` package.
# `ndd` comes from the notebook's imports cell.

# Collapse the samples into their distinct values and how often each occurs.
unique, counts = np.unique(samples, return_counts=True)
print(unique)
print(counts)
# The last expression is displayed as the cell output: with return_std=True
# the saved run shows a pair of floats.
# NOTE(review): k=100 is a hard-coded alphabet size. The data were generated
# with Ns=200 and the saved output contains values with magnitude up to 198,
# so the true alphabet looks larger than 100 -- confirm the intended k.
ndd.entropy(counts, k=100, return_std=True)

[-198 -147 -143 -122 -115  -92  -81  -52  -50  -32  -24  -23   -6   -5
    1    2   12   37   51   91  149  150  191  192]
[3 2 3 2 1 3 1 5 1 3 1 1 7 1 2 1 2 1 1 1 1 1 2 4]


(3.4645546219623773, 0.19003929138278977)