In [1]:
# autoreload
%load_ext autoreload
%autoreload 2

# Tutorial

## First `Experiment` class 

In [3]:
from catede.handle_ngrams import markov_class
from catede.estimate import Experiment

seed_1 = 13273                                          # rng seed

#  simulation  #
A = 20                                                  # n. of states
L = 3                                                   # length of the L-grams
K = A ** L                                              # n. categories a priori

mobj_1 = markov_class( L, n_states=A, seed=seed_1 )     # random Markov matrix

size = int(1e4)                                         # sample size
seqs_1 = mobj_1.generate_counts( size, seed=seed_1 )    # generate histogram of counts
exact_sh_entropy_1 = mobj_1.exact_shannon()             # exact Shannon entropy

exp_1 = Experiment( seqs_1, categories=K )              # first experiment

## Shannon entropy estimation

$$ - \sum_{i=1}^{K} q_{i} \log q_{i}$$

In [4]:
shannon_1_naive = exp_1.entropy( method='naive' ) 
shannon_1_CAE = exp_1.entropy( method='CAE' ) 
shannon_1_NSB, shannon_1_NSBstd = exp_1.entropy( method='NSB', error=True, verbose=False ) 

print("Shannon entropy")
print( f"exact : { exact_sh_entropy_1:.3f}" )
print( f"naive : {shannon_1_naive:.3f}" )
print( f"CAE : {shannon_1_CAE:.3f}" )
print( f"NSB : {shannon_1_NSB:.3f}", r"+-", f"{shannon_1_NSBstd:.3f}" )

Shannon entropy
exact : 8.592
naive : 8.200
CAE : 8.553
NSB : 8.590 +- 0.009


## Simpson index estimation

$$ \sum_{i=1}^{K} {q_{i}}^2

In [5]:
exact_si_idx_1 = mobj_1.exact_simpson()
simpson_1_naive = exp_1.simpson( method='naive' )
simpson_1_CAE = exp_1.simpson( method='CAE' ) 
simpson_1_NSB, simpson_1_NSBstd = exp_1.simpson( method='NSB', error=True, n_bins=100 )

print("Simpson index")
print( f"exact : {exact_si_idx_1:.5f}" )
print( f"naive : {simpson_1_naive:.5f}" )
print( f"CAE : {simpson_1_CAE:.5f}" )
print( f"NSB : {simpson_1_NSB:.5f}", r"+-", f"{simpson_1_NSBstd:.5f}" )

Simpson index
exact : 0.00023
naive : 0.00033
CAE : 0.00024
NSB : 0.00024 +- 0.00000


## `Divergence` class

In [6]:
from catede.estimate import Divergence

# simulation of an independent second system
seed_2 = 5119                                           # rng seed

mobj_2 = markov_class( L, n_states=A, seed=seed_2 )     # random Markov matrix generation
seqs_2 = mobj_2.generate_counts( size, seed=seed_2 )    # generate histogram of counts
exact_sh_entropy_2 = mobj_2.exact_shannon()             # exact Shannon entropy  
exp_2 = Experiment( seqs_2, categories=K )              # second experiment
div_to1from2 = Divergence( exp_1, exp_2 )               # divergence class

## Kullback-Leibler divergence estimation

$$ \sum_{i=1}^{K} q_{i} \log \frac{q_{i}}{t_{i}}$$

In [7]:
# Kullback Leibler divergence estimation #
exact_DKL_to1from2 = mobj_1.exact_kullbackleibler( mobj_2 )
kullback_naive = div_to1from2.kullback_leibler(method='naive')
kullback_CMW, kullback_CMWstd = div_to1from2.kullback_leibler( method='CMW', error=True ) 

print("Kullback Leilber divergence")
print( f"exact : { exact_DKL_to1from2:.3f}" )
print( f"naive : {kullback_naive:.3f}" )
print( f"CMW : {kullback_CMW:.3f}", r"+-", f"{kullback_CMWstd:.3f}" )

Kullback Leilber divergence
exact : 0.949
naive : 0.211
CMW : 0.893 +- 0.021


## Hellinger divergence estimation

$$ \sqrt{ 1 - \sum_{i=1}^{K} \sqrt{q_{i}} \sqrt{t_{i}} }$$

In [9]:
# Hellinger divergence estimation #
# FIXME
#exact_DH_to1from2 = mobj_1.exact_hellinger( mobj_2 )
hellinger_naive = div_to1from2.hellinger(method='naive')
# FIXME
#hellinger_CMW, hellinger_CMWstd = div_to1from2.hellinger( method='CMW', error=True ) 

print("Kullback Leilber divergence")
#print( f"exact : { exact_DH_to1from2:.3f}" )
print( f"naive : {hellinger_naive:.3f}" )
#print( f"CMW : {hellinger_CMW:.3f}", r"+-", f"{hellinger_CMWstd:.3f}" )

Kullback Leilber divergence
exact : 0.949
naive : 0.707
