# six sided die 

In [None]:
# A fair die is the 'source' in this example. Each roll produces a number 1 through 6.
# As shown below, this works out to log2(6), about 2.585 information bits per roll (bits per symbol): its entropy.
# Note that a fair 8-sided die would produce exactly 3 bits per roll.

In [32]:
import numpy as np
import pandas as pd
import scipy.stats as sc

def entropy2(p):
    """Return the Shannon entropy, in bits, of a discrete distribution.

    Computes sum(pk * log2(1/pk)) over the entries of ``p``.

    Parameters
    ----------
    p : iterable of float
        Probabilities of each outcome. They are used as given; no
        normalisation is performed.

    Returns
    -------
    float
        Entropy in bits per symbol (0 for an empty ``p``).
    """
    h = 0.0
    for pk in p:
        # By convention 0 * log2(1/0) is taken as 0 (its limit as pk -> 0).
        # Skipping the term avoids a divide-by-zero / nan result.
        if pk == 0:
            continue
        h = h + pk * np.log2(1 / pk)
    return h

# Uniform distribution over the six faces of the die.
a = np.full(6, 1.0 / 6)

# Entropy in bits from the hand-written helper ...
h = entropy2(a)

# ... and from SciPy as a cross-check; the printed difference should be 0.
h2 = sc.entropy(a, base=2)
print(h, h2, h - h2)

2.58496250072 2.58496250072 0.0


# Examine optimal Source Coding

In [None]:
# Shannon's fundamental theorem for a noiseless channel (section 9)
# says that for a source with an entropy of H bits/symbol and a channel with a capacity of C bits/second,
# there is a source coding that can achieve a rate of (C/H) - epsilon symbols/second, where epsilon is arbitrarily small,
# and that it is not possible to transmit at an average rate greater than C/H.

# The Shannon limit here is a rate of 1 (b/sec) / 2.585 (b/sym) = 0.387 sym/sec

In [52]:
# We'll try various encodings to find the one with the maximum efficiency.
# Here a die roll is a 'symbol'. A block of num_die rolls has 6**num_die possible
# values and is encoded as one fixed-length binary word of ceil(log2(6**num_die)) digits.
C = 1.0  # channel capacity, binary digits/second
# Shannon limit on the symbol rate (sym/sec); h is the die entropy computed in the cell above.
Rmax = C/h

emax = 0
for num_die in range(1,24):
    values = 6**num_die                 # distinct outcomes of num_die rolls
    bits = np.ceil(np.log2(values))     # binary digits needed to label every outcome
    L = bits/num_die                    # average binary digits spent per symbol
    eff = h/L                           # efficiency: entropy relative to digits actually used
    # Report only block sizes that improve on the best efficiency seen so far.
    if eff > emax:
        emax = eff
        die = num_die
        Lmax = L
        rt = C/Lmax                     # achieved symbol rate at channel capacity C
        print("emax {:1f}, num_die {}, L {:1f} ibits/sym, Shannon {:1f} ibits/sym, rate {:1f} sym/sec".format(emax,die,Lmax,h,rt))

    

emax 0.861654, num_die 1, L 3.000000 ibits/sym, Shannon 2.584963 ibits/sym, rate 0.333333 sym/sec
emax 0.969361, num_die 3, L 2.666667 ibits/sym, Shannon 2.584963 ibits/sym, rate 0.375000 sym/sec
emax 0.994216, num_die 5, L 2.600000 ibits/sym, Shannon 2.584963 ibits/sym, rate 0.384615 sym/sec
emax 0.998736, num_die 17, L 2.588235 ibits/sym, Shannon 2.584963 ibits/sym, rate 0.386364 sym/sec


# Huffman Coding

In [None]:
# TODO: insert code here to do Huffman coding. It is a cool coding technique that encodes data
# such that the symbols with the highest probability get the shortest codewords.
# Further, it is a prefix code: no codeword is a prefix of any other codeword, so no delimiter is needed
# between symbols. Cool. Also, lossless.