In [2]:
import sys
import numpy as np
from scipy.stats import entropy

In [3]:
# Machine epsilon: the gap between 1.0 and the next representable float.
eps = sys.float_info.epsilon

###   Joint distribution $P(X,Y)$

In [4]:
# Joint probabilities written as integer counts of 1/32.
# Rows index Y (axis 0), columns index X (axis 1).
pXY = np.array([[4, 2, 1, 1],
                [2, 4, 1, 1],
                [2, 2, 2, 2],
                [8, 0, 0, 0]]) / 32
pXY

array([[0.125  , 0.0625 , 0.03125, 0.03125],
       [0.0625 , 0.125  , 0.03125, 0.03125],
       [0.0625 , 0.0625 , 0.0625 , 0.0625 ],
       [0.25   , 0.     , 0.     , 0.     ]])

### Marginals $P(X)$, $P(Y)$

In [5]:
# Collapse the Y axis (rows) to obtain the marginal over X.
pX = np.sum(pXY, axis=0)
print('P(X):', pX)

P(X): [0.5   0.25  0.125 0.125]


In [6]:
# Collapse the X axis (columns) to obtain the marginal over Y.
# For this joint distribution the Y marginal happens to be uniform.
pY = pXY.sum(1)
print(f'P(Y): {pY}')

P(X): [0.25 0.25 0.25 0.25]


### Entropy $H(X)$,  $H(Y)$

In [7]:
# Entropy of the X marginal in bits, shown next to the hand-computed 7/4.
hX = entropy(pk=pX, base=2)
print('H(X):', hX, 'bits', 7 / 4)

H(X): 1.75 bits 1.75


In [8]:
# Entropy of the Y marginal in bits, shown next to the hand-computed value 2.
hY = entropy(pk=pY, base=2)
print('H(Y):', hY, 'bits', 2)

H(Y): 2.0 bits 2


### Conditionals $P(X|Y)$, $P(Y|X)$ 

In [9]:
# P(X|Y): divide every row of the joint (fixed Y) by that row's P(Y).
pX_Y = pXY / pY[:, np.newaxis]

# P(Y|X): divide every column of the joint (fixed X) by that column's P(X).
pY_X = pXY / pX[np.newaxis, :]

print(f'P(X|Y)\n{pX_Y}\n \nP(Y|X)\n{pY_X}')

P(X|Y)
[[0.5   0.25  0.125 0.125]
 [0.25  0.5   0.125 0.125]
 [0.25  0.25  0.25  0.25 ]
 [1.    0.    0.    0.   ]]
 
P(Y|X)
[[0.25  0.25  0.25  0.25 ]
 [0.125 0.5   0.25  0.25 ]
 [0.125 0.25  0.5   0.5  ]
 [0.5   0.    0.    0.   ]]


In [10]:
# Sanity check: each row of P(X|Y) is a distribution over X, so it should sum to 1.
np.sum(pX_Y, axis=1)

array([1., 1., 1., 1.])

In [11]:
# Sanity check: each column of P(Y|X) is a distribution over Y, so it should sum to 1.
np.sum(pY_X, axis=0)

array([1., 1., 1., 1.])

### $H(X|Y=i)$ (Nats)

In [12]:
# One entropy value per row of P(X|Y); entropy() defaults to the natural log (nats).
np.array([entropy(row) for row in pX_Y])

array([1.21300757, 1.21300757, 1.38629436, 0.        ])

### $H(X|Y=i)$ (Bits)

In [13]:
# Same per-row entropies, converted from nats to bits by the factor log2(e).
np.log2(np.e) * np.array([entropy(row) for row in pX_Y])

array([1.75, 1.75, 2.  , 0.  ])

### $H(X|Y)$ (Bits)

In [16]:
# entropy() reduces along axis 0, so transpose P(X|Y) to get one H(X|Y=i) per
# row of the original matrix, then average those values with weights P(Y=i).
hX_Y = np.sum(entropy(pX_Y.T, base=2) * pY)
print(f'H(X|Y): {hX_Y} bits', 11/8)

H(X|y): 1.375 bits 1.375


In [17]:
# H(X|Y) = sum_i P(Y=i) * H(X|Y=i). Weight explicitly by P(Y): a plain mean
# only gives the right answer when P(Y) is uniform, which holds for this
# example but not in general.
hX_Y = np.sum(pY * np.log2(np.e) * np.apply_along_axis(entropy, 1, pX_Y))
print(f'H(X|Y): {hX_Y} bits', 11/8)

H(X|y): 1.375 bits 1.375


### $H(Y|X)$ (Bits)


In [18]:
# Columns of pY_X are the distributions P(Y|X=j); entropy() reduces along
# axis 0, giving one H(Y|X=j) per column, which is then weighted by P(X=j).
hY_X = np.sum(entropy(pY_X, base=2) * pX)
print(f'H(Y|X): {hY_X:.3f} bits', 13/8)

H(X|y): 1.625 bits 1.625


In [19]:
# H(Y|X) = sum_j P(X=j) * H(Y|X=j), with the per-column entropies converted
# from nats to bits. The weights must be P(X): a plain mean would be wrong
# here because P(X) is not uniform.
hY_X = np.sum(pX * np.log2(np.e) * np.apply_along_axis(entropy, 0, pY_X))
print(f'H(Y|X): {hY_X:.3f} bits', 13/8)

H(X|y): 1.625 bits 1.625


In [20]:
# Direct evaluation of H(Y|X) = -sum_{x,y} P(x) P(y|x) log2 P(y|x).
# Zero-probability cells are replaced by 1 inside the log so they contribute
# exactly 0 (the 0*log 0 = 0 convention). Adding a small constant to every
# entry instead would slightly bias all the non-zero terms as well.
hY_X = -np.sum(pX * pY_X * np.log2(np.where(pY_X > 0, pY_X, 1.0)))
print(f'H(Y|X): {hY_X:.3f} bits', 13/8)

H(X|y): 1.625 bits 1.625


### Joint entropy $H(X, Y)$

In [21]:
# H(X,Y) = -sum_{x,y} P(x,y) log2 P(x,y).
# Zero-probability cells are replaced by 1 inside the log so they contribute
# exactly 0 (the 0*log 0 = 0 convention). Adding a small constant to every
# entry instead would slightly bias all the non-zero terms as well.
hXY = -np.sum(pXY * np.log2(np.where(pXY > 0, pXY, 1.0)))
print(f'H(X,Y): {hXY:.3f} bits', 27/8)

H(X,Y): 3.375 bits 3.375


### Chain rule $H(X,Y) = H(X) + H(Y|X)$

In [22]:
# Left: joint entropy. Right: H(X) + H(Y|X). Both rounded to four decimals.
print(f'{hXY:.4f}', '=', f'{hX + hY_X:.4f}')

3.3750 = 3.3750


### Chain rule $H(X,Y) = H(Y) + H(X|Y)$

In [119]:
# Left: joint entropy rounded to three decimals. Right: H(Y) + H(X|Y), unrounded.
print(f'{hXY:.3f}', '=', hY + hX_Y)

3.375 = 3.375


### Entropies

In [23]:
# Summary of every entropy computed so far, one quantity per line.
print(
    f'H(X):{hX} \n'
    f'H(X|Y):{hX_Y} \n\n'
    f'H(Y):{hY} \n'
    f'H(Y|X):{hY_X:.3f} \n\n'
    f'H(X,Y):{hXY:.3f}'
)

H(X):1.75 
H(X|Y):1.375 

H(Y):2.0 
H(Y|X):1.625 

H(X,Y):3.375


### $I(X:Y) = H(X) - H(X|Y) = H(Y) - H(Y|X) = I(Y:X)$

In [133]:
# Mutual information computed both ways: H(X) - H(X|Y) and H(Y) - H(Y|X).
print(hX - hX_Y, '=', f'{hY - hY_X:.3f}')

0.375 = 0.375
