In [1]:
import cvxpy as cp
import numpy as np
import tqdm
import scipy
import math
from scipy.special import xlogy
import time

setting

In [2]:
X = np.array([-1, 0 ,1])
N = len(X)
M = 50
S = np.linspace(-10, 10, M+1)
K = 3

X corrupted by noise

In [3]:
sigma = np.sqrt(5)
U = X + np.random.randn(N)*sigma

conditional noise density

In [4]:
Phi = [scipy.stats.norm(loc=X[i], scale=sigma) for i in range(N)]

## step 1

Quantizer

In [5]:
H = [S[0], -0.5, 0.5, S[-1]]

A = np.zeros((K, N))

for j in range(N):
    for i in range(K):
        A[i, j] = Phi[j].cdf(H[i+1]) - Phi[j].cdf(H[i])

In [6]:
def channel_capacity(n, m, P, sum_x=1):
    '''
    Boyd and Vandenberghe, Convex Optimization, exercise 4.57 page 207
    Capacity of a communication channel.

    We consider a communication channel, with input X(t)∈{1,..,n} and
    output Y(t)∈{1,...,m}, for t=1,2,... .The relation between the
    input and output is given statistically:
    p_(i,j) = ℙ(Y(t)=i|X(t)=j), i=1,..,m  j=1,...,n

    The matrix P ∈ ℝ^(m*n) is called the channel transition matrix, and
    the channel is called a discrete memoryless channel. Assuming X has a
    probability distribution denoted x ∈ ℝ^n, i.e.,
    x_j = ℙ(X=j), j=1,...,n

    The mutual information between X and Y is given by
    ∑(∑(x_j p_(i,j)log_2(p_(i,j)/∑(x_k p_(i,k)))))
    Then channel capacity C is given by
    C = sup I(X;Y).
    With a variable change of y = Px this becomes
    I(X;Y)=  c^T x - ∑(y_i log_2 y_i)
    where c_j = ∑(p_(i,j)log_2(p_(i,j)))
    '''

    # n is the number of different input values
    # m is the number of different output values
    if n*m == 0:
        print('The range of both input and output values must be greater than zero')
        return 'failed', np.nan, np.nan

    # x is probability distribution of the input signal X(t)
    x = cp.Variable(shape=n)

    # y is the probability distribution of the output signal Y(t)
    # P is the channel transition matrix
    y = P@x

    # I is the mutual information between x and y
    c = np.sum(np.array((xlogy(P, P) / math.log(2))), axis=0)
    I = c@x + cp.sum(cp.entr(y) / math.log(2))

    # Channel capacity maximised by maximising the mutual information
    obj = cp.Maximize(I)
    constraints = [cp.sum(x) == sum_x,x >= 0]

    # Form and solve problem
    prob = cp.Problem(obj,constraints)
    prob.solve()
    if prob.status=='optimal':
        return prob.status, prob.value, x.value
    else:
        return prob.status, np.nan, np.nan

In [7]:
status, obj_value, px_value = channel_capacity(N, K, A)

In [8]:
obj_value

0.10054511943472244

In [9]:
px_value

array([4.99999993e-01, 1.47191919e-08, 4.99999992e-01])

## step 2

In [10]:
def calculate_D(px, i, j, k, S, N, Phi):
    if k == 0:
        return 0
    if j <= i:
        return 0
    
    H_thres = np.linspace(S[i], S[j], k+1)
    
    Ax = np.zeros((k, N))
    for j in range(N):
        for i in range(k):
            Ax[i, j] = Phi[j].cdf(H_thres[i+1]) - Phi[j].cdf(H_thres[i])

    Ay = np.zeros((N, k))
    for j in range(N):
        for i in range(k):
            Ay[j, i] = px_value[j]*Ax[i,j]/np.sum(px*Ax[i,:])

    py = np.matmul(Ax, px)
    c = np.sum(np.array((xlogy(Ay, Ay) / math.log(2))), axis=0)
    H_xy = -np.sum(py*c)
    
    return H_xy

In [85]:
def run_dp(px, N, M, K, S, Phi):
    D = np.zeros((N, M, K))
    H = np.zeros((K, M))

    for k in range(1, K):
        for i in range(N):
            for j in range(1, M):
                tmp = []
                for q in range(j):
                    tmp.append(calculate_D(px, i, q, k-1, S, N, Phi) +\
                                           calculate_D(px, q+1, j, 1, S, N, Phi))
                D[i, j, k] = np.min(tmp)
                H[k, j] = np.argmin(tmp)
    H_K = M-1
    H_opt = []
    for i in np.arange(K-1)[::-1]:
        H_ = H[i+1, int(H_K)]
        H_opt.append(H_)
        H_K = H_

    H_opt = H_opt[::-1]
    
    return D, H, H_opt

## combine

### init

In [86]:
X = np.array([-3, 0, 3])
N = len(X)
M = 200
S = np.linspace(-8, 8, M+1)
K = 3

In [87]:
sigma = 0.1
U = X + np.random.randn(N)*sigma

In [88]:
Phi = [scipy.stats.norm(loc=X[i], scale=sigma) for i in range(N)]

In [89]:
Hx = [S[0], -1.5, 1.5, S[-1]]

A = np.zeros((K, N))

for j in range(N):
    for i in range(K):
        A[i, j] = Phi[j].cdf(Hx[i+1]) - Phi[j].cdf(Hx[i])

In [90]:
channel_capacity(N, K, A)

('optimal', 1.584962500721156, array([0.33333333, 0.33333333, 0.33333333]))

### test

In [80]:
px_value = np.array([1.0/3, 1.0/3, 1.0/3])

In [81]:
D, H, H_opt = run_dp(N, M, K, S, Phi)

In [83]:
H

array([[  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0., 

In [84]:
H_opt

[99.0, 100.0]

## alternating

In [60]:
I_list = []
px_list = []
h_list = []

for iteration in tqdm.tqdm(range(100)):
    start = time.time()
    # step 1
    status, obj_value, px_value = channel_capacity(N, K, A)

    # step 2
    D = np.zeros((N, M, K))
    H = np.zeros((K, M))

    for k in range(1, K):
        for i in range(N):
            for j in range(M):
                tmp = []
                for q in range(j):
                    tmp.append(calculate_D(px_value, i, q, k-1, S, N, Phi) +\
                                calculate_D(px_value, q+1, j, 1, S, N, Phi))
                if len(tmp) > 0:
                    D[i, j, k] = np.min(tmp)
                    H[k, j] = np.argmin(tmp)
                else:
                    D[i, j, k] = 0
                    H[k, j] = 0

    H_K = M-1
    H_opt = []
    for i in np.arange(K-1)[::-1]:
        H_ = H[i+1, int(H_K)]
        H_opt.append(H_)
        H_K = H_

    H_opt = H_opt[::-1]

    # recompute A
    thres = [S[int(l)] for l in H_opt]
    Hx = [S[0]] + thres + [S[-1]]

    A = np.zeros((K, N))

    for j in range(N):
        for i in range(K):
            A[i, j] = Phi[j].cdf(Hx[i+1]) - Phi[j].cdf(Hx[i])

    # calculate I(X;Y)
    py = np.matmul(A, px_value)

    # I is the mutual information between x and y
    c = np.sum(np.array((xlogy(A, A) / math.log(2))), axis=0)
    I = -np.sum(py*c) - np.sum(np.array((xlogy(py, py) / math.log(2))), axis=0)
    stop = time.time()
    print("iter {} - I(X;Y)={:.6f} - took {:.2f}s".format(iteration, I, stop-start))
    print("threshold = ", thres)
    print("H_opt = ", H_opt)
    print("px = ", px_value)
    
    I_list.append(I)
    h_list.append(H_opt)
    px_list.append(px_value)

  1%|▍                                       | 1/100 [05:21<8:50:17, 321.39s/it]

iter 0 - I(X;Y)=2.981629 - took 321.39s
threshold =  [-0.6999999999999993, -0.5999999999999996, 4.0]
H_opt =  [73.0, 74.0, 120.0]
px =  [4.79280467e-01 6.40236081e-10 4.92278394e-10 5.20719532e-01]


  2%|▊                                       | 2/100 [10:41<8:43:32, 320.54s/it]

iter 1 - I(X;Y)=2.988892 - took 319.94s
threshold =  [-0.5999999999999996, -0.5, 4.100000000000001]
H_opt =  [74.0, 75.0, 121.0]
px =  [4.97742142e-01 2.09813968e-08 2.10434996e-08 5.02257816e-01]


  3%|█▏                                      | 3/100 [15:48<8:28:27, 314.51s/it]

iter 2 - I(X;Y)=2.988954 - took 307.34s
threshold =  [-0.5999999999999996, -0.5, 4.100000000000001]
H_opt =  [74.0, 75.0, 121.0]
px =  [4.98369779e-01 1.76745245e-08 1.77115076e-08 5.01630186e-01]


  3%|█▏                                      | 3/100 [18:10<9:47:43, 363.54s/it]


KeyboardInterrupt: 