<a href="https://colab.research.google.com/github/Zhengro/DL-Identification/blob/sara-mlp_decoder_2/ANN_decoding_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [4]:
# Import libraries
import numpy as np
import numpy.matlib
import math
import random
from numpy.random import random as rand
from keras.models import Sequential
from keras.layers.core import Dense
from keras import backend as K
from sklearn.metrics.cluster import normalized_mutual_info_score
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [5]:
# connect to google drive
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


# Parameters

In [0]:
N = 256                     # data sequence length (number of bits per user; also the decoder input width)
M = 1500                    # number of users
R = 0.5                     # compression rate (part of the data file names; passed to get_free_indexes)
bsc_err = 0.1               # crossover probability of the binary symmetric channel
epochs = 2**14              # number of training epochs
design = [16*N, 16*N]      # each list entry is the number of nodes in one hidden layer
batch_size = 100            # number of samples per gradient update
optimizer = 'nadam'           # optimizer name handed to model.compile
loss ='binary_crossentropy'  # loss name handed to model.compile
obs_per_user = 10           # number of noisy observations generated per user

# Import data

In [19]:
# All files of one experiment live in a Drive folder keyed by (M, N, bsc_err);
# the individual files inside it are keyed by the compression rate R.
base_path = f'/content/drive/My Drive/Colab Notebooks/{M}_data_len_{N}/bsc_err_{bsc_err}'
path_to_orig_data = f'{base_path}/original_data_{R}.txt'
path_to_db = f'{base_path}/db_{R}.txt'
path_to_decompressed_db = f'{base_path}/decompressed_db_{R}.txt'

# Every file is comma-separated and is parsed directly into a boolean array.
# (Noisy observations are not read from disk; they are generated further down with bsc().)
orig_data, db, decompressed_db = (
    np.loadtxt(path, delimiter=',', dtype=bool)
    for path in (path_to_orig_data, path_to_db, path_to_decompressed_db)
)
print('size is:', orig_data.shape)

size is: (1500, 256)


# Calculate free/frozen indexes

In [0]:
def get_entropy(x):
    """Binary entropy H(p) = -p*log2(p) - (1-p)*log2(1-p), evaluated element-wise.

    Parameters
    ----------
    x : scalar or array-like
        Probability or probabilities. Entries equal to 0 or 1 map to 0.

    Returns
    -------
    numpy.ndarray
        Float array with the shape of ``x`` (a 0-d array for scalar input).

    Notes
    -----
    The result is always floating point. A ``np.vectorize`` wrapper around a
    helper that returns the int ``0`` at the boundaries would infer an integer
    output type whenever the first element is 0 or 1, truncating every other
    entropy value; computing directly on a float array avoids that and also
    supports empty input.
    """
    p = np.asarray(x, dtype=float)
    at_boundary = (p == 0) | (p == 1)
    # 0 * log2(0) evaluates to nan; those positions are overwritten by the
    # np.where below, so the floating-point warnings are only noise here.
    with np.errstate(divide='ignore', invalid='ignore'):
        h = -p * np.log2(p) - (1 - p) * np.log2(1 - p)
    return np.where(at_boundary, 0.0, h)
  
  
def reverse_entropy(y):
    """Numerically search for a value whose entropy (per get_entropy) equals ``y``.

    Starting from the guess ``tst = y``, the loop repeatedly moves ``tst`` along
    the straight line through (tst, H(tst)) and (xx, H(xx)) until H(tst) is
    within 1e-7 of ``y``. The result is folded so that it never exceeds 0.5.

    Parameters
    ----------
    y : float
        Target entropy value. Assumes 0 < y < 1 — TODO confirm; there is no
        iteration cap, so the loop only ends once the tolerance is met.

    Returns
    -------
    ``tst`` if it is below 0.5, otherwise ``1 - tst``.
    """
    xu = 0.5   # initial upper reference point
    xd = 0     # initial lower reference point
    tst = y    # initial guess
    val = get_entropy(tst)

    while abs(val - y) > 0.0000001:
        if val > y:
            # Entropy too large: step relative to the lower reference point and
            # remember the current guess as the new upper reference point.
            xx = xd
            xu = tst
        else:
            # Entropy too small: step relative to the upper reference point and
            # remember the current guess as the new lower reference point.
            xx = xu
            xd = tst
        # Linear-interpolation step between tst and xx; the machine epsilon in
        # the denominator guards against a division by exactly zero.
        tst = tst + (y - val) * (xx - tst) / (get_entropy(xx) - val + np.finfo(float).eps)
        val = get_entropy(tst)
    # Fold the solution: return tst itself below 0.5, otherwise its complement.
    if tst < 0.5:
        x = tst
    else:
        x = 1 - tst

    return x

  
def get_bec_bhattacharyya(N, e):
    """Recursively compute Bhattacharyya parameters for an erasure channel.

    Parameters
    ----------
    N : int
        Number of channels (length of the sequence).
    e : float
        Erasure rate of the underlying channel.

    Returns
    -------
    numpy.ndarray
        Length-``N`` array. After each of the ``int(log2(N))`` doubling steps a
        parent value ``z`` at position ``k`` has been replaced by ``2z - z**2``
        at position ``2k`` and ``z**2`` at position ``2k + 1``.
    """
    depth = int(math.log2(N))
    z = np.zeros(N)
    z[0] = e
    for level in range(depth):
        width = int(2 ** level)
        # Copy the parents first: the interleaved writes below overlap them.
        parents = z[:width].copy()
        z[0:2 * width:2] = 2 * parents - parents * parents
        z[1:2 * width:2] = parents * parents

    return z
  
  
# compression rate must be 0 < R < 1
def get_free_indexes(N, compresseion_rate):
    """Select the free positions, i.e. those with the smallest Bhattacharyya parameters.

    The distortion is obtained by inverting the entropy at ``1 - compresseion_rate``;
    its entropy is then used as the erasure rate for ``get_bec_bhattacharyya``.

    Parameters
    ----------
    N : int
        Sequence length.
    compresseion_rate : float
        Compression rate, expected to satisfy 0 < rate < 1.

    Returns
    -------
    numpy.ndarray
        The ``N - round(N * compresseion_rate)`` indexes with the lowest
        Bhattacharyya parameter, in ascending order of that parameter.

    NOTE(review): the number of returned indexes is ``N - round(N * rate)``,
    which coincides with ``round(N * rate)`` only at rate 0.5 — confirm which
    count is intended for other rates.
    """
    target_distortion = reverse_entropy(1 - compresseion_rate)
    n_selected = N - round(N * compresseion_rate)
    z_params = get_bec_bhattacharyya(N, get_entropy(target_distortion))

    # argsort is ascending, so the leading entries are the smallest parameters.
    return z_params.argsort()[:n_selected]


# Define NN model

In [0]:
def compose_model(layers):
    """Stack the given Keras layers, in order, into a new Sequential model."""
    stacked = Sequential()
    for block in layers:
        stacked.add(block)
    return stacked
  
  
def errors(y_true, y_pred):
    """Keras metric: number of outputs whose rounded prediction differs from the label."""
    hard_decisions = K.round(y_pred)
    mismatches = K.not_equal(y_true, hard_decisions)
    return K.sum(K.cast(mismatches, 'uint16'))

In [21]:
# The database columns at the free positions are the training targets.
free_indexes = get_free_indexes(N, R)
num_free_indexes = len(free_indexes)
db_free = db[:, free_indexes]
print(db_free.shape)

# Define decoder: N inputs -> ReLU hidden layers sized by `design` -> one
# sigmoid output per free index.
decoder_layers = (
    [Dense(design[0], activation='relu', input_shape=(N,))]
    + [Dense(width, activation='relu') for width in design[1:]]
    + [Dense(num_free_indexes, activation='sigmoid')]
)
model = compose_model(decoder_layers)
model.compile(optimizer=optimizer, loss=loss, metrics=[errors])


(1500, 128)


# Train

In [0]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()
# Fit the decoder to map each original sequence to its free-index database bits.
# verbose=0 suppresses the per-epoch log; the returned History object is kept in `history`.
history = model.fit(orig_data, db_free, batch_size=batch_size, epochs=epochs, verbose=0, shuffle=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 4096)              1052672   
_________________________________________________________________
dense_5 (Dense)              (None, 4096)              16781312  
_________________________________________________________________
dense_6 (Dense)              (None, 128)               524416    
Total params: 18,358,400
Trainable params: 18,358,400
Non-trainable params: 0
_________________________________________________________________


# Generate observations

In [0]:
def bsc(input_bits, p_t):
  """Return a copy of ``input_bits`` with randomly flipped entries.

  One uniform random number is drawn per position of the first two axes;
  wherever it is ``<= p_t`` the entry is replaced by ``1 ^ entry``.

  Parameters
  ----------
  input_bits : numpy.ndarray
      M*N array of bits; it is not modified.
  p_t : float
      Flip threshold applied to the uniform draws.

  Returns
  -------
  numpy.ndarray
      Array with the same shape and dtype as ``input_bits``.
  """
  n_rows = input_bits.shape[0]
  n_cols = input_bits.shape[1]
  flip_mask = rand((n_rows, n_cols)) <= p_t
  noisy_bits = input_bits.copy()
  noisy_bits[flip_mask] = 1 ^ noisy_bits[flip_mask]
  return noisy_bits



In [0]:
# Stack obs_per_user copies of the original data row-wise and pass them through the BSC.
observations = bsc(np.tile(orig_data, (obs_per_user, 1)), bsc_err)
# Matching labels 0..M-1 repeated obs_per_user times; the reps (1, obs_per_user)
# make this a 2-D array of shape (1, M * obs_per_user).
# Assumes orig_data has exactly M rows, so that row i of `observations` belongs to user i % M.
true_labels = np.tile(np.arange(M), (1, obs_per_user))
# print(true_labels)
# print(observations.shape)

# Predict

In [14]:
# Run the trained decoder on the noisy observations (sigmoid outputs per free index).
predicted_compressed = model.predict(observations, verbose=1, steps=None)
# Round each output to the nearest integer to obtain hard bit decisions.
predictions = np.rint(predicted_compressed)
# NOTE(review): `not_same` is only assigned in the commented-out line below, yet the
# "Save results" cell reads it — confirm whether this diagnostic should be re-enabled.
# print(np.sum(abs(predictions - db), axis=1))
# not_same = np.mean(np.sum(abs(predictions - db), axis=1)) / N
# print(np.mean(np.sum(abs(predictions - db), axis=1)))
# print(not_same)



# Exhaustive search to get index

In [0]:
def get_index_mmi(y, x):
  """Return the index of the database row that shares the most information with ``x``.

  Parameters
  ----------
  y : numpy.ndarray
      The db (array of vectors, one candidate per row).
  x : numpy.ndarray
      The observation (one vector).

  Returns
  -------
  Index of the row of ``y`` with the largest normalized mutual information
  score against ``x`` (first such row on ties).
  """
  scores = np.zeros(y.shape[0])
  for row, candidate in enumerate(y):
    scores[row] = normalized_mutual_info_score(candidate, x)

  # A larger score means a closer match, so the best candidate is the maximum;
  # taking the minimum would select the least related row.
  return np.argmax(scores)


def get_err_mmi(predictions, db_free, true_labels):
  """Identification error when matching by mutual information.

  Parameters
  ----------
  predictions : numpy.ndarray
      One predicted vector per row.
  db_free : numpy.ndarray
      Database of candidate vectors, one per row.
  true_labels : array-like
      Correct database index for every prediction; any shape holding exactly
      one label per prediction is accepted (it is flattened).

  Returns
  -------
  (num_err, err)
      Number of wrongly identified predictions and that number divided by the
      number of predictions.

  Raises
  ------
  ValueError
      If the number of labels differs from the number of predictions.
  """
  length = predictions.shape[0]
  pred_index = np.zeros(length, dtype=int)
  for i in range(length):
    pred_index[i] = get_index_mmi(db_free, predictions[i, :])

  # Flatten the labels so that e.g. a (1, n) or (n, 1) array is compared
  # element by element instead of being broadcast against the predictions.
  labels = np.ravel(true_labels)
  if labels.shape != pred_index.shape:
    raise ValueError('expected {} labels, got {}'.format(length, labels.size))

  num_err = np.count_nonzero(pred_index != labels)
  err = num_err/length
  return num_err, err



def get_index_hamming(y, x):
    """Return the index of the database row closest to ``x`` in Hamming distance.

    Parameters
    ----------
    y : numpy.ndarray
        The db (array of vectors, one candidate per row).
    x : numpy.ndarray
        The observation (one vector); entries are compared by truthiness.

    Returns
    -------
    Index of the row of ``y`` with the fewest positions that differ from ``x``
    (first such row on ties).
    """
    # Broadcasting compares x against every row directly; this replaces the
    # deprecated numpy.matlib.repmat and avoids building a tiled copy of x.
    mismatches = np.logical_xor(y, x)
    hamming_distance = np.sum(mismatches, axis=1)
    return np.argmin(hamming_distance)
  
  
def get_err_hamming(predictions, db_free, true_labels):
  """Identification error when matching by minimum Hamming distance.

  Parameters
  ----------
  predictions : numpy.ndarray
      One predicted vector per row.
  db_free : numpy.ndarray
      Database of candidate vectors, one per row.
  true_labels : array-like
      Correct database index for every prediction; any shape holding exactly
      one label per prediction is accepted (it is flattened).

  Returns
  -------
  (num_err, err)
      Number of wrongly identified predictions and that number divided by the
      number of predictions.

  Raises
  ------
  ValueError
      If the number of labels differs from the number of predictions.
  """
  length = predictions.shape[0]
  pred_index = np.zeros(length, dtype=int)

  for i in range(length):
    pred_index[i] = get_index_hamming(db_free, predictions[i, :])

  # Flatten the labels so that e.g. a (1, n) or (n, 1) array is compared
  # element by element instead of being broadcast against the predictions.
  labels = np.ravel(true_labels)
  if labels.shape != pred_index.shape:
    raise ValueError('expected {} labels, got {}'.format(length, labels.size))

  num_err = np.count_nonzero(pred_index != labels)
  err = num_err/length
  return num_err, err

# Calculate error

In [0]:
# Identify every prediction by minimum Hamming distance against the free-index database.
num_err_hamming, err_hamming = get_err_hamming(predictions, db_free, true_labels)
# The mutual-information variant is disabled; while it stays commented out,
# num_err_mmi and err_mmi are never assigned.
# num_err_mmi, err_mmi = get_err_mmi(predictions, db_free, true_labels)

In [17]:
# Report the absolute number of misidentified observations and the error rate.
print('number of error for hamming distance:', num_err_hamming)
print ('error for hamming distance:', err_hamming)

# Reporting for the (currently disabled) mutual-information matcher.
# print('number of error for mmi:', num_err_mmi)
# print ('error for mmi:', err_mmi)

number of error for hamming distance: 14373
error for hamming distance: 0.9582


# Save results

In [0]:
# The MMI evaluation and the `not_same` diagnostic are commented out in the
# cells above, so those names may not exist. Look them up defensively and write
# a placeholder instead of failing with a NameError; the line layout of the
# results file is unchanged.
_not_computed = 'n/a'
_namespace = globals()

# `result_line` rather than `str`, so the builtin str() is not shadowed.
result_line = (
    'num_err_mmi = {}, pe_mmi = {}, num_err_hamming= {}, pe_hamming= {}, N= {}, M= {}, bsc_err= {}, R= {},'
    '  hidden_layers= {}, cost_func= {}, not_same={} \n'
).format(
    _namespace.get('num_err_mmi', _not_computed),
    _namespace.get('err_mmi', _not_computed),
    num_err_hamming,
    err_hamming,
    N,
    M,
    bsc_err,
    R,
    design,
    loss,
    _namespace.get('not_same', _not_computed),
)

# Append, so that results of earlier runs are kept.
file_path = base_path + '/results_2.txt'
with open(file_path, "a") as myfile:
    myfile.write(result_line)