<a href="https://colab.research.google.com/github/Zhengro/DL-Identification/blob/NNE_Sara/NNE_multiple_simulations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [1]:
# Import libraries
import numpy as np
import numpy.matlib
import math
import random
from numpy.random import random as rand
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras import backend as K
from sklearn.metrics.cluster import normalized_mutual_info_score
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Connect to Google Drive (Colab only). Mounts the Drive at /content/drive, which is the
# root of the data paths built in the simulation loop; running this cell asks for an
# authorization code interactively.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


# Parameters

In [3]:
# data sequence length(s)
N = np.array([64])

# number of users
M = 1000

# compression rate
R = 0.5

# crossover probability of the binary symmetric channel
bsc_err = 0.1

# number(s) of learning epochs (np.arange excludes the stop value, so this is [3000])
epochs = np.arange(3000, 4000, 1000)

# each row defines the number of nodes in every hidden layer of one network design
design = np.array([[1024, 1024], [512, 512]])

# size of the batches used for calculating the gradient
batch_size = 100

# optimizer
optimizer = 'nadam'

# loss function
loss = 'binary_crossentropy'

# test on how many observations per user
obs_per_user = 100

# dropout value(s)
dropout = np.array([0])

# name of the file to save the results to (optional)
filename = ''

# Seed for the random number generators, so that the noisy observations drawn through
# numpy.random are the same on every Restart & Run All.
# NOTE(review): this does not seed the Keras/TensorFlow weight initialisation.
seed = 0
np.random.seed(seed)
random.seed(seed)

print('epochs:', epochs)
print('dropouts:', dropout)
print ('N:', N)
print('hidden layers:', design)

epochs: [3000]
dropouts: [0]
N: [64]
hidden layers: [[1024 1024]
 [ 512  512]]


# Functions

In [0]:
# necessary functions 

# Input to this function can be a scalar or an array of probabilities
def get_entropy(x):
    """Binary entropy h(p) = -p*log2(p) - (1-p)*log2(1-p), applied element-wise.

    Parameters
    ----------
    x : float or array-like of float
        Probabilities. Entries equal to exactly 0 or 1 yield an entropy of 0.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``x`` (0-d for a scalar input).
    """
    # Calculates entropy for a single value
    def get_single_entropy(p):
        if p == 1 or p == 0:
            return 0.0
        return -p * np.log2(p) - (1 - p) * np.log2(1 - p)

    # Without otypes, np.vectorize infers the output dtype from the FIRST element only:
    # an input starting with 0 or 1 would make the whole result integer and truncate
    # every other entropy value. Fixing otypes also makes empty inputs work.
    dummy_func = np.vectorize(get_single_entropy, otypes=[float])
    return dummy_func(x)
  
  
def reverse_entropy(y):
    """Numerically invert the binary entropy function.

    Starting from the guess ``y``, the guess is refined with secant-style steps
    until ``get_entropy(guess)`` is within 1e-7 of ``y``. There is no cap on the
    number of iterations.

    Parameters
    ----------
    y : float
        Target entropy value.

    Returns
    -------
    The final guess if it is below 0.5, otherwise one minus the final guess.
    """
    tolerance = 0.0000001
    upper = 0.5
    lower = 0
    guess = y
    h_guess = get_entropy(guess)

    while abs(h_guess - y) > tolerance:
        # Pick the bracket end on the other side of the target as the secant anchor,
        # and tighten the bracket with the current guess.
        if h_guess > y:
            anchor, upper = lower, guess
        else:
            anchor, lower = upper, guess
        numerator = (y - h_guess) * (anchor - guess)
        # eps keeps the denominator away from an exact zero
        denominator = get_entropy(anchor) - h_guess + np.finfo(float).eps
        guess = guess + numerator / denominator
        h_guess = get_entropy(guess)

    # Entropy is symmetric around 0.5; report the solution from the lower half.
    return guess if guess < 0.5 else 1 - guess

  
def get_bec_bhattacharyya(N, e):
    """Bhattacharyya parameters of the N channels obtained by recursive splitting.

    Parameters
    ----------
    N : int
        Number of channels (length of the sequence).
    e : float
        Erasure rate of the underlying channel.

    Returns
    -------
    numpy.ndarray
        Length-N array. At each of the ``int(log2(N))`` levels, every current
        parameter z is replaced by the pair (2z - z^2, z^2) at positions
        (2i, 2i+1). If N is not a power of two the trailing entries stay 0.
    """
    N = int(N)
    depth = int(math.log2(N))

    Z = np.zeros(N)
    Z[0] = e
    scratch = np.zeros(N)
    for level in range(depth):
        width = 2 ** level
        parents = Z[:width]
        squared = parents * parents
        # even slots take 2z - z^2, odd slots take z^2
        scratch[0:2 * width:2] = 2 * parents - squared
        scratch[1:2 * width:2] = squared
        Z = scratch.copy()

    return Z
  
  
# compression rate must be 0 < R < 1
def get_free_indexes(N, compresseion_rate):
    """Select the free (non-frozen) bit positions for a sequence of length N.

    The distortion is obtained by inverting the entropy at ``1 - compresseion_rate``;
    its entropy is used as the erasure rate passed to ``get_bec_bhattacharyya``.
    Positions are then ranked by ascending Bhattacharyya parameter.

    Parameters
    ----------
    N : int
        Sequence length.
    compresseion_rate : float
        Compression rate R.

    Returns
    -------
    numpy.ndarray
        Indices of the ``N - round(N * R)`` positions with the smallest parameters.
    """
    seq_len = int(N)
    distortion = reverse_entropy(1 - compresseion_rate)
    bits_per_user = round(seq_len * compresseion_rate)
    erasure_rate = get_entropy(distortion)
    ranking = np.argsort(get_bec_bhattacharyya(seq_len, erasure_rate))

    # NOTE(review): the slice keeps N - bits_per_user indices, which equals
    # bits_per_user only for R = 0.5 -- confirm this is intended for other rates.
    return ranking[:seq_len - bits_per_user]


def compose_model(layers):
    """Stack the given layers, in order, into a new Sequential model.

    Parameters
    ----------
    layers : iterable
        Layer objects to add one after another.

    Returns
    -------
    The resulting (uncompiled) Sequential model.
    """
    stacked = Sequential()
    for single_layer in layers:
        stacked.add(single_layer)
    return stacked
  
  
def errors(y_true, y_pred):
    """Metric: number of entries where the rounded prediction differs from the target.

    Parameters
    ----------
    y_true : tensor
        Target values.
    y_pred : tensor
        Predicted values; rounded before the comparison.

    Returns
    -------
    Scalar tensor holding the mismatch count, accumulated as 'uint16'.
    """
    hard_decisions = K.round(y_pred)
    mismatches = K.not_equal(y_true, hard_decisions)
    return K.sum(K.cast(mismatches, 'uint16'))
  

# input bits is an array of 0/1 (or boolean) values, e.g. of shape M*N
def bsc(input_bits, p_t):
    """Pass bits through a binary symmetric channel with crossover probability p_t.

    Parameters
    ----------
    input_bits : numpy.ndarray
        Boolean or 0/1 integer array of any shape; it is not modified.
    p_t : float
        Probability with which each bit is flipped independently.

    Returns
    -------
    numpy.ndarray
        Copy of ``input_bits`` (same shape and dtype) with the selected bits flipped.
    """
    output_bits = input_bits.copy()
    # rand() draws from [0, 1), so a strict '<' flips each bit with probability
    # exactly p_t and never flips anything when p_t == 0.
    flip_locs = rand(input_bits.shape) < p_t
    output_bits[flip_locs] = 1 ^ output_bits[flip_locs]
    return output_bits


def get_index_hamming(y, x):
    """Index of the database row closest to an observation in Hamming distance.

    Parameters
    ----------
    y : numpy.ndarray
        The database: a 2-D array with one binary vector per row.
    x : numpy.ndarray
        The observation: one binary vector with as many entries as ``y`` has columns.

    Returns
    -------
    int
        Row index of the minimum distance; the first such row on ties.
    """
    # Broadcasting compares x with every row of y directly, so there is no need to
    # tile x into a full-size copy (the deprecated numpy.matlib.repmat did that).
    hamming_distance = np.sum(np.logical_xor(y, x), axis=1)
    index_of_min = np.argmin(hamming_distance)
    return index_of_min
  
  
def get_err_hamming(predictions, db_free, true_labels):
    """Identification error of minimum-Hamming-distance matching against a database.

    Each prediction row is assigned the index of its closest row in ``db_free``
    (via ``get_index_hamming``) and that index is compared with the true label.

    Parameters
    ----------
    predictions : numpy.ndarray
        2-D array with one predicted binary vector per row.
    db_free : numpy.ndarray
        2-D database with one binary vector per row.
    true_labels : array-like
        True database row index for every prediction. Any shape holding exactly
        one label per prediction is accepted; it is flattened before comparing.

    Returns
    -------
    (int, float)
        Number of wrong assignments and the corresponding error rate.
        ``(0, 0.0)`` when there are no predictions.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of predictions.
    """
    length = predictions.shape[0]

    # Flatten so that a (1, L) or (L, 1) label array cannot silently broadcast
    # against the (L,) index vector and inflate the error count.
    labels = np.asarray(true_labels).ravel()
    if labels.size != length:
        raise ValueError(
            'got {} labels for {} predictions'.format(labels.size, length))
    if length == 0:
        return 0, 0.0

    pred_index = np.zeros(length, dtype=int)
    for i in range(length):
        pred_index[i] = get_index_hamming(db_free, predictions[i, :])

    num_err = int(np.count_nonzero(pred_index != labels))
    err = num_err / length
    return num_err, err

# Simulation loop

In [5]:
# Run simulations for every combination of N, hidden layers, dropout and epochs
# specified in the parameters section.

def _build_decoder(input_len, hidden_units, dropout_rate, output_len):
  """Build and compile a fresh, untrained decoder for one parameter combination."""
  decoder_layers = [Dropout(float(dropout_rate), input_shape=(int(input_len),))]
  for units in hidden_units:
    decoder_layers.append(Dense(int(units), activation='relu'))
    decoder_layers.append(Dropout(float(dropout_rate)))
  decoder_layers.append(Dense(int(output_len), activation='sigmoid'))

  decoder = compose_model(decoder_layers)
  decoder.compile(optimizer=optimizer, loss=loss, metrics=[errors])
  return decoder


len_N = N.shape[0]
len_epochs = epochs.shape[0]
len_hidden_layers = design.shape[0]
len_dropout = dropout.shape[0]

for i_N in range(len_N):
  print('/////////////////////////////')
  # path to files
  base_path = '/content/drive/My Drive/Colab Notebooks/{}_data_len_{}/bsc_err_{}'.format(M, N[i_N], bsc_err)
  path_to_orig_data = base_path + '/original_data_{}.txt'.format(R)
  path_to_db = base_path + "/db_{}.txt".format(R)

  # import data
  orig_data = np.loadtxt(path_to_orig_data, delimiter=',', dtype=bool)
  db = np.loadtxt(path_to_db, delimiter=',', dtype=bool)
  if orig_data.shape[0] != M or db.shape[0] != M:
    # the labels below assume exactly one row per user in both files
    raise ValueError('expected {} rows, got {} (original data) and {} (db)'.format(
        M, orig_data.shape[0], db.shape[0]))

  # results file path
  results_file_path = base_path + '/' + filename

  # calculate free/frozen indexes
  free_indexes = get_free_indexes(N[i_N], R)
  num_free_indexes = len(free_indexes)
  db_free = db[:, free_indexes]

  # generate observations: obs_per_user noisy copies of every user's sequence,
  # stacked user 0..M-1, then user 0..M-1 again, and so on
  observations = bsc(np.tile(orig_data, (obs_per_user, 1)), bsc_err)
  # one label per observation row, as a flat vector in the same order
  true_labels = np.tile(np.arange(M), obs_per_user)

  # scale the database to line up row by row with the observations
  scaled_db_free = np.tile(db_free, (obs_per_user, 1))

  for i_hidden_layers in range(len_hidden_layers):
    for i_dropout in range(len_dropout):
      for i_epochs in range(len_epochs):
        # Build a fresh model for every epochs setting. Successive fit() calls on
        # one model continue from the current weights, so reusing the model would
        # train it for the SUM of all earlier settings while reporting only the
        # current one.
        model = _build_decoder(N[i_N], design[i_hidden_layers, :],
                               dropout[i_dropout], num_free_indexes)

        # train
        history = model.fit(orig_data, db_free, batch_size=batch_size,
                            epochs=epochs[i_epochs], verbose=0, shuffle=True)

        # predict
        predicted_compressed = model.predict(observations, verbose=1, steps=None)
        predictions = np.rint(predicted_compressed)
        # NOTE(review): normalised by N although each prediction only has
        # num_free_indexes bits -- confirm which denominator is intended.
        not_same = np.mean(np.sum(abs(predictions - scaled_db_free), axis=1)) / N[i_N]

        # calculate error
        num_err_hamming, err_hamming = get_err_hamming(predictions, db_free, true_labels)

        # print results
        print('Case: N={}, hidden_layers={}, dropout={}, epochs={}'.format(N[i_N]
                             , design[i_hidden_layers,:], dropout[i_dropout], epochs[i_epochs]))
        print('error is:', err_hamming)
        print('\n')

        # optionally, save results (only when a file name was set in the parameters)
        if filename:
          result_line = ('num_err_hamming= {}, pe_hamming= {}, N= {}, M= {}, bsc_err= {}, R= {}, '
                         'hidden_layers= {}, cost_func= {}, not_same={}, epochs={}, dropout={} \n').format(
                             num_err_hamming, err_hamming, N[i_N], M, bsc_err, R,
                             design[i_hidden_layers, :], loss, not_same,
                             epochs[i_epochs], dropout[i_dropout])
          with open(results_file_path, "a") as results_file:
            results_file.write(result_line)

/////////////////////////////


KeyboardInterrupt: ignored