<a href="https://colab.research.google.com/github/Zhengro/DL-Identification/blob/jaume/Map_noisyObs_to_indexes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
__author__ = ''

### Import all necessary libraries and establish connection with Google Drive

In [0]:
# Load main libraries for the project
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import numpy as np

In [0]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# All the data needed for this notebook is stored in MAIN_PATH
MAIN_PATH = "/content/drive/My Drive/Colab Notebooks/"

### Read all user's data

In [0]:
# Main parameters of the fingerprints
DATA_FILENAME = MAIN_PATH + 'data/userData_500_256.npy'

# Read data from all the fingerprints
userData = np.load(DATA_FILENAME)

# Create main parameters
NUM_USERS, NUM_FEATURES = userData.shape

print(NUM_USERS)
print(NUM_FEATURES)

500
256


### Generate user's data

In [0]:
# Main parameters
NUM_USERS = 500
NUM_FEATURES = 64
bsc_e = 0.2

# Generate user data
userData = np.random.choice([0,1],(NUM_USERS,NUM_FEATURES))

### Functions for generating the data

In [0]:
def bsc(bit_sequence, bsc_e):
    """
    Binary Symmetric Channel.
    Parameters
    ----------
    input_bits : 1D ndarray containing {0, 1}
        Input arrary of bits to the channel.
    p_t : float in [0, 1]
        Transition/Error probability of the channel.
    Returns
    -------
    output_bits : 1D ndarray containing {0, 1}
        Output bits from the channel.
    """
    flipped_bits = np.random.choice([0,1],bit_sequence.shape,p=[1.0-bsc_e,bsc_e])
    output_bits = np.absolute(np.subtract(bit_sequence,flipped_bits))
    return output_bits

In [0]:
# Test bsc function
input_bits = np.zeros((100,1))
output_bits = bsc(input_bits,bsc_e)
print(np.count_nonzero(output_bits)/output_bits.shape[0])


0.18


### Classification algorithm

In [0]:
# Construct train set and test set
NUM_SAMPLES = 400 # Number of noisy samples for each user
X = np.zeros((NUM_USERS*NUM_SAMPLES,NUM_FEATURES))
Y = np.zeros((NUM_USERS*NUM_SAMPLES, 1))

for k in range(NUM_USERS):
    for n in range(NUM_SAMPLES):
        X[n + k*NUM_SAMPLES, :] = bsc(np.int_(userData[k, :]), bsc_e)
        Y[n + k*NUM_SAMPLES, :] = k

train_data, test_data, train_labels, test_labels = train_test_split(X,Y,test_size=0.2,shuffle=True)
print(train_data.shape)
print(test_data.shape)
print(train_labels.shape)
print(test_labels.shape)

(160000, 64)
(40000, 64)
(160000, 1)
(40000, 1)


In [0]:
# Build MLP model and train the NN
clf = MLPClassifier(hidden_layer_sizes=(128,64,32), activation='relu',
                    solver='adam', verbose=True, alpha=0.001) 
clf.fit(train_data,train_labels)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 5.42062742
Iteration 2, loss = 4.67191183
Iteration 3, loss = 4.52609949
Iteration 4, loss = 4.45242961
Iteration 5, loss = 4.39863485
Iteration 6, loss = 4.35651323
Iteration 7, loss = 4.32090425
Iteration 8, loss = 4.28950537
Iteration 9, loss = 4.25885814
Iteration 10, loss = 4.23226353
Iteration 11, loss = 4.20767621
Iteration 12, loss = 4.18304565
Iteration 13, loss = 4.16313497
Iteration 14, loss = 4.14022265
Iteration 15, loss = 4.12118403


In [0]:
# Print number of output layers
print(clf.n_outputs_)

500


In [0]:
# Test MLP algorithm
estimated_labels = clf.predict(test_data).reshape(test_labels.shape)
error_rate = np.count_nonzero( estimated_labels - test_labels ) / len(estimated_labels)
print("Error rate = %.5f" % error_rate)

Error rate = 0.09695


### Results obtained so far


Copy and paste the message that is automatiacally created below together with the setup of the MLP algorithm



Error rate = 0.003675, NUM_USERS = 500, NUM_FEATURES = 256, NUM_SAMPLES = 400, bsc_e = 0.3, hidden_layer_sizes=(128, 64, 32)

Error rate = 0.014750, NUM_USERS = 500, NUM_FEATURES = 256, NUM_SAMPLES = 400, bsc_e = 0.3, hidden_layer_sizes=(128, 64, 32, 16)

Error rate = 0.320275, NUM_USERS = 500, NUM_FEATURES = 256, NUM_SAMPLES = 400, bsc_e = 0.3, hidden_layer_sizes=(128, 64, 32)

In [0]:
msg = "Error rate = {0}, NUM_USERS = {1}, NUM_FEATURES = {2}, NUM_SAMPLES = {3}, bsc_e = {4}".format(error_rate,
                                                                                                     NUM_USERS,
                                                                                                     NUM_FEATURES,
                                                                                                     NUM_SAMPLES,
                                                                                                     bsc_e)
print(msg)

Error rate = 0.09695, NUM_USERS = 500, NUM_FEATURES = 128, NUM_SAMPLES = 400, bsc_e = 0.2
