<a href="https://colab.research.google.com/github/Zhengro/DL-Identification/blob/jaume/Map_noisyObs_to_indexes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
__author__ = ''

### Import all necessary libraries and establish connection with Google Drive

In [0]:
# Load main libraries for the project
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import numpy as np

In [4]:
# Load the Drive helper and mount
from google.colab import drive

# Mount Google Drive at /content/drive so that files stored there can be
# opened through ordinary file paths.
# This will prompt for authorization.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
def bsc(bit_sequence, bsc_e):
    """
    Binary Symmetric Channel.

    Every bit of the input is flipped independently with probability
    ``bsc_e``; the remaining bits pass through unchanged.

    Parameters
    ----------
    bit_sequence : ndarray containing {0, 1}
        Input array of bits to the channel (any shape).
    bsc_e : float in [0, 1]
        Transition/Error probability of the channel.

    Returns
    -------
    output_bits : ndarray containing {0, 1}
        Output bits from the channel, same shape as ``bit_sequence``.
    """
    bit_sequence = np.asarray(bit_sequence)

    # One uniform draw per bit decides whether that bit is flipped.
    random_prob = np.random.uniform(0, 1, bit_sequence.shape)
    flip_mask = random_prob < bsc_e

    # A flip is the complement 1 - b (XOR with 1). Plain subtraction of the
    # flip mask would turn a flipped 0 into -1, which is not a valid bit.
    output_bits = np.where(flip_mask, 1 - bit_sequence, bit_sequence)
    return output_bits

In [6]:
# Sanity check of bsc: push an all-zero column through the channel and print
# the fraction of entries that come out non-zero.
bsc_e = 0.3
input_bits = np.zeros(shape=(100, 1))
output_bits = bsc(input_bits, bsc_e)
flipped_fraction = np.count_nonzero(output_bits) / len(output_bits)
print(flipped_fraction)


0.3


In [0]:
# All the data needed for this notebook is stored in MAIN_PATH.
# File names are appended to it by plain string concatenation, so the
# trailing slash must be kept.
MAIN_PATH = "/content/drive/My Drive/Colab Notebooks/"

### Read, normalize, and sort all users' data

In [8]:
# Path of the stored fingerprint matrix
DATA_FILENAME = MAIN_PATH + 'data/userData_500_256.npy'

# Load the fingerprints of every user in one array
userData = np.load(DATA_FILENAME)

# Rows are users, columns are features
NUM_USERS, NUM_FEATURES = userData.shape

print(NUM_USERS, NUM_FEATURES, sep="\n")

500
256


In [15]:
# Construct train set and test set
NUM_SAMPLES = 400 # Number of noisy samples for each user

# Convert the fingerprints to integers once, not once per noisy sample.
clean_bits = np.int_(userData)

# Rows k*NUM_SAMPLES ... (k+1)*NUM_SAMPLES - 1 of X hold the noisy copies of user k.
X = np.zeros((NUM_USERS*NUM_SAMPLES, NUM_FEATURES))
for k in range(NUM_USERS):
    # One channel call per user: replicate the fingerprint NUM_SAMPLES times
    # and corrupt all copies at once instead of calling bsc for every sample.
    first_row = k*NUM_SAMPLES
    replicated = np.tile(clean_bits[k, :], (NUM_SAMPLES, 1))
    X[first_row:first_row + NUM_SAMPLES, :] = bsc(replicated, bsc_e)

# Label of every row is the index of the user it was generated from;
# kept as a float column vector of shape (NUM_USERS*NUM_SAMPLES, 1).
Y = np.repeat(np.arange(NUM_USERS, dtype=float), NUM_SAMPLES).reshape(-1, 1)

# Random 80/20 split into train and test sets
train_data, test_data, train_labels, test_labels = train_test_split(X, Y, test_size=0.2, shuffle=True)
print(train_data.shape)
print(test_data.shape)
print(train_labels.shape)
print(test_labels.shape)

(160000, 256)
(40000, 256)
(160000, 1)
(40000, 1)


In [16]:
# Build MLP model and train the NN
clf = MLPClassifier(hidden_layer_sizes=(128, 64, 32), activation='relu',
                    solver='adam', verbose=True, alpha=0.001)
# train_labels is a column vector of shape (n_samples, 1); flatten it to 1-D
# so that fit receives the expected label shape and does not emit a
# DataConversionWarning.
clf.fit(train_data, np.ravel(train_labels))

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 1.16368329
Iteration 2, loss = 0.05251809
Iteration 3, loss = 0.02160316
Iteration 4, loss = 0.01253752
Iteration 5, loss = 0.01128139
Iteration 6, loss = 0.01061111
Iteration 7, loss = 0.01079058
Iteration 8, loss = 0.01051347
Iteration 9, loss = 0.00869536
Iteration 10, loss = 0.01010410
Iteration 11, loss = 0.00889629
Iteration 12, loss = 0.00630328
Iteration 13, loss = 0.01000601
Iteration 14, loss = 0.00787832
Iteration 15, loss = 0.00561154
Iteration 16, loss = 0.00936315
Iteration 17, loss = 0.00812498
Iteration 18, loss = 0.00704060
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.


MLPClassifier(activation='relu', alpha=0.001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(128, 64, 32), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=True, warm_start=False)

In [17]:
# Evaluate the trained MLP on the held-out test set
predictions = clf.predict(test_data)
# Give the predictions the same shape as the reference labels before comparing
estimated_labels = predictions.reshape(test_labels.shape)
# A non-zero difference marks a misclassified sample
num_errors = np.count_nonzero(estimated_labels - test_labels)
error_rate = num_errors / len(estimated_labels)
print("Error rate = %.4f" % error_rate)

Error rate = 0.0037
