In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import scale
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# get dataset from UCI repository
import requests
import io
from zipfile import ZipFile

UJINDOORLOC_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00310/UJIndoorLoc.zip'

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(UJINDOORLOC_URL, timeout=60)
# Fail right here with the HTTP status when the server answers with an error,
# instead of handing an error page to ZipFile and getting a BadZipFile later.
response.raise_for_status()

# The archive is kept in memory; later cells read the CSV members via `zipFile`.
compressedFile = io.BytesIO(response.content)
zipFile = ZipFile(compressedFile)

Initial dataset

In [None]:
# TRAINING AND VALIDATION SET

N_FEATURE_COLUMNS = 520   # leading CSV columns used as network input
MISSING_MARKER = 100      # per the UJIndoorLoc dataset description: access point not detected
MISSING_RSSI = -110       # dBm value substituted for the marker above


def load_signal_features(frame):
    """Return the first N_FEATURE_COLUMNS columns of `frame` as a float array.

    The array is an explicit copy, so replacing MISSING_MARKER with
    MISSING_RSSI neither mutates `frame` nor fails on a read-only view.
    """
    signal = frame.iloc[:, 0:N_FEATURE_COLUMNS].to_numpy(dtype=float, copy=True)
    signal[signal == MISSING_MARKER] = MISSING_RSSI
    return signal


def building_floor_codes(frame):
    """Return one string per row: BUILDINGID concatenated with FLOOR."""
    return np.asarray(frame["BUILDINGID"].map(str) + frame["FLOOR"].map(str))


dataset = pd.read_csv(zipFile.open('UJIndoorLoc/trainingData.csv'), header=0)
dataset = dataset.sample(frac=1.00,random_state=0) # CHANGE TO VARY THE DATASET PERCENTAGE #@param

features = load_signal_features(dataset)
# feature normalization with ONE global mean and variance over all columns.
# NOTE(review): this divides by the variance, not the standard deviation; it is
# kept as-is because the rest of the notebook was tuned on this scale.
feature_mean = features.mean()
feature_var = features.var()
features = (features - feature_mean) / feature_var

# convert categorical variable into dummy/indicator variables (one column per
# building+floor code); float dtype keeps the result stable across pandas versions
label_frame = pd.get_dummies(building_floor_codes(dataset), dtype=float)
labels = label_frame.to_numpy()

# TEST SET

test_dataset = pd.read_csv(zipFile.open('UJIndoorLoc/validationData.csv'), header=0)

# The test set is scaled with the TRAINING statistics so that both sets live in
# the same input space (previously it was scaled with its own mean/variance).
test_features = (load_signal_features(test_dataset) - feature_mean) / feature_var

# Align the test one-hot columns with the training columns: a class missing
# from the test file gets an all-zero column instead of shifting the others.
test_labels = (
    pd.get_dummies(building_floor_codes(test_dataset), dtype=float)
    .reindex(columns=label_frame.columns, fill_value=0.0)
    .to_numpy()
)

Augmented dataset with INTERPOLATION

In [None]:
# Interpolation-based augmentation: for every class, new samples are created by
# linearly blending two consecutive training samples of that class.
# NOTE: this cell overwrites `features` and `labels`; re-run the previous cell
# before re-running this one, otherwise already-augmented data is augmented again.

from sklearn.model_selection import train_test_split

INTERPOLATION_FACTOR = 15.0 # CHANGE TO VARY THE INTERPOL PERCENTAGE #@param
AUGMENTATION_SEED = 0       # makes the generated samples reproducible


def interpolate_rows(class_features, positions):
    """Linearly interpolate between consecutive rows of `class_features`.

    `positions` holds fractional row indexes; position p yields
    (1 - w) * row[floor(p)] + w * row[floor(p) + 1] with w = p - floor(p).
    This is what the former `interp2d(x, y, z)` call (default linear kind)
    returned when evaluated on the integer column grid, without depending on
    the `interp2d` API that newer SciPy releases no longer provide.
    """
    last_row = class_features.shape[0] - 1
    lower = np.floor(positions).astype(int)
    # Clamp so a single-row class (or a position on the last row) stays in range.
    upper = np.minimum(lower + 1, last_row)
    weight = (positions - lower)[:, np.newaxis]
    return (1.0 - weight) * class_features[lower] + weight * class_features[upper]


n_features = features.shape[1]
n_label_columns = labels.shape[1]
augmentation_rng = np.random.default_rng(AUGMENTATION_SEED)

labels_unique = np.unique(labels, axis=0)
generated_feature_blocks = []
generated_label_blocks = []

for class_i_label in labels_unique:
    # rows whose one-hot label equals this class
    class_i_features = features[(labels == class_i_label).all(axis=1)]
    n_class_rows = class_i_features.shape[0]

    num_examples_to_generate = int(n_class_rows * INTERPOLATION_FACTOR)
    generated_indexes = augmentation_rng.uniform(0, n_class_rows - 1, num_examples_to_generate)

    generated_feature_blocks.append(interpolate_rows(class_i_features, generated_indexes))
    generated_label_blocks.append(np.tile(class_i_label, (num_examples_to_generate, 1)))

# one frame holding generated features followed by their one-hot labels
generated_samples = pd.DataFrame(
    np.hstack([np.vstack(generated_feature_blocks), np.vstack(generated_label_blocks)])
)

# split generated samples in generated features and labels

generated_features = generated_samples.iloc[:, :n_features]
generated_labels = generated_samples.iloc[:, n_features:].copy()
generated_labels.columns = range(n_label_columns)

# extended features and labels

features = np.vstack([features, generated_features.to_numpy()])
labels = np.vstack([labels.astype(float), generated_labels.to_numpy()])

# training and validation set

train_x, val_x, train_y, val_y = train_test_split(features, labels, test_size=0.3, random_state = 0, stratify=labels)

Neural Network

In [None]:
# NEURAL NETWORK

# parameters

# layer widths: network input followed by the three hidden sizes
n_input, n_hidden_1, n_hidden_2, n_hidden_3 = 520, 256, 128, 64
for _name, _value in (
    ("n_input:", n_input),
    ("n_hidden_1:", n_hidden_1),
    ("n_hidden_2:", n_hidden_2),
    ("n_hidden_3:", n_hidden_3),
):
    print(_name, _value)

# number of output units = number of one-hot label columns
n_classes = labels.shape[1]
print("n_classes:", n_classes)

# optimisation settings
learning_rate, training_epochs, batch_size = 0.00001, 30, 15
for _name, _value in (
    ("learning_rate:", learning_rate),
    ("training_epochs:", training_epochs),
    ("batch_size:", batch_size),
):
    print(_name, _value)

# whole mini-batches per epoch; a trailing partial batch is not counted
n_train_rows = train_x.shape[0]
total_batches = n_train_rows // batch_size
print("total_batches = train_x.shape[0] // batch_size: ", n_train_rows, '/', batch_size, '=', total_batches)

# network architecture

def weight_variable(shape):
    """Return a tf.Variable of the given shape, initialised from a truncated
    normal distribution with standard deviation 0.1."""
    return tf.Variable(tf.random.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of the given shape with every entry set to 0.0."""
    zeros = tf.constant(0.0, shape=shape)
    return tf.Variable(zeros)

# Graph inputs: a batch of feature rows (X) and the matching label rows (Y).
X = tf.placeholder(tf.float32, shape=[None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])


def _dense_params(n_in, n_out):
    """Create the (weights, biases) pair of a fully connected n_in -> n_out layer.

    The weights are created before the biases, matching the creation order of
    writing the two statements out by hand.
    """
    return weight_variable([n_in, n_out]), bias_variable([n_out])


# --------------------- Encoder Variables --------------- #

e_weights_h1, e_biases_h1 = _dense_params(n_input, n_hidden_1)
e_weights_h2, e_biases_h2 = _dense_params(n_hidden_1, n_hidden_2)
e_weights_h3, e_biases_h3 = _dense_params(n_hidden_2, n_hidden_3)

# --------------------- Decoder Variables --------------- #

# mirror image of the encoder: n_hidden_3 -> n_hidden_2 -> n_hidden_1 -> n_input
d_weights_h1, d_biases_h1 = _dense_params(n_hidden_3, n_hidden_2)
d_weights_h2, d_biases_h2 = _dense_params(n_hidden_2, n_hidden_1)
d_weights_h3, d_biases_h3 = _dense_params(n_hidden_1, n_input)

# --------------------- DNN Variables ------------------ #

# classifier stacked on the n_hidden_3-wide encoder output
dnn_weights_h1, dnn_biases_h1 = _dense_params(n_hidden_3, n_hidden_2)
dnn_weights_h2, dnn_biases_h2 = _dense_params(n_hidden_2, n_hidden_2)
dnn_weights_out, dnn_biases_out = _dense_params(n_hidden_2, n_classes)

def encode(x):
    """Pass `x` through the three tanh encoder layers and return the last activation."""
    encoder_layers = (
        (e_weights_h1, e_biases_h1),
        (e_weights_h2, e_biases_h2),
        (e_weights_h3, e_biases_h3),
    )
    activation = x
    for weights, biases in encoder_layers:
        activation = tf.nn.tanh(tf.add(tf.matmul(activation, weights), biases))
    return activation
    
def decode(x):
    """Pass `x` through the three tanh decoder layers and return the last activation."""
    decoder_layers = (
        (d_weights_h1, d_biases_h1),
        (d_weights_h2, d_biases_h2),
        (d_weights_h3, d_biases_h3),
    )
    activation = x
    for weights, biases in decoder_layers:
        activation = tf.nn.tanh(tf.add(tf.matmul(activation, weights), biases))
    return activation

def dnn(x):
    """Classifier head: two tanh layers followed by a softmax output layer.

    Returns the softmax activations (not the raw logits).
    """
    hidden = x
    for weights, biases in ((dnn_weights_h1, dnn_biases_h1), (dnn_weights_h2, dnn_biases_h2)):
        hidden = tf.nn.tanh(tf.add(tf.matmul(hidden, weights), biases))
    logits = tf.add(tf.matmul(hidden, dnn_weights_out), dnn_biases_out)
    return tf.nn.softmax(logits)

# Wire the graph: the encoder output feeds both the decoder (reconstruction)
# and the classifier head.
encoded = encode(X)
decoded = decode(encoded)
y_ = dnn(encoded)

# unsupervised cost function: mean squared reconstruction error
us_cost_function = tf.reduce_mean(tf.pow(X - decoded, 2))

# supervised cost function: cross-entropy summed over the batch.
# The softmax output is clipped away from 0 before the log: a saturated
# softmax would otherwise give log(0) = -inf, and 0 * -inf = NaN would
# poison the loss and every gradient from then on.
LOG_EPSILON = 1e-10
s_cost_function = -tf.reduce_sum(Y * tf.log(tf.clip_by_value(y_, LOG_EPSILON, 1.0)))

# One Adam optimizer per training phase; each minimizes its own cost.
us_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(us_cost_function)
s_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(s_cost_function)

# fraction of rows whose arg-max prediction equals the arg-max of the label row
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

n_input: 520
n_hidden_1: 256
n_hidden_2: 128
n_hidden_3: 64
n_classes: 13
learning_rate: 1e-05
training_epochs: 30
batch_size: 15
total_batches = train_x.shape[0] // batch_size:  223294 / 15 = 14886


In [None]:
# RESULTS

def _run_training_epoch(session, train_op, cost_op, with_labels):
    """Run one pass over the training set and return the mean batch cost.

    The training set is walked in `total_batches` contiguous mini-batches of
    `batch_size` rows. Because total_batches == train_x.shape[0] // batch_size,
    `b * batch_size + batch_size` never exceeds the number of rows, so no
    wrap-around is needed. The former modulo raised ZeroDivisionError when the
    set held exactly `batch_size` rows and replayed the first batch instead of
    the last one whenever the row count was a multiple of `batch_size`.

    session     -- the active tf.Session
    train_op    -- optimizer step to execute on every batch
    cost_op     -- cost tensor evaluated together with the step
    with_labels -- feed Y as well as X (supervised phase) when True
    """
    batch_costs = []  # plain list: appending is O(1), unlike np.append
    for b in range(total_batches):
        offset = b * batch_size
        feed = {X: train_x[offset:(offset + batch_size), :]}
        if with_labels:
            feed[Y] = train_y[offset:(offset + batch_size), :]
        _, c = session.run([train_op, cost_op], feed_dict=feed)
        batch_costs.append(c)
    # float64 keeps the printed precision of the previous np.append-based mean
    return np.mean(np.asarray(batch_costs, dtype=np.float64))


with tf.Session() as session:
    tf.global_variables_initializer().run()

    # ------------ 1. Training Autoencoders - Unsupervised Learning ----------- #
    for epoch in range(training_epochs):
        epoch_loss = _run_training_epoch(session, us_optimizer, us_cost_function, with_labels=False)
        print("Epoch: ",epoch," Loss: ",epoch_loss)
    print("Unsupervised pre-training finished...")


    # ---------------- 2. Training NN - Supervised Learning ------------------ #
    for epoch in range(training_epochs):
        epoch_loss = _run_training_epoch(session, s_optimizer, s_cost_function, with_labels=True)
        print("Epoch: ",epoch," Loss: ",epoch_loss," Training Accuracy: ", \
            session.run(accuracy, feed_dict={X: train_x, Y: train_y}), \
            "Validation Accuracy:", session.run(accuracy, feed_dict={X: val_x, Y: val_y}))

    print("Supervised training finished...")


    # final score on the held-out test file
    print("\nTesting Accuracy:", session.run(accuracy, feed_dict={X: test_features, Y: test_labels}))

Epoch:  0  Loss:  0.012447323708384092
Epoch:  1  Loss:  0.004519258522106119
Epoch:  2  Loss:  0.0036022981350633224
Epoch:  3  Loss:  0.0031686468337457684
Epoch:  4  Loss:  0.0029317737100373123
Epoch:  5  Loss:  0.002804369711087889
Epoch:  6  Loss:  0.0027264671015313434
Epoch:  7  Loss:  0.0026725408418488694
Epoch:  8  Loss:  0.0026328002279446664
Epoch:  9  Loss:  0.00260270497954269
Epoch:  10  Loss:  0.0025798622782405452
Epoch:  11  Loss:  0.0025626202081940742
Epoch:  12  Loss:  0.0025492137847672215
Epoch:  13  Loss:  0.0025382085210968663
Epoch:  14  Loss:  0.0025287626605498053
Epoch:  15  Loss:  0.0025204368339099233
Epoch:  16  Loss:  0.0025129652565983192
Epoch:  17  Loss:  0.0025061489613709843
Epoch:  18  Loss:  0.0024998250812766705
Epoch:  19  Loss:  0.0024938497294410443
Epoch:  20  Loss:  0.0024880923135241137
Epoch:  21  Loss:  0.002482445434792047
Epoch:  22  Loss:  0.0024768427843569582
Epoch:  23  Loss:  0.0024712776622848077
Epoch:  24  Loss:  0.00246580906