In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import scale
import tensorflow as tf
from tensorflow.keras import layers

# get dataset from UCI repository
import requests
import io
from zipfile import ZipFile
# Download the UJIIndoorLoc archive from the UCI repository and keep it in
# memory; later cells read the CSV members straight out of `zipFile`.
response = requests.get(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/00310/UJIndoorLoc.zip',
    timeout=60,  # do not hang the notebook forever if the server is unreachable
)
# Fail here on an HTTP error instead of handing an error page to ZipFile.
response.raise_for_status()
compressedFile = io.BytesIO(response.content)
zipFile = ZipFile(compressedFile)

# # TEMP
# from google.colab import drive
# drive.mount('/content/gdrive')


Dataset

In [None]:
# DATASET

# --- training data ---------------------------------------------------------
dataset = pd.read_csv(zipFile.open('UJIndoorLoc/trainingData.csv'), header=0)
dataset = dataset.sample(frac=1.0,random_state=0) # CHANGE TO VARY THE DATASET PERCENTAGE #@param

# The first 520 columns are the fingerprint features. Take an explicit float
# copy so the masking below never writes into (or is blocked by) the DataFrame.
features = dataset.iloc[:,0:520].to_numpy(dtype=np.float64, copy=True)
# the value 100 is replaced with -110 dBm
features[features == 100] = -110

# feature normalization (global statistics of the training features)
# NOTE(review): this divides by the variance, not the standard deviation.
# Kept as-is because every downstream hyper-parameter was chosen on this scale.
feature_mean = features.mean()
feature_var = features.var()
features = (features - feature_mean) / feature_var

# One class per (building, floor) pair, one-hot encoded.
label_frame = pd.get_dummies(dataset["BUILDINGID"].map(str) + dataset["FLOOR"].map(str), dtype=np.uint8)
label_columns = label_frame.columns
labels = np.asarray(label_frame)

# training and validation set
from sklearn.model_selection import train_test_split
train_x, val_x, train_y, val_y = train_test_split(features, labels, test_size=0.3, random_state = 0, stratify=labels)

# --- test set ---------------------------------------------------------------
test_dataset = pd.read_csv(zipFile.open('UJIndoorLoc/validationData.csv'), header=0)

test_features = test_dataset.iloc[:,0:520].to_numpy(dtype=np.float64, copy=True)
test_features[test_features == 100] = -110
# Normalize with the TRAINING statistics so the test inputs live on the same
# scale the model is trained on (using the test set's own statistics shifts
# and rescales them differently).
test_features = (test_features - feature_mean) / feature_var

test_label_frame = pd.get_dummies(test_dataset["BUILDINGID"].map(str) + test_dataset["FLOOR"].map(str), dtype=np.uint8)
unknown_classes = test_label_frame.columns.difference(label_columns)
if len(unknown_classes) > 0:
    raise ValueError("test set contains classes absent from the training data: %s" % list(unknown_classes))
# Align the one-hot columns with the training columns so that column k means
# the same class in `labels` and `test_labels`, even if the test set lacks
# some class.
test_labels = test_label_frame.reindex(columns=label_columns, fill_value=0).to_numpy(dtype=np.uint8)

Dataset with DCGAN

In [None]:
import tensorflow as tf

In [None]:
def fingerprintsToImages(class_features, height=24, width=24, pad_value=-110):
  """Lay out each fingerprint row as a single-channel square image.

  Every row of `class_features` is copied into the first cells of a flat
  buffer of `width*height` values; the remaining cells are filled with
  `pad_value`. The buffer is then reshaped to an image.

  Only the padding cells receive `pad_value`. Feature values that happen to
  equal 0 are kept as they are.

  Args:
    class_features: 2-D array-like, one fingerprint per row.
    height, width: image size; `width*height` must be at least the number of
      feature columns.
    pad_value: value written to the cells after the last feature.
      NOTE(review): the notebook passes normalized features, so -110 is on a
      different scale than the data it pads. Confirm this is intended.

  Returns:
    float32 array of shape (n_rows, width, height, 1).

  Raises:
    ValueError: if a row has more values than the image has cells.
  """
  class_features = np.asarray(class_features)
  num_samples, num_features = class_features.shape
  if num_features > width * height:
    raise ValueError(
        "cannot fit %d features into a %dx%d image" % (num_features, width, height))

  channels = 1

  # Start from a buffer that is entirely padding, then overwrite the leading
  # cells with the real features.
  class_images = np.full((num_samples, width * height), pad_value, dtype='float64')
  class_images[:, :num_features] = class_features

  return class_images.reshape(num_samples, width, height, channels).astype('float32')

# Generator model

def make_generator_model():
  """Build the DCGAN generator: a 100-value noise vector in, a 24x24x1 tanh image out."""

  # Each entry pairs a layer with the output shape the model must have right
  # after that layer is added (None = no check at that point).
  stages = [
      (layers.Dense(6*6*256, use_bias=False, input_shape=(100,)), None),
      (layers.BatchNormalization(), None),
      (layers.LeakyReLU(), None),
      (layers.Reshape((6, 6, 256)), (None, 6, 6, 256)),
      (layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False),
       (None, 6, 6, 128)),
      (layers.BatchNormalization(), None),
      (layers.LeakyReLU(), None),
      (layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False),
       (None, 12, 12, 64)),
      (layers.BatchNormalization(), None),
      (layers.LeakyReLU(), None),
      (layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'),
       (None, 24, 24, 1)),
  ]

  generator = tf.keras.Sequential()
  for layer, expected_shape in stages:
    generator.add(layer)
    if expected_shape is not None:
      assert generator.output_shape == expected_shape

  return generator

# Discriminator model

def make_discriminator_model():
  """Build the DCGAN discriminator: a 24x24x1 image in, one unbounded score (logit) out."""

  return tf.keras.Sequential([
      # two strided convolution stages, each followed by LeakyReLU and dropout
      layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                    input_shape=[24, 24, 1]),
      layers.LeakyReLU(),
      layers.Dropout(0.3),
      layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'),
      layers.LeakyReLU(),
      layers.Dropout(0.3),
      # single linear output unit, no activation
      layers.Flatten(),
      layers.Dense(1),
  ])

# Training

def train(dataset, epochs, noise_dim=100):
  """Train a fresh DCGAN (generator + discriminator) on `dataset`.

  Args:
    dataset: iterable of image batches; each batch is fed to the model built
      by make_discriminator_model() as the "real" examples.
    epochs: number of full passes over `dataset`.
    noise_dim: length of the noise vector fed to the generator. It must match
      the generator's input size (100 in make_generator_model()).

  Returns:
    The trained generator model.
  """

  generator = make_generator_model()

  discriminator = make_discriminator_model()

  # Loss and optimizer

  # The discriminator ends in a plain Dense(1), so its outputs are logits.
  cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

  def discriminator_loss(real_output, fake_output):
      # real images should be scored as 1, generated images as 0
      real_loss = cross_entropy(tf.ones_like(real_output), real_output)
      fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
      return real_loss + fake_loss

  discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

  def generator_loss(fake_output):
      # the generator succeeds when its images are scored as real (1)
      return cross_entropy(tf.ones_like(fake_output), fake_output)

  generator_optimizer = tf.keras.optimizers.Adam(1e-4)

  print('STARTING TRAINING')  

  for epoch in range(epochs):

    for image_batch in dataset:

      # Size the noise batch from the real batch itself. This removes the
      # dependency on a global BATCH_SIZE defined in another cell, and keeps
      # the real and fake batches the same size on a short final batch.
      noise = tf.random.normal([image_batch.shape[0], noise_dim])

      with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:

        generated_images = generator(noise, training=True)

        real_output = discriminator(image_batch, training=True)
        fake_output = discriminator(generated_images, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

      gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
      gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

      generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
      discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

  print('MODEL TRAINED')  

  return generator

def generateSamples(generator, num_examples_to_generate, noise_dim=100, num_features=520):
  """Draw synthetic fingerprints from a trained generator.

  Args:
    generator: model mapping a (n, noise_dim) noise batch to a batch of images.
    num_examples_to_generate: number of samples to draw.
    noise_dim: length of each noise vector (must match the generator input).
    num_features: number of leading values of each flattened image to keep;
      the rest of the image is discarded.

  Returns:
    NumPy array of shape (num_examples_to_generate, num_features), assuming
    each flattened image has at least `num_features` values.
  """

  seed = tf.random.normal([num_examples_to_generate, noise_dim])

  # Convert to NumPy once and reuse the array.
  generated_images = generator(seed, training=False).numpy()

  # Flatten with an explicit row length: reshape(n, -1) raises when n == 0,
  # because -1 cannot be inferred for an empty array.
  values_per_image = int(np.prod(generated_images.shape[1:]))
  flattened = generated_images.reshape(generated_images.shape[0], values_per_image)

  return flattened[:, :num_features]

In [None]:
# Train one DCGAN per class and draw synthetic fingerprints from each.
labels_unique = np.unique(labels,axis=0)
generated_samples_dict = {}

# Loop-invariant settings, defined once instead of on every iteration.
GENERATED_PER_REAL = 15.0 # CHANGE TO VARY THE DCGAN PERCENTAGE #@param
EPOCHS = 50
BATCH_SIZE = 16

for i in range(labels_unique.shape[0]):

  print('STARTING CLASS',i)

  class_i_label = labels_unique[i]
  # Rows whose one-hot label equals this class, computed once from `labels`.
  class_i_mask = (labels == class_i_label).all(axis=1)
  class_i_features = features[class_i_mask]

  num_examples_to_generate = int(class_i_features.shape[0]*GENERATED_PER_REAL)
  # Shuffle buffer as large as the class, i.e. shuffle over the whole class.
  BUFFER_SIZE = class_i_features.shape[0]

  class_i_images = fingerprintsToImages(class_i_features)

  train_batch_class_i = tf.data.Dataset.from_tensor_slices(class_i_images).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
  generator_class_i = train(train_batch_class_i, EPOCHS)
  generated_samples_class_i = generateSamples(generator_class_i, num_examples_to_generate)

  generated_samples_dict[i] = generated_samples_class_i

STARTING CLASS 0
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 1
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 2
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 3
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 4
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 5
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 6
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 7
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 8
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 9
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 10
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 11
STARTING TRAINING
MODEL TRAINED
STARTING CLASS 12
STARTING TRAINING
MODEL TRAINED


In [None]:
# Stack the generated fingerprints of every class next to their one-hot label.
# Built block-wise with NumPy rather than one pd.Series per generated row.
generated_blocks = []
for i in range(labels_unique.shape[0]):
  samples_i = np.asarray(generated_samples_dict[i])
  labels_i = np.tile(labels_unique[i], (samples_i.shape[0], 1))
  generated_blocks.append(np.hstack((samples_i, labels_i)))
generated_samples = pd.DataFrame(np.vstack(generated_blocks))

# split generated samples in generated features and labels

n_feature_columns = features.shape[1]
generated_features = generated_samples.iloc[:,:n_feature_columns]
generated_labels = generated_samples.iloc[:,n_feature_columns:].copy()
generated_labels.columns = range(generated_labels.shape[1])

# extended features and labels
# DataFrame.append was removed in pandas 2.0; stack the arrays directly.
# NOTE: this overwrites `features`/`labels`, so re-running this cell appends
# the generated samples a second time.

features = np.vstack((features, generated_features.to_numpy()))
labels = np.vstack((labels, generated_labels.to_numpy()))

# training and validation set

from sklearn.model_selection import train_test_split
train_x, val_x, train_y, val_y = train_test_split(features, labels, test_size=0.3, random_state = 0, stratify=labels)

Neural Network

In [None]:
# Rebinds the global name `tf` to the TensorFlow 1.x compatibility API, which
# the placeholder/session code in the cells below uses.
# NOTE(review): the DCGAN cells above call `.numpy()` on model outputs, so they
# presumably must run before this cell; re-running them afterwards will likely
# fail. Confirm with Restart & Run All.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [None]:
# NEURAL NETWORK

# parameters

# layer widths: input, then the three encoder stages
n_input, n_hidden_1, n_hidden_2, n_hidden_3 = 520, 256, 128, 64
print("n_input:",n_input)
print("n_hidden_1:",n_hidden_1)
print("n_hidden_2:",n_hidden_2)
print("n_hidden_3:",n_hidden_3)

# one output unit per one-hot label column
n_classes = labels.shape[1]
print("n_classes:",n_classes)

# optimisation settings
learning_rate, training_epochs, batch_size = 0.00001, 30, 15
print("learning_rate:",learning_rate)
print("training_epochs:",training_epochs)
print("batch_size:",batch_size)

# number of whole batches per epoch (a trailing partial batch is dropped)
total_batches = train_x.shape[0] // batch_size
print("total_batches = train_x.shape[0] // batch_size: ", train_x.shape[0], '/', batch_size, '=', total_batches)

# network architecture

def weight_variable(shape):
    """Return a tf.Variable of `shape` initialised from a truncated normal with stddev 0.1."""
    return tf.Variable(tf.random.truncated_normal(shape, stddev = 0.1))

def bias_variable(shape):
    """Return a tf.Variable of `shape` initialised to all zeros."""
    return tf.Variable(tf.constant(0.0, shape = shape))

# Graph inputs: a batch of fingerprints and the matching one-hot labels.
X = tf.placeholder(tf.float32, shape=[None,n_input])
Y = tf.placeholder(tf.float32,[None,n_classes])

# Each line creates one layer's (weights, biases) pair. The right-hand side is
# evaluated left to right, so variables are created in the same order as if
# every assignment were written on its own line.

# --------------------- Encoder Variables --------------- #
# n_input -> n_hidden_1 -> n_hidden_2 -> n_hidden_3

e_weights_h1, e_biases_h1 = weight_variable([n_input, n_hidden_1]), bias_variable([n_hidden_1])
e_weights_h2, e_biases_h2 = weight_variable([n_hidden_1, n_hidden_2]), bias_variable([n_hidden_2])
e_weights_h3, e_biases_h3 = weight_variable([n_hidden_2, n_hidden_3]), bias_variable([n_hidden_3])

# --------------------- Decoder Variables --------------- #
# n_hidden_3 -> n_hidden_2 -> n_hidden_1 -> n_input

d_weights_h1, d_biases_h1 = weight_variable([n_hidden_3, n_hidden_2]), bias_variable([n_hidden_2])
d_weights_h2, d_biases_h2 = weight_variable([n_hidden_2, n_hidden_1]), bias_variable([n_hidden_1])
d_weights_h3, d_biases_h3 = weight_variable([n_hidden_1, n_input]), bias_variable([n_input])

# --------------------- DNN Variables ------------------ #
# n_hidden_3 -> n_hidden_2 -> n_hidden_2 -> n_classes

dnn_weights_h1, dnn_biases_h1 = weight_variable([n_hidden_3, n_hidden_2]), bias_variable([n_hidden_2])
dnn_weights_h2, dnn_biases_h2 = weight_variable([n_hidden_2, n_hidden_2]), bias_variable([n_hidden_2])
dnn_weights_out, dnn_biases_out = weight_variable([n_hidden_2, n_classes]), bias_variable([n_classes])

def encode(x):
    """Pass `x` through the three tanh encoder layers and return the last activation."""
    encoder_layers = (
        (e_weights_h1, e_biases_h1),
        (e_weights_h2, e_biases_h2),
        (e_weights_h3, e_biases_h3),
    )
    activation = x
    for weights, biases in encoder_layers:
        activation = tf.nn.tanh(tf.add(tf.matmul(activation, weights), biases))
    return activation
    
def decode(x):
    """Pass `x` through the three tanh decoder layers and return the last activation."""
    decoder_layers = (
        (d_weights_h1, d_biases_h1),
        (d_weights_h2, d_biases_h2),
        (d_weights_h3, d_biases_h3),
    )
    activation = x
    for weights, biases in decoder_layers:
        activation = tf.nn.tanh(tf.add(tf.matmul(activation, weights), biases))
    return activation

def dnn(x):
    """Classifier head: two tanh layers followed by a softmax output layer."""
    activation = x
    for weights, biases in ((dnn_weights_h1, dnn_biases_h1), (dnn_weights_h2, dnn_biases_h2)):
        activation = tf.nn.tanh(tf.add(tf.matmul(activation, weights), biases))
    logits = tf.add(tf.matmul(activation, dnn_weights_out), dnn_biases_out)
    return tf.nn.softmax(logits)

# Wire the graph: the encoder output feeds both the decoder (reconstruction)
# and the classifier head.
encoded = encode(X)
decoded = decode(encoded)
y_ = dnn(encoded)

# unsupervised cost function: mean squared reconstruction error
us_cost_function = tf.reduce_mean(tf.pow(X - decoded, 2))
# supervised cost function: cross-entropy summed over the batch.
# y_ is a softmax output and can underflow to exactly 0, in which case log()
# yields -inf and the loss/gradients become NaN. Clip before taking the log.
s_cost_function = -tf.reduce_sum(Y * tf.log(tf.clip_by_value(y_, 1e-10, 1.0)))

us_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(us_cost_function)
s_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(s_cost_function)

# fraction of rows whose most probable predicted class matches the label
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

n_input: 520
n_hidden_1: 256
n_hidden_2: 128
n_hidden_3: 64
n_classes: 13
learning_rate: 1e-05
training_epochs: 30
batch_size: 15
total_batches = train_x.shape[0] // batch_size:  223294 / 15 = 14886


In [None]:
# RESULTS

with tf.Session() as session:
    tf.global_variables_initializer().run()

    # Valid batch start offsets are 0 .. n_rows - batch_size inclusive, so wrap
    # with a modulus of (n_rows - batch_size + 1). The previous modulus of
    # (n_rows - batch_size) was zero when n_rows == batch_size, and sent the
    # last batch back to offset 0 whenever n_rows was a multiple of batch_size.
    offset_modulus = max(train_x.shape[0] - batch_size + 1, 1)

    # ------------ 1. Training Autoencoders - Unsupervised Learning ----------- #
    for epoch in range(training_epochs):
        # Python list instead of np.append, which copies the array every batch.
        # float() keeps the epoch mean in float64, as before.
        epoch_costs = []
        for b in range(total_batches):
            offset = (b * batch_size) % offset_modulus
            batch_x = train_x[offset:(offset + batch_size), :]
            _, c = session.run([us_optimizer, us_cost_function],feed_dict={X: batch_x})
            epoch_costs.append(float(c))
        print("Epoch: ",epoch," Loss: ",np.mean(epoch_costs))
    print("Unsupervised pre-training finished...")


    # ---------------- 2. Training NN - Supervised Learning ------------------ #
    for epoch in range(training_epochs):
        epoch_costs = []
        for b in range(total_batches):
            offset = (b * batch_size) % offset_modulus
            batch_x = train_x[offset:(offset + batch_size), :]
            batch_y = train_y[offset:(offset + batch_size), :]
            _, c = session.run([s_optimizer, s_cost_function],feed_dict={X: batch_x, Y : batch_y})
            epoch_costs.append(float(c))
        # accuracy is evaluated on the full training and validation sets
        train_accuracy = session.run(accuracy, feed_dict={X: train_x, Y: train_y})
        val_accuracy = session.run(accuracy, feed_dict={X: val_x, Y: val_y})
        print("Epoch: ",epoch," Loss: ",np.mean(epoch_costs)," Training Accuracy: ",
              train_accuracy,
              "Validation Accuracy:", val_accuracy)

    print("Supervised training finished...")


    print("\nTesting Accuracy:", session.run(accuracy, feed_dict={X: test_features, Y: test_labels}))

Epoch:  0  Loss:  0.008527898416315315
Epoch:  1  Loss:  0.0013310450161482064
Epoch:  2  Loss:  0.0010579160595574136
Epoch:  3  Loss:  0.0009355233620564001
Epoch:  4  Loss:  0.0008624084733557282
Epoch:  5  Loss:  0.0008137759219937858
Epoch:  6  Loss:  0.0007775002852549744
Epoch:  7  Loss:  0.0007482773153228864
Epoch:  8  Loss:  0.0007237951807255926
Epoch:  9  Loss:  0.0007030208613537703
Epoch:  10  Loss:  0.000685388531403182
Epoch:  11  Loss:  0.0006704666631321193
Epoch:  12  Loss:  0.0006578151984096639
Epoch:  13  Loss:  0.0006469721233822475
Epoch:  14  Loss:  0.0006375386200691929
Epoch:  15  Loss:  0.0006292323131490346
Epoch:  16  Loss:  0.0006218679271268
Epoch:  17  Loss:  0.0006153138946460533
Epoch:  18  Loss:  0.0006094622878076091
Epoch:  19  Loss:  0.0006042194837503487
Epoch:  20  Loss:  0.0005995081895561872
Epoch:  21  Loss:  0.0005952695734819727
Epoch:  22  Loss:  0.0005914601656674382
Epoch:  23  Loss:  0.0005880448579963058
Epoch:  24  Loss:  0.0005849904