In [1]:
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import tensorflow as tf
from tensorflow.keras import layers
import seaborn as sns

# Notebook display settings: at most 10 rows per frame, floats with one decimal.
pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", "{:.1f}".format)

# Ask TensorFlow to grow GPU memory on demand instead of reserving it up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # The memory-growth setting has to be identical on every GPU.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when memory growth is configured after the GPUs were initialized.
        print(err)

# TF1-compat session created with the same allow_growth option.
config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(allow_growth=True)
)
sess = tf.compat.v1.Session(config=config)


2022-12-31 16:14:27.004552: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-31 16:14:28.177835: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-31 16:14:28.190006: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-31 16:14:28.190136: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but th

1 Physical GPUs, 1 Logical GPUs


2022-12-31 16:14:28.192455: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-31 16:14:28.192567: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-31 16:14:28.192696: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-31 16:14:28.466601: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-31 16:14:28.466730: I tensorflow/compiler/xla/stream_executo

In [2]:
# USE_BT selects which signature file is loaded and how many beacon columns
# it is expected to contain.
USE_BT = True

DATASET_ROOT = "data/"
TRAIN_LOCATIONS_FILE = "SignatureLocs_altered.csv"  # Replaced commas with dots

if USE_BT:
    TRAIN_STRENGTHS_FILE = "P_Signatures.csv"
    NUMBER_OF_BEACONS = 57
else:
    TRAIN_STRENGTHS_FILE = "P_SA_Signatures.csv"
    NUMBER_OF_BEACONS = 11

### Prepare the data

In [3]:
# Signal strengths: semicolon-separated, one column per beacon, columns
# labelled 0 .. NUMBER_OF_BEACONS-1.
strengths_path = DATASET_ROOT + TRAIN_STRENGTHS_FILE
beacon_columns = list(range(NUMBER_OF_BEACONS))
df_train_strengths = pd.read_csv(strengths_path, sep=';', names=beacon_columns)

# Locations: semicolon-separated x/y pairs parsed as floats.
locations_path = DATASET_ROOT + TRAIN_LOCATIONS_FILE
df_train_locs = pd.read_csv(locations_path, sep=';', names=['x', 'y'], dtype=float)

# Aliases used by the training cells.
train_features = df_train_strengths
train_target = df_train_locs

# Plain array copy of the features (only referenced by the disabled normalizer).
normalization_values = np.array(train_features)

### Define our VAE model

In [4]:
class Sampling(layers.Layer):
    """Draw a latent vector z from (z_mean, z_log_var).

    Computes ``z = z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon``
    is standard-normal noise with one value per (batch row, latent dimension).
    """

    def call(self, inputs):
        """Return a sampled z for ``inputs == (z_mean, z_log_var)``."""
        z_mean, z_log_var = inputs
        mean_shape = tf.shape(z_mean)
        # Noise has the same (batch, dim) layout as z_mean.
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        std_dev = tf.exp(0.5 * z_log_var)
        return z_mean + std_dev * noise

In [5]:
latent_dim = 2

# Input normalization is currently disabled; the encoder sees the raw values.
#normalizer = layers.Normalization()
#normalizer.adapt(normalization_values)

# Encoder: four ReLU layers that shrink from NUMBER_OF_BEACONS units down to
# 30% of that width, followed by two linear heads (z_mean, z_log_var) and the
# Sampling layer that draws z from them.
# `shape` is passed as a tuple (matching the decoder's Input) because a bare
# int is not a documented shape value and is rejected by newer Keras releases.
encoder_inputs = tf.keras.Input(shape=(NUMBER_OF_BEACONS,))
x = layers.Dense(NUMBER_OF_BEACONS, activation="relu", name="encoder_0")(encoder_inputs)
x = layers.Dense(math.ceil(0.7*NUMBER_OF_BEACONS), activation="relu", name="encoder_1")(x)
x = layers.Dense(math.ceil(0.5*NUMBER_OF_BEACONS), activation="relu", name="encoder_2")(x)
x = layers.Dense(math.ceil(0.3*NUMBER_OF_BEACONS), activation="relu", name="encoder_3")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
# The encoder model exposes all three tensors: z_mean, z_log_var and the sampled z.
encoder = tf.keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 57)]         0           []                               
                                                                                                  
 encoder_0 (Dense)              (None, 57)           3306        ['input_1[0][0]']                
                                                                                                  
 encoder_1 (Dense)              (None, 40)           2320        ['encoder_0[0][0]']              
                                                                                                  
 encoder_2 (Dense)              (None, 29)           1189        ['encoder_1[0][0]']              
                                                                                            

In [6]:
# Decoder: mirrors the encoder, widening from the latent vector back to
# NUMBER_OF_BEACONS outputs through ReLU layers at 30%, 50% and 70% width.
latent_inputs = tf.keras.Input(shape=(latent_dim,))
x = latent_inputs
for layer_idx, width_fraction in enumerate((0.3, 0.5, 0.7)):
    x = layers.Dense(
        math.ceil(width_fraction * NUMBER_OF_BEACONS),
        activation="relu",
        name=f"decoder_{layer_idx}",
    )(x)
# Final layer has no activation, so outputs are unbounded.
decoder_outputs = layers.Dense(NUMBER_OF_BEACONS, name="decoder_output")(x)
decoder = tf.keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 2)]               0         
                                                                 
 decoder_0 (Dense)           (None, 18)                54        
                                                                 
 decoder_1 (Dense)           (None, 29)                551       
                                                                 
 decoder_2 (Dense)           (None, 40)                1200      
                                                                 
 decoder_output (Dense)      (None, 57)                2337      
                                                                 
Total params: 4,142
Trainable params: 4,142
Non-trainable params: 0
_________________________________________________________________


In [7]:
class VAE(tf.keras.Model):
    """Variational autoencoder trained with a custom ``train_step``.

    Args:
        encoder: model that, called on a batch, returns
            ``(z_mean, z_log_var, z)``.
        decoder: model that maps ``z`` back to a reconstruction of the input.
        **kwargs: forwarded to ``tf.keras.Model``.

    Three running-mean metrics are tracked: total loss, reconstruction loss
    and KL loss.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Metrics exposed to Keras: total, reconstruction and KL loss trackers."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimisation step on ``data`` and return the running losses.

        The loss is ``mean(MSE(data, reconstruction)) + mean(sum(KL, axis=1))``.
        Both terms are reduced to scalars, so the gradient scale does not
        depend on the batch size.
        """
        # Depending on how fit() was called, the batch may arrive as a tuple
        # such as (x,) or (x, y); only the features are used here.
        if isinstance(data, tuple):
            data = data[0]

        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # mse() yields one value per sample; average it over the batch so
            # it is a scalar like the KL term below.
            reconstruction_loss = tf.reduce_mean(
                tf.keras.losses.mse(data, reconstruction)
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }


### Training the model

In [9]:
EPOCHS = 15000
BATCH_SIZE = 8

# Build the VAE from the encoder/decoder defined above and train it with Adam
# (default settings). No loss is passed to compile(): VAE.train_step computes it.
vae = VAE(encoder, decoder)
adam = tf.keras.optimizers.Adam()
vae.compile(optimizer=adam)

# Early stopping is currently disabled; to enable it, use the two lines below
# instead of the plain fit() call.
#es = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=500)
#history = vae.fit(train_features, epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[es])
history = vae.fit(train_features, batch_size=BATCH_SIZE, epochs=EPOCHS)

Epoch 1/30000
Epoch 2/30000
Epoch 3/30000
Epoch 4/30000
Epoch 5/30000
Epoch 6/30000
Epoch 7/30000
Epoch 8/30000
Epoch 9/30000
Epoch 10/30000
Epoch 11/30000
Epoch 12/30000
Epoch 13/30000
Epoch 14/30000
Epoch 15/30000
Epoch 16/30000
Epoch 17/30000
Epoch 18/30000
Epoch 19/30000
Epoch 20/30000
Epoch 21/30000
Epoch 22/30000
Epoch 23/30000
Epoch 24/30000
Epoch 25/30000
Epoch 26/30000
Epoch 27/30000
Epoch 28/30000
Epoch 29/30000
Epoch 30/30000
Epoch 31/30000
Epoch 32/30000
Epoch 33/30000
Epoch 34/30000
Epoch 35/30000
Epoch 36/30000
Epoch 37/30000
Epoch 38/30000
Epoch 39/30000
Epoch 40/30000
Epoch 41/30000
Epoch 42/30000
Epoch 43/30000
Epoch 44/30000
Epoch 45/30000
Epoch 46/30000
Epoch 47/30000
Epoch 48/30000
Epoch 49/30000
Epoch 50/30000
Epoch 51/30000
Epoch 52/30000
Epoch 53/30000
Epoch 54/30000
Epoch 55/30000
Epoch 56/30000
Epoch 57/30000
Epoch 58/30000
Epoch 59/30000
Epoch 60/30000
Epoch 61/30000
Epoch 62/30000
Epoch 63/30000
Epoch 64/30000
Epoch 65/30000
Epoch 66/30000
Epoch 67/30000
Epoc

KeyboardInterrupt: 

### Generate new data from training data

In [12]:
# Re-read the location table, this time as float32, and show it.
locations_csv = DATASET_ROOT + TRAIN_LOCATIONS_FILE
df_train_locs = pd.read_csv(locations_csv, sep=';', names=['x', 'y'], dtype=np.float32)
print(df_train_locs)

       x    y
0   46.8 34.4
1   46.8 31.3
2   46.8 28.2
3   43.7 28.2
4   43.7 31.3
..   ...  ...
123  9.6 15.8
124 12.7 15.8
125 15.8 15.8
126 12.7 22.0
127 15.8 22.0

[128 rows x 2 columns]


In [13]:
MULTIPLICATION_FACTOR = 9 # Number of perturbed samples added per location; must be a perfect square
SAMPLE_VARIANCE = 0.025   # Largest latent-space offset in each direction (half-width of the grid)

# The perturbations form a grid_size x grid_size grid in latent space, so a
# factor that is not a perfect square is rejected instead of being truncated.
grid_size = int(round(math.sqrt(MULTIPLICATION_FACTOR)))
if grid_size < 1 or grid_size * grid_size != MULTIPLICATION_FACTOR:
    raise ValueError(
        f"MULTIPLICATION_FACTOR must be a positive perfect square, got {MULTIPLICATION_FACTOR}"
    )

n_locations = len(df_train_strengths)
if len(df_train_locs) < n_locations:
    raise ValueError(
        f"Need one location per signature row: {len(df_train_locs)} locations "
        f"for {n_locations} signatures"
    )

# All (dx, dy) offset pairs with dx varying slowest; shape (MULTIPLICATION_FACTOR, 2).
# NOTE: for an odd grid_size the grid contains the zero offset, so the
# unperturbed reconstruction is emitted twice per location.
axis_offsets = np.linspace(-SAMPLE_VARIANCE, SAMPLE_VARIANCE, grid_size)
latent_offsets = np.array([(dx, dy) for dx in axis_offsets for dy in axis_offsets])

positions = []
samples = []

if n_locations:
    # Encode and decode every row in batched predict() calls rather than
    # calling predict() three times per row.
    z_means, _, _ = vae.encoder.predict(df_train_strengths.to_numpy())
    if z_means.shape[1] != latent_offsets.shape[1]:
        raise ValueError(
            f"Perturbation grid is 2-D but the encoder produced "
            f"{z_means.shape[1]} latent dimensions"
        )
    reconstructions = vae.decoder.predict(z_means)

    # Broadcast every latent mean against every offset pair, then flatten to
    # (n_locations * MULTIPLICATION_FACTOR, 2) for a single decoder call.
    perturbed_z = z_means[:, np.newaxis, :] + latent_offsets[np.newaxis, :, :]
    perturbed = vae.decoder.predict(perturbed_z.reshape(-1, latent_offsets.shape[1]))
    perturbed = perturbed.reshape(n_locations, len(latent_offsets), -1)

    for row_idx in range(n_locations):
        # Positional lookup: row i of the strengths pairs with row i of the locations.
        x, y = df_train_locs.iloc[row_idx]
        position = (round(x, 1), round(y, 1))

        # Reconstruction of the original signature first ...
        samples.append(reconstructions[row_idx])
        positions.append(position)

        # ... then the perturbed variants, all labelled with the same position.
        for new_sample in perturbed[row_idx]:
            samples.append(new_sample)
            positions.append(position)

    



In [14]:
# Data post processing: round every generated signal strength to the nearest
# integer. Each row is replaced by a new integer array; assigning rounded
# values back into the existing float rows would keep them as floats.
assert len(samples) == len(positions)
for i, sample in enumerate(samples):
    assert len(sample) == NUMBER_OF_BEACONS
    samples[i] = np.rint(sample).astype(int)


In [15]:
# File names (relative to DATASET_ROOT) for the generated data set; the
# strengths file name follows the USE_BT switch like the input file does.
GENERATED_TRAIN_LOCATIONS_FILE = "SignatureLocs_altered_generated.csv"
if USE_BT:
    GENERATED_TRAIN_STRENGTHS_FILE = "P_Signatures_generated.csv"
else:
    GENERATED_TRAIN_STRENGTHS_FILE = "P_SA_Signatures_generated.csv"

In [16]:
# Write the generated positions and samples as semicolon-separated CSV files.
# newline='' lets the csv module control line endings itself; without it,
# rows end in "\r\r\n" on platforms that translate "\n" on write.
with open(DATASET_ROOT + GENERATED_TRAIN_LOCATIONS_FILE, 'w', newline='') as f:
    writer = csv.writer(f, delimiter=';')
    writer.writerows(positions)

with open(DATASET_ROOT + GENERATED_TRAIN_STRENGTHS_FILE, 'w', newline='') as f:
    writer = csv.writer(f, delimiter=';')
    writer.writerows(samples)