# Indoor Localization - Data Preprocessing

#### Import libraries

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import pandas as pd
import math
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras import layers

#### Read data

In [2]:
df_train = pd.read_csv("data/TrainingData.csv")
df_val = pd.read_csv("data/ValidationData.csv")

In [3]:
l1 = len(df_train)
l1

19937

In [4]:
l2 = len(df_val)
l2

1111

In [5]:
l1/(l1+l2)

0.947215887495249

In [6]:
df_train.columns

Index(['WAP001', 'WAP002', 'WAP003', 'WAP004', 'WAP005', 'WAP006', 'WAP007',
       'WAP008', 'WAP009', 'WAP010',
       ...
       'WAP520', 'LONGITUDE', 'LATITUDE', 'FLOOR', 'BUILDINGID', 'SPACEID',
       'RELATIVEPOSITION', 'USERID', 'PHONEID', 'TIMESTAMP'],
      dtype='object', length=529)

Since the validation set is 5% of the total data and since `df_train` and `df_val` are identical, we merge both, randomize and get 20% for testing (80-20 split).

### Merge datasets into `df`

In [7]:
df = pd.concat([df_train, df_val])

In [8]:
len(df) == l1 + l2

True

### Dropping unncessary columns

In [9]:
# Dropping timestamps (as they play no role in localization)
df.drop(columns = ["SPACEID", "RELATIVEPOSITION", "USERID", "PHONEID", "TIMESTAMP"], inplace=True)

## Normalization

In [10]:
# Normalize x which is in the range [xmin, xmax] to a range [a,b]
def normalize(x, xmin, xmax, a, b):
    numerator = x - xmin
    denominator = xmax - xmin
    multiplier = b - a
    ans = (numerator/denominator)*multiplier + a
    return ans

### WiFi

The WiFi signals are present from (-104, 0) decibel milliWatts (dBm) and if a WiFi signal is not reachable, it is denoted by a 100. So, for normalization, we convert the range of (-104, 0) to (0.25, 1) and make 100 to 0. So, at the end, we get values in the range of [0,1]

In [11]:
sig_min = -104
sig_max = 0
tar_min = 0.25
tar_max = 1.0
no_sig = 100
def normalize_wifi(num):
    ans = 0
    num = float(num)
    if math.isclose(num, no_sig, rel_tol=1e-3):
        return 0
    else:
        ans = normalize(num, sig_min, sig_max, tar_min, tar_max)
        return ans

Latitude and Longitude values are normalized between 0 and 1.

### Latitude

In [12]:
lat_min = 4864745.7450159714
lat_max = 4865017.3646842018
tarmin = 0
tarmax = 1
def normalize_lat(num):
    num = float(num)
    ans = normalize(num, lat_min, lat_max, tarmin, tarmax)
    return ans

### Longitude

In [13]:
long_min = -7695.9387549299299000
long_max = -7299.786516730871000
tarmin = 0
tarmax = 1
def normalize_long(num):
    num = float(num)
    ans = normalize(num, long_min, long_max, tarmin, tarmax)
    return ans

In [14]:
wifi_cells = df.columns[:519] # 520 wifi signals, index started at 1

In [15]:
# Normalize WiFi values
for i in wifi_cells:
    df[i] = df[i].apply(normalize_wifi)

In [16]:
# Normalize latitudes and longitudes
df["LATITUDE"] = df["LATITUDE"].apply(normalize_lat)
df["LONGITUDE"] = df["LONGITUDE"].apply(normalize_long)

### Separate X an Y values

In [17]:
x = df[wifi_cells]

In [18]:
y = df[["LATITUDE", "LONGITUDE", "BUILDINGID", "FLOOR"]]

### Test Train Split

In [19]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.15)

### Building the ML model

In [39]:
model = Sequential()
model.add(layers.Dense(519, activation="relu"))
model.add(layers.Dense(300, activation="relu"))
model.add(layers.Dense(100, activation="relu"))
model.add(layers.Dense(4, activation="relu"))
model.compile(optimizer='adam',
                loss="mse", metrics=['accuracy'])

In [40]:
X = tf.convert_to_tensor(x)
Y = tf.convert_to_tensor(y)

### Training the model

In [41]:
model.fit(x=X, y=Y, epochs=5, validation_split=0.15, shuffle=True)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f4ad02299f0>

### Saving the model

In [42]:
model.save("ml_model")



INFO:tensorflow:Assets written to: ml_model/assets


INFO:tensorflow:Assets written to: ml_model/assets


In [43]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 519)               269880    
                                                                 
 dense_7 (Dense)             (None, 300)               156000    
                                                                 
 dense_8 (Dense)             (None, 100)               30100     
                                                                 
 dense_9 (Dense)             (None, 4)                 404       
                                                                 
Total params: 456,384
Trainable params: 456,384
Non-trainable params: 0
_________________________________________________________________


In [59]:
x.iloc[0]

WAP001    0.0
WAP002    0.0
WAP003    0.0
WAP004    0.0
WAP005    0.0
         ... 
WAP515    0.0
WAP516    0.0
WAP517    0.0
WAP518    0.0
WAP519    0.0
Name: 0, Length: 519, dtype: float64

In [60]:
y.iloc[0]

LATITUDE      0.644405
LONGITUDE     0.390442
BUILDINGID    1.000000
FLOOR         2.000000
Name: 0, dtype: float64

In [61]:
to_pred = np.asarray(x.iloc[0])

In [63]:
to_pred = to_pred.reshape(1,519)

In [65]:
model.predict(to_pred)



array([[0.70963645, 0.38856775, 0.9717524 , 1.9604622 ]], dtype=float32)