# Deep Learning for Predicting Indoor Location Using WiFi Fingerprinting
Ha Vu Tran

In [1]:
# necessary Libraries
import numpy as np
import pandas as pd
import time
import pprint

#Visualizations
import matplotlib.pyplot as plt
import seaborn as sns


#Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA 
from scipy.sparse import lil_matrix

#Scoring Metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error



import tensorflow as tf
from keras.models import Sequential
from keras.layers import *

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Preprocess Data

In [2]:
df = pd.read_csv("challenge1.csv")

#df.head(10)

In [3]:
#df.describe()

In [4]:

#Drop unneeded data
df.drop(['Unnamed: 0', 'USERID', 'PHONEID', 'TIMESTAMP'], axis = 1, inplace=True)

#Remove "NaN" value
col = df.columns[0:520]
for i in col:
    df[i].fillna(0, inplace=True)
df.dropna(subset=['LONGITUDE','LATITUDE', 'FLOOR', 'BUILDINGID' ], inplace=True)
#trainingData.isnull().sum()



#Process "WAP" data
df.iloc[:, 0:520] = np.where(df.iloc[:, 0:520] <= 0, 
                        df.iloc[:, 0:520] + 105, 
                        df.iloc[:, 0:520] - 100)

#Process Longtitude
df.iloc[:, 520] = np.where(df.iloc[:, 520] <= 0, 
                        -df.iloc[:, 520], 
                        df.iloc[:, 520])


df.describe()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID
count,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,...,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0
mean,0.95172,0.965082,1.145415,1.150895,0.979644,1.396263,1.750822,1.821859,1.907198,1.182891,...,1.13889,1.103868,5.919202,7.535832,1.1534,1.048176,1.161856,7464.202052,4864871.0,1.213581
std,9.907839,9.94587,10.907006,10.93278,9.975353,10.995386,11.424759,11.274403,11.739711,10.859246,...,10.38636,10.499751,16.221807,16.427428,10.88876,10.432185,10.984137,123.311468,66.96052,0.832702
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7300.81899,4864746.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7359.1485,4864821.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7423.0609,4864852.0,1.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7594.2641,4864930.0,2.0
max,105.0,105.0,105.0,105.0,105.0,105.0,105.0,105.0,105.0,105.0,...,105.0,105.0,105.0,105.0,105.0,105.0,105.0,7691.3384,4865017.0,2.0


In [5]:

min_LGT = 7300.818990
min_LAT = 4.864746e+06

df.iloc[:,520] = (df.iloc[:, 520] - min_LGT + 1)
df.iloc[:,521] = (df.iloc[:, 521] - min_LAT + 1)


In [6]:
def preprocess_data(df):
    
    # split the data set into features and targets(Floor and BuildingID)
    X1 = df.drop(['LONGITUDE', 'LATITUDE', 'BUILDINGID','FLOOR'], axis=1)
    y1 = df[[ 'BUILDINGID']]
    
    X2 = df.drop(['LONGITUDE', 'LATITUDE','FLOOR'], axis=1)
    y2 = df[['FLOOR']]
    
    
    X3 = df.drop(['LONGITUDE', 'LATITUDE','FLOOR'], axis=1)
    y3 = df[[ 'LONGITUDE', 'LATITUDE']]
    
    #create Dummies for the targets to feed into the model
    y1 = pd.get_dummies(data=y1, columns=[ 'BUILDINGID'])
    y2 = pd.get_dummies(data=y2, columns=[ 'FLOOR']) 
    X3 = pd.get_dummies(data=X3, columns=['BUILDINGID']) 
    
    return X1, y1, X2, y2, X3, y3

In [7]:
X1, y1, X2, y2, X3, y3 = preprocess_data(df)

In [8]:
#Scale Data with Standard Scaler

scaler1 = StandardScaler()
scaler1.fit(X1)    
X1 = scaler1.transform(X1)

scaler2 = StandardScaler()
scaler2.fit(X2)
X2 = scaler2.transform(X2)

scaler3 = StandardScaler()
scaler3.fit(X3)
X3 = scaler3.transform(X3)


In [9]:
y1 = lil_matrix(y1).toarray()
y2 = lil_matrix(y2).toarray()
y3 = lil_matrix(y3).toarray()

# Model Training 

## Predicting buildings

In [10]:
start_time = time.time()

# Define the model
model_1 = Sequential()
model_1.add(Dense(3, input_dim=520, activation='relu'))
model_1.add(BatchNormalization())
model_1.add(Dense(3, activation='softmax'))
model_1.compile(loss='binary_crossentropy', optimizer='adam')


# Train the model
model_1.fit(
    X1,
    y1,
    batch_size=1000,
    validation_split = 0.4,
    epochs=100,
    shuffle=True,
    verbose=2
)

predictions1 = np.round(model_1.predict(X1))

# accuracy
print("Accuracy of predicting buildings = ",accuracy_score(y1,predictions1))

print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Train on 11495 samples, validate on 7664 samples
Epoch 1/100
1s - loss: 0.6642 - val_loss: 0.5959
Epoch 2/100
1s - loss: 0.5546 - val_loss: 0.5137
Epoch 3/100
1s - loss: 0.4784 - val_loss: 0.4473
Epoch 4/100
1s - loss: 0.4236 - val_loss: 0.3934
Epoch 5/100
1s - loss: 0.3815 - val_loss: 0.3491
Epoch 6/100
0s - loss: 0.3470 - val_loss: 0.3134
Epoch 7/100
1s - loss: 0.3180 - val_loss: 0.2845
Epoch 8/100
1s - loss: 0.2936 - val_loss: 0.2608
Epoch 9/100
1s - loss: 0.2718 - val_loss: 0.2409
Epoch 10/100
0s - loss: 0.2529 - val_loss: 0.2242
Epoch 11/100
1s - loss: 0.2362 - val_loss: 0.2099
Epoch 12/100
1s - loss: 0.2213 - val_loss: 0.1976
Epoch 13/100
1s - loss: 0.2083 - val_loss: 0.1870
Epoch 14/100
0s - loss: 0.1964 - val_loss: 0.1777
Epoch 15/100
0s - loss: 0.1859 - val_loss: 0.1696
Epoch 16/100
0s - loss: 0.1759 - val_loss: 0.1625
Epoch 17/100
1s - loss: 0.1673 - val_loss: 0.1562
Epoch 18/100
1s - loss: 0.1594 - val_loss: 0.1505
Epoch 19/100
1s - loss: 0.1520 - val_loss: 0.1455
Epoch 20/1

## Predicting floors

In [11]:
start_time = time.time()

# Define the model
model_2 = Sequential()
model_2.add(Dense(13, input_dim=521, activation='relu'))
model_2.add(BatchNormalization())
model_2.add(Dense(5, activation='softmax'))
model_2.compile(loss='binary_crossentropy', optimizer='adam')

# Train the model
model_2.fit(
    X2,
    y2,
    batch_size=1000,
    validation_split = 0.4,
    epochs=80,
    shuffle=True,
    verbose=2
)


predictions2 = np.round(model_2.predict(X2))

# accuracy
print("Accuracy of predicting floors = ",accuracy_score(y2,predictions2))

print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Train on 11495 samples, validate on 7664 samples
Epoch 1/80
1s - loss: 0.6131 - val_loss: 0.5649
Epoch 2/80
1s - loss: 0.5191 - val_loss: 0.5242
Epoch 3/80
1s - loss: 0.4546 - val_loss: 0.4949
Epoch 4/80
1s - loss: 0.4067 - val_loss: 0.4705
Epoch 5/80
1s - loss: 0.3695 - val_loss: 0.4482
Epoch 6/80
1s - loss: 0.3386 - val_loss: 0.4279
Epoch 7/80
1s - loss: 0.3124 - val_loss: 0.4091
Epoch 8/80
1s - loss: 0.2898 - val_loss: 0.3933
Epoch 9/80
1s - loss: 0.2702 - val_loss: 0.3788
Epoch 10/80
1s - loss: 0.2531 - val_loss: 0.3666
Epoch 11/80
1s - loss: 0.2381 - val_loss: 0.3563
Epoch 12/80
1s - loss: 0.2244 - val_loss: 0.3487
Epoch 13/80
1s - loss: 0.2124 - val_loss: 0.3427
Epoch 14/80
1s - loss: 0.2017 - val_loss: 0.3365
Epoch 15/80
1s - loss: 0.1926 - val_loss: 0.3324
Epoch 16/80
1s - loss: 0.1841 - val_loss: 0.3291
Epoch 17/80
1s - loss: 0.1770 - val_loss: 0.3266
Epoch 18/80
1s - loss: 0.1701 - val_loss: 0.3261
Epoch 19/80
1s - loss: 0.1639 - val_loss: 0.3243
Epoch 20/80
1s - loss: 0.1585

In [12]:
predictions = np.hstack((predictions1, predictions2)) 
y_test = np.hstack((y1,y2))  
# accuracy
print("Total Accuracy = ",accuracy_score(y_test,predictions))

print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Total Accuracy =  0.8584999217078135
--- Run time: 1.71 mins ---


## Predicting longitude and latitude

In [13]:
start_time = time.time()

# Define the model
model_3 = Sequential()
model_3.add(Dense(100, input_dim=523, activation='relu'))
model_3.add(BatchNormalization())
model_3.add(Dropout(0.2))
model_3.add(Dense(100, activation='relu'))
model_3.add(BatchNormalization())
model_3.add(Dense(2, activation='linear'))
model_3.compile(loss='mean_absolute_error', optimizer='adam')


# Train the model
model_3.fit(
    X3,
    y3,
    batch_size=500,
    validation_split = 0.4,
    epochs=600,
    shuffle=True,
    verbose=2
)




Train on 11495 samples, validate on 7664 samples
Epoch 1/600
4s - loss: 132.9721 - val_loss: 163.2738
Epoch 2/600
2s - loss: 132.6700 - val_loss: 162.8345
Epoch 3/600
3s - loss: 132.1702 - val_loss: 161.7280
Epoch 4/600
2s - loss: 131.4269 - val_loss: 160.5058
Epoch 5/600
2s - loss: 130.4168 - val_loss: 158.9330
Epoch 6/600
3s - loss: 129.1273 - val_loss: 156.7421
Epoch 7/600
3s - loss: 127.5527 - val_loss: 154.2955
Epoch 8/600
2s - loss: 125.6898 - val_loss: 151.7519
Epoch 9/600
2s - loss: 123.5375 - val_loss: 148.8830
Epoch 10/600
3s - loss: 121.1002 - val_loss: 144.4814
Epoch 11/600
2s - loss: 118.3739 - val_loss: 140.4018
Epoch 12/600
3s - loss: 115.3649 - val_loss: 135.9269
Epoch 13/600
3s - loss: 112.0703 - val_loss: 129.3219
Epoch 14/600
2s - loss: 108.4932 - val_loss: 125.3163
Epoch 15/600
2s - loss: 104.6389 - val_loss: 117.3968
Epoch 16/600
3s - loss: 100.5002 - val_loss: 109.6164
Epoch 17/600
2s - loss: 96.0917 - val_loss: 102.6538
Epoch 18/600
2s - loss: 91.4231 - val_loss:

2s - loss: 6.2836 - val_loss: 11.7356
Epoch 159/600
2s - loss: 5.7861 - val_loss: 11.0641
Epoch 160/600
3s - loss: 5.7428 - val_loss: 11.1366
Epoch 161/600
3s - loss: 5.9080 - val_loss: 11.1447
Epoch 162/600
2s - loss: 5.7181 - val_loss: 10.9388
Epoch 163/600
2s - loss: 5.7810 - val_loss: 11.0678
Epoch 164/600
2s - loss: 5.9613 - val_loss: 11.3402
Epoch 165/600
2s - loss: 5.8865 - val_loss: 10.9939
Epoch 166/600
2s - loss: 5.6959 - val_loss: 10.9359
Epoch 167/600
2s - loss: 5.7293 - val_loss: 11.2639
Epoch 168/600
2s - loss: 5.7863 - val_loss: 11.0119
Epoch 169/600
3s - loss: 5.7667 - val_loss: 10.9843
Epoch 170/600
3s - loss: 6.0770 - val_loss: 10.9541
Epoch 171/600
3s - loss: 6.1157 - val_loss: 11.0936
Epoch 172/600
3s - loss: 5.6943 - val_loss: 10.9601
Epoch 173/600
2s - loss: 6.2092 - val_loss: 11.1117
Epoch 174/600
2s - loss: 5.7933 - val_loss: 11.0736
Epoch 175/600
2s - loss: 5.6155 - val_loss: 10.9954
Epoch 176/600
2s - loss: 5.9603 - val_loss: 11.0089
Epoch 177/600
3s - loss: 5

2s - loss: 5.3695 - val_loss: 10.7218
Epoch 317/600
2s - loss: 5.2909 - val_loss: 10.7357
Epoch 318/600
3s - loss: 4.6897 - val_loss: 10.8633
Epoch 319/600
2s - loss: 4.5433 - val_loss: 10.7651
Epoch 320/600
2s - loss: 5.5236 - val_loss: 10.7766
Epoch 321/600
2s - loss: 5.3337 - val_loss: 10.6746
Epoch 322/600
2s - loss: 4.9865 - val_loss: 10.7007
Epoch 323/600
2s - loss: 4.8470 - val_loss: 10.7540
Epoch 324/600
2s - loss: 4.7893 - val_loss: 10.7275
Epoch 325/600
2s - loss: 4.3586 - val_loss: 10.7157
Epoch 326/600
2s - loss: 4.5988 - val_loss: 10.8207
Epoch 327/600
2s - loss: 4.6553 - val_loss: 10.7596
Epoch 328/600
3s - loss: 4.8619 - val_loss: 10.8267
Epoch 329/600
2s - loss: 4.8303 - val_loss: 10.8544
Epoch 330/600
2s - loss: 4.9257 - val_loss: 10.7021
Epoch 331/600
2s - loss: 4.4184 - val_loss: 10.7538
Epoch 332/600
2s - loss: 5.0205 - val_loss: 10.6665
Epoch 333/600
2s - loss: 5.5980 - val_loss: 10.6466
Epoch 334/600
2s - loss: 4.4859 - val_loss: 10.7663
Epoch 335/600
2s - loss: 5

2s - loss: 4.6697 - val_loss: 10.7215
Epoch 475/600
2s - loss: 4.7608 - val_loss: 10.7050
Epoch 476/600
3s - loss: 4.6199 - val_loss: 10.6401
Epoch 477/600
2s - loss: 4.9452 - val_loss: 10.7171
Epoch 478/600
3s - loss: 5.0040 - val_loss: 10.7637
Epoch 479/600
3s - loss: 4.8358 - val_loss: 10.7742
Epoch 480/600
3s - loss: 4.3834 - val_loss: 10.9986
Epoch 481/600
2s - loss: 4.0708 - val_loss: 10.6170
Epoch 482/600
2s - loss: 4.9722 - val_loss: 10.7239
Epoch 483/600
2s - loss: 5.2784 - val_loss: 10.6165
Epoch 484/600
2s - loss: 4.6564 - val_loss: 10.7037
Epoch 485/600
2s - loss: 4.4802 - val_loss: 10.6692
Epoch 486/600
2s - loss: 4.7428 - val_loss: 10.6405
Epoch 487/600
2s - loss: 4.7395 - val_loss: 10.6692
Epoch 488/600
2s - loss: 5.6228 - val_loss: 10.9208
Epoch 489/600
3s - loss: 4.4320 - val_loss: 10.6981
Epoch 490/600
2s - loss: 5.2941 - val_loss: 10.8854
Epoch 491/600
2s - loss: 5.0225 - val_loss: 10.7827
Epoch 492/600
3s - loss: 4.7954 - val_loss: 10.7109
Epoch 493/600
3s - loss: 5

<keras.callbacks.History at 0x7f00b8787780>

In [14]:
predictions3 = (model_3.predict(X3))

# accuracy
print("RMSE of predicting LONGTITUDE = ", mean_absolute_error(y3[:,0],predictions3[:,0]))
print("RMSE of predicting LATITUDE = ", mean_absolute_error(y3[:,1],predictions3[:,1]))


print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

RMSE of predicting LONGTITUDE =  5.858654325928133
RMSE of predicting LATITUDE =  4.733857631419396
--- Run time: 29.24 mins ---
