# Deep Learning for Predicting Indoor Location Using WiFi Fingerprinting
Ha Vu Tran

In [1]:
# necessary Libraries
import numpy as np
import pandas as pd
import time
import pprint

#Visualizations
import matplotlib.pyplot as plt
import seaborn as sns


#Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA 
from scipy.sparse import lil_matrix

#Scoring Metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error



import tensorflow as tf
from keras.models import Sequential
from keras.layers import *

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Preprocess Data

In [2]:
df = pd.read_csv("challenge1.csv")

#df.head(10)

In [3]:
#df.describe()

In [4]:

#Drop unneeded data
df.drop(['Unnamed: 0', 'USERID', 'PHONEID', 'TIMESTAMP'], axis = 1, inplace=True)

#Remove "NaN" value
col = df.columns[0:520]
for i in col:
    df[i].fillna(0, inplace=True)
df.dropna(subset=['LONGITUDE','LATITUDE', 'FLOOR', 'BUILDINGID' ], inplace=True)
#trainingData.isnull().sum()



#Process "WAP" data
df.iloc[:, 0:520] = np.where(df.iloc[:, 0:520] <= 0, 
                        df.iloc[:, 0:520] + 105, 
                        df.iloc[:, 0:520] - 100)

#Process Longtitude
df.iloc[:, 520] = np.where(df.iloc[:, 520] <= 0, 
                        -df.iloc[:, 520], 
                        df.iloc[:, 520])


df.describe()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID
count,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,...,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0
mean,0.95172,0.965082,1.145415,1.150895,0.979644,1.396263,1.750822,1.821859,1.907198,1.182891,...,1.13889,1.103868,5.919202,7.535832,1.1534,1.048176,1.161856,7464.202052,4864871.0,1.213581
std,9.907839,9.94587,10.907006,10.93278,9.975353,10.995386,11.424759,11.274403,11.739711,10.859246,...,10.38636,10.499751,16.221807,16.427428,10.88876,10.432185,10.984137,123.311468,66.96052,0.832702
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7300.81899,4864746.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7359.1485,4864821.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7423.0609,4864852.0,1.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7594.2641,4864930.0,2.0
max,105.0,105.0,105.0,105.0,105.0,105.0,105.0,105.0,105.0,105.0,...,105.0,105.0,105.0,105.0,105.0,105.0,105.0,7691.3384,4865017.0,2.0


In [5]:

min_LGT = 7300.818990
min_LAT = 4.864746e+06

df.iloc[:,520] = (df.iloc[:, 520] - min_LGT + 1)
df.iloc[:,521] = (df.iloc[:, 521] - min_LAT + 1)


In [6]:
def preprocess_data(df):
    
    # split the data set into features and targets(Floor and BuildingID)
    X1 = df.drop(['LONGITUDE', 'LATITUDE', 'BUILDINGID','FLOOR'], axis=1)
    y1 = df[[ 'BUILDINGID']]
    
    X2 = df.drop(['LONGITUDE', 'LATITUDE','FLOOR'], axis=1)
    y2 = df[['FLOOR']]
    
    
    X3 = df.drop(['LONGITUDE', 'LATITUDE','FLOOR'], axis=1)
    y3 = df[[ 'LONGITUDE', 'LATITUDE']]
    
    #create Dummies for the targets to feed into the model
    y1 = pd.get_dummies(data=y1, columns=[ 'BUILDINGID'])
    y2 = pd.get_dummies(data=y2, columns=[ 'FLOOR']) 
    X3 = pd.get_dummies(data=X3, columns=['BUILDINGID']) 
    
    return X1, y1, X2, y2, X3, y3

In [7]:
X1, y1, X2, y2, X3, y3 = preprocess_data(df)

In [8]:
#Scale Data with Standard Scaler

scaler1 = StandardScaler()
scaler1.fit(X1)    
X1 = scaler1.transform(X1)

scaler2 = StandardScaler()
scaler2.fit(X2)
X2 = scaler2.transform(X2)

scaler3 = StandardScaler()
scaler3.fit(X3)
X3 = scaler3.transform(X3)


In [9]:
y1 = lil_matrix(y1).toarray()
y2 = lil_matrix(y2).toarray()
y3 = lil_matrix(y3).toarray()

# Model Training 

## Predicting buildings

In [10]:
start_time = time.time()

# Define the model
model_1 = Sequential()
model_1.add(Dense(150, input_dim=520, activation='relu'))
model_1.add(BatchNormalization())
model_1.add(Dropout(0.2))
model_1.add(Dense(150, activation='relu'))
model_1.add(BatchNormalization())
model_1.add(Dropout(0.2))
model_1.add(Dense(50, activation='relu'))
model_1.add(BatchNormalization())
model_1.add(Dense(3, activation='softmax'))
model_1.compile(loss='binary_crossentropy', optimizer='adam')


# Train the model
model_1.fit(
    X1,
    y1,
    batch_size=1000,
    validation_split = 0.4,
    epochs=10,
    shuffle=True,
    verbose=2
)

predictions1 = np.round(model_1.predict(X1))

# accuracy
print("Accuracy of predicting buildings = ",accuracy_score(y1,predictions1))

print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Train on 11495 samples, validate on 7664 samples
Epoch 1/10
1s - loss: 0.5720 - val_loss: 0.3406
Epoch 2/10
1s - loss: 0.2292 - val_loss: 0.1776
Epoch 3/10
1s - loss: 0.1150 - val_loss: 0.1114
Epoch 4/10
1s - loss: 0.0762 - val_loss: 0.0867
Epoch 5/10
1s - loss: 0.0557 - val_loss: 0.0761
Epoch 6/10
1s - loss: 0.0467 - val_loss: 0.0699
Epoch 7/10
1s - loss: 0.0371 - val_loss: 0.0661
Epoch 8/10
1s - loss: 0.0312 - val_loss: 0.0641
Epoch 9/10
1s - loss: 0.0254 - val_loss: 0.0628
Epoch 10/10
1s - loss: 0.0215 - val_loss: 0.0626
Accuracy of predicting buildings =  0.9851766793673992
--- Run time: 0.29 mins ---


## Predicting floors

In [11]:
start_time = time.time()

# Define the model
model_2 = Sequential()
model_2.add(Dense(150, input_dim=521, activation='relu'))
model_2.add(BatchNormalization())
model_2.add(Dropout(0.2))
model_2.add(Dense(150, activation='relu'))
model_2.add(BatchNormalization())
model_2.add(Dropout(0.2))
model_2.add(Dense(50, activation='relu'))
model_2.add(BatchNormalization())
model_2.add(Dense(5, activation='softmax'))
model_2.compile(loss='binary_crossentropy', optimizer='adam')

# Train the model
model_2.fit(
    X2,
    y2,
    batch_size=1000,
    validation_split = 0.4,
    epochs=40,
    shuffle=True,
    verbose=2
)


predictions2 = np.round(model_2.predict(X2))

# accuracy
print("Accuracy of predicting floors = ",accuracy_score(y2,predictions2))

print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Train on 11495 samples, validate on 7664 samples
Epoch 1/40
1s - loss: 0.5606 - val_loss: 0.5039
Epoch 2/40
1s - loss: 0.4128 - val_loss: 0.4618
Epoch 3/40
1s - loss: 0.3367 - val_loss: 0.4226
Epoch 4/40
1s - loss: 0.2800 - val_loss: 0.3890
Epoch 5/40
1s - loss: 0.2344 - val_loss: 0.3566
Epoch 6/40
1s - loss: 0.2014 - val_loss: 0.3380
Epoch 7/40
1s - loss: 0.1729 - val_loss: 0.3249
Epoch 8/40
1s - loss: 0.1562 - val_loss: 0.3177
Epoch 9/40
1s - loss: 0.1391 - val_loss: 0.3171
Epoch 10/40
1s - loss: 0.1250 - val_loss: 0.3165
Epoch 11/40
1s - loss: 0.1094 - val_loss: 0.3226
Epoch 12/40
1s - loss: 0.1010 - val_loss: 0.3271
Epoch 13/40
1s - loss: 0.0910 - val_loss: 0.3336
Epoch 14/40
1s - loss: 0.0820 - val_loss: 0.3359
Epoch 15/40
1s - loss: 0.0713 - val_loss: 0.3436
Epoch 16/40
1s - loss: 0.0666 - val_loss: 0.3582
Epoch 17/40
1s - loss: 0.0606 - val_loss: 0.3647
Epoch 18/40
1s - loss: 0.0537 - val_loss: 0.3703
Epoch 19/40
1s - loss: 0.0489 - val_loss: 0.3825
Epoch 20/40
1s - loss: 0.0438

In [12]:
predictions = np.hstack((predictions1, predictions2)) 
y_test = np.hstack((y1,y2))  
# accuracy
print("Total Accuracy = ",accuracy_score(y_test,predictions))

print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Total Accuracy =  0.8863197452894201
--- Run time: 0.95 mins ---


## Predicting longitude and latitude

In [13]:
start_time = time.time()

# Define the model
model_3 = Sequential()
model_3.add(Dense(300, input_dim=523, activation='relu'))
model_3.add(BatchNormalization())
model_3.add(Dropout(0.2))
model_3.add(Dense(300, activation='relu'))
model_3.add(BatchNormalization())
model_3.add(Dropout(0.2))
model_3.add(Dense(300, activation='relu'))
model_3.add(BatchNormalization())
model_3.add(Dense(2, activation='linear'))
model_3.compile(loss='mean_absolute_error', optimizer='adam')


# Train the model
model_3.fit(
    X3,
    y3,
    batch_size=500,
    validation_split = 0.4,
    epochs=400,
    shuffle=True,
    verbose=2
)




Train on 11495 samples, validate on 7664 samples
Epoch 1/400
3s - loss: 132.8572 - val_loss: 161.6397
Epoch 2/400
3s - loss: 132.1203 - val_loss: 160.1506
Epoch 3/400
3s - loss: 130.7856 - val_loss: 158.7981
Epoch 4/400
2s - loss: 128.7131 - val_loss: 153.1603
Epoch 5/400
2s - loss: 125.8321 - val_loss: 150.5759
Epoch 6/400
3s - loss: 122.0929 - val_loss: 141.7781
Epoch 7/400
3s - loss: 117.4877 - val_loss: 126.8255
Epoch 8/400
3s - loss: 112.0156 - val_loss: 113.3146
Epoch 9/400
2s - loss: 105.6634 - val_loss: 96.4094
Epoch 10/400
2s - loss: 98.4430 - val_loss: 83.0864
Epoch 11/400
2s - loss: 90.3828 - val_loss: 74.0573
Epoch 12/400
2s - loss: 81.4624 - val_loss: 69.6719
Epoch 13/400
2s - loss: 71.7460 - val_loss: 60.0020
Epoch 14/400
3s - loss: 61.3636 - val_loss: 54.2280
Epoch 15/400
2s - loss: 50.5301 - val_loss: 44.7416
Epoch 16/400
4s - loss: 40.2584 - val_loss: 40.4122
Epoch 17/400
3s - loss: 31.4249 - val_loss: 37.7248
Epoch 18/400
3s - loss: 24.8923 - val_loss: 33.8833
Epoch 1

5s - loss: 5.2324 - val_loss: 9.1994
Epoch 162/400
4s - loss: 4.6866 - val_loss: 8.9821
Epoch 163/400
4s - loss: 4.6992 - val_loss: 8.9538
Epoch 164/400
4s - loss: 5.0373 - val_loss: 8.8440
Epoch 165/400
4s - loss: 5.0023 - val_loss: 8.9132
Epoch 166/400
4s - loss: 5.3444 - val_loss: 9.0441
Epoch 167/400
4s - loss: 5.6671 - val_loss: 9.0071
Epoch 168/400
5s - loss: 4.3400 - val_loss: 8.9925
Epoch 169/400
5s - loss: 4.0438 - val_loss: 9.2363
Epoch 170/400
5s - loss: 4.9009 - val_loss: 8.7539
Epoch 171/400
4s - loss: 4.8204 - val_loss: 9.3058
Epoch 172/400
4s - loss: 4.9534 - val_loss: 8.8313
Epoch 173/400
5s - loss: 4.5802 - val_loss: 8.7908
Epoch 174/400
5s - loss: 4.9468 - val_loss: 8.9609
Epoch 175/400
4s - loss: 4.7008 - val_loss: 9.0689
Epoch 176/400
4s - loss: 4.4653 - val_loss: 8.9462
Epoch 177/400
4s - loss: 5.1212 - val_loss: 9.0489
Epoch 178/400
4s - loss: 5.0130 - val_loss: 9.2181
Epoch 179/400
4s - loss: 4.6722 - val_loss: 8.9085
Epoch 180/400
4s - loss: 5.0658 - val_loss: 8

5s - loss: 4.0178 - val_loss: 8.4874
Epoch 323/400
5s - loss: 4.3916 - val_loss: 8.5268
Epoch 324/400
5s - loss: 4.8889 - val_loss: 8.5383
Epoch 325/400
5s - loss: 3.5519 - val_loss: 8.5487
Epoch 326/400
5s - loss: 4.0665 - val_loss: 8.5436
Epoch 327/400
5s - loss: 4.0740 - val_loss: 8.5233
Epoch 328/400
4s - loss: 4.4186 - val_loss: 8.5900
Epoch 329/400
5s - loss: 3.8656 - val_loss: 8.5610
Epoch 330/400
4s - loss: 4.1650 - val_loss: 8.4637
Epoch 331/400
6s - loss: 4.4990 - val_loss: 8.6452
Epoch 332/400
6s - loss: 4.1018 - val_loss: 8.6944
Epoch 333/400
5s - loss: 3.8141 - val_loss: 8.5968
Epoch 334/400
5s - loss: 3.7440 - val_loss: 8.5582
Epoch 335/400
5s - loss: 4.1534 - val_loss: 8.5362
Epoch 336/400
5s - loss: 3.3377 - val_loss: 8.4599
Epoch 337/400
5s - loss: 3.9372 - val_loss: 8.4019
Epoch 338/400
5s - loss: 4.2305 - val_loss: 8.4412
Epoch 339/400
5s - loss: 4.3458 - val_loss: 8.5404
Epoch 340/400
5s - loss: 4.0678 - val_loss: 8.4678
Epoch 341/400
5s - loss: 3.8808 - val_loss: 8

<keras.callbacks.History at 0x7f6d7c4867b8>

In [14]:
predictions3 = (model_3.predict(X3))

# accuracy
print("RMSE of predicting LONGTITUDE = ", mean_absolute_error(y3[:,0],predictions3[:,0]))
print("RMSE of predicting LATITUDE = ", mean_absolute_error(y3[:,1],predictions3[:,1]))


print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

RMSE of predicting LONGTITUDE =  4.345472417491149
RMSE of predicting LATITUDE =  3.695972426463191
--- Run time: 32.18 mins ---
