# Deep Learning for Predicting Indoor Location Using WiFi Fingerprinting
Ha Vu Tran

In [1]:
# necessary Libraries
import numpy as np
import pandas as pd
import time
import pprint

#Visualizations
import matplotlib.pyplot as plt
import seaborn as sns


#Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA 
from scipy.sparse import lil_matrix

#Scoring Metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error


import tensorflow as tf
from keras.models import Sequential
from keras.layers import *

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Preprocess Data

In [15]:
df = pd.read_csv("challenge1.csv")

#df.head(10)

In [16]:
#df.describe()

In [17]:

#Drop unneeded data
df.drop(['Unnamed: 0', 'USERID', 'PHONEID', 'TIMESTAMP'], axis = 1, inplace=True)

#Remove "NaN" value
col = df.columns[0:520]
for i in col:
    df[i].fillna(0, inplace=True)
df.dropna(subset=['LONGITUDE','LATITUDE', 'FLOOR', 'BUILDINGID' ], inplace=True)
#trainingData.isnull().sum()



#Process "WAP" data
df.iloc[:, 0:520] = np.where(df.iloc[:, 0:520] <= 0, 
                        df.iloc[:, 0:520] + 105, 
                        df.iloc[:, 0:520] - 100)

#Process Longtitude
df.iloc[:, 520] = np.where(df.iloc[:, 520] <= 0, 
                        -df.iloc[:, 520], 
                        df.iloc[:, 520])


df.describe()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID
count,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,...,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0,19159.0
mean,0.95172,0.965082,1.145415,1.150895,0.979644,1.396263,1.750822,1.821859,1.907198,1.182891,...,1.13889,1.103868,5.919202,7.535832,1.1534,1.048176,1.161856,7464.202052,4864871.0,1.213581
std,9.907839,9.94587,10.907006,10.93278,9.975353,10.995386,11.424759,11.274403,11.739711,10.859246,...,10.38636,10.499751,16.221807,16.427428,10.88876,10.432185,10.984137,123.311468,66.96052,0.832702
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7300.81899,4864746.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7359.1485,4864821.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7423.0609,4864852.0,1.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7594.2641,4864930.0,2.0
max,105.0,105.0,105.0,105.0,105.0,105.0,105.0,105.0,105.0,105.0,...,105.0,105.0,105.0,105.0,105.0,105.0,105.0,7691.3384,4865017.0,2.0


In [18]:

min_LGT = 7300.818990
min_LAT = 4.864746e+06

df.iloc[:,520] = (df.iloc[:, 520] - min_LGT + 1)
df.iloc[:,521] = (df.iloc[:, 521] - min_LAT + 1)


In [19]:
def preprocess_data(df):
    
    # split the data set into features and targets(Floor and BuildingID)
    X1 = df.drop(['LONGITUDE', 'LATITUDE', 'BUILDINGID','FLOOR'], axis=1)
    y1 = df[[ 'BUILDINGID']]
    
    X2 = df.drop(['LONGITUDE', 'LATITUDE','FLOOR'], axis=1)
    y2 = df[['FLOOR']]
    
    
    X3 = df.drop(['LONGITUDE', 'LATITUDE','FLOOR'], axis=1)
    y3 = df[[ 'LONGITUDE', 'LATITUDE']]
    
    #create Dummies for the targets to feed into the model
    y1 = pd.get_dummies(data=y1, columns=[ 'BUILDINGID'])
    y2 = pd.get_dummies(data=y2, columns=[ 'FLOOR']) 
    X3 = pd.get_dummies(data=X3, columns=['BUILDINGID']) 
    
    return X1, y1, X2, y2, X3, y3

In [20]:
def split_data(X, y):
    
    X_train, X_test, y_train, y_test = train_test_split(X, 
                                                        y, 
                                                        test_size =  0.1, 
                                                        random_state = 42,
                                                        shuffle=True)

    # Show the results of the split
    print("Training set has {} samples.".format(X_train.shape[0]))
    print("Testing set has {} samples.".format(X_test.shape[0]))
    return X_train, X_test, y_train, y_test

In [21]:
X1, y1, X2, y2, X3, y3 = preprocess_data(df)
X_train1, X_test1, y_train1, y_test1 = split_data(X1,y1)
X_train2, X_test2, y_train2, y_test2 = split_data(X2,y2)
X_train3, X_test3, y_train3, y_test3 = split_data(X3,y3)

Training set has 17243 samples.
Testing set has 1916 samples.
Training set has 17243 samples.
Testing set has 1916 samples.
Training set has 17243 samples.
Testing set has 1916 samples.


In [22]:
#Scale Data with Standard Scaler

scaler1 = StandardScaler()
scaler1.fit(X_train1)
    
# Apply transform to both the training set and the test set.
X_train1 = scaler1.transform(X_train1)
X_test1 = scaler1.transform(X_test1)

scaler2 = StandardScaler()
scaler2.fit(X_train2)
    
# Apply transform to both the training set and the test set.
X_train2 = scaler2.transform(X_train2)
X_test2 = scaler2.transform(X_test2)

scaler3 = StandardScaler()
scaler3.fit(X_train3)
    
# Apply transform to both the training set and the test set.
X_train3 = scaler3.transform(X_train3)
X_test3 = scaler3.transform(X_test3)


In [23]:
y_train1 = lil_matrix(y_train1).toarray()
y_test1 = lil_matrix(y_test1).toarray()
y_train2 = lil_matrix(y_train2).toarray()
y_test2 = lil_matrix(y_test2).toarray()
y_train3 = lil_matrix(y_train3).toarray()
y_test3 = lil_matrix(y_test3).toarray()

# Model Training 

## Predicting buildings

In [11]:
start_time = time.time()

# Define the model
model_1 = Sequential()
model_1.add(Dense(50, input_dim=520, activation='relu'))
model_1.add(Dense(80, activation='relu'))
model_1.add(Dense(50, activation='relu'))
model_1.add(Dense(3, activation='softmax'))
model_1.compile(loss='binary_crossentropy', optimizer='adam')

# Train the model
model_1.fit(
    X_train1,
    y_train1,
    epochs=70,
    shuffle=True,
    verbose=2
)

predictions1 = np.round(model_1.predict(X_test1))

# accuracy
print("Accuracy of predicting buildings = ",accuracy_score(y_test1,predictions1))

print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Epoch 1/70
2s - loss: 0.1154
Epoch 2/70
2s - loss: 0.0252
Epoch 3/70
2s - loss: 0.0107
Epoch 4/70
2s - loss: 0.0046
Epoch 5/70
2s - loss: 0.0018
Epoch 6/70
1s - loss: 5.5615e-04
Epoch 7/70
1s - loss: 0.0042
Epoch 8/70
1s - loss: 0.0104
Epoch 9/70
1s - loss: 0.0047
Epoch 10/70
1s - loss: 6.5234e-04
Epoch 11/70
1s - loss: 1.4587e-04
Epoch 12/70
1s - loss: 3.3420e-05
Epoch 13/70
2s - loss: 2.0977e-05
Epoch 14/70
2s - loss: 1.4159e-05
Epoch 15/70
2s - loss: 9.7611e-06
Epoch 16/70
2s - loss: 6.9269e-06
Epoch 17/70
1s - loss: 4.9394e-06
Epoch 18/70
1s - loss: 3.5638e-06
Epoch 19/70
1s - loss: 2.5852e-06
Epoch 20/70
2s - loss: 1.8994e-06
Epoch 21/70
1s - loss: 1.4008e-06
Epoch 22/70
1s - loss: 1.0485e-06
Epoch 23/70
1s - loss: 7.8927e-07
Epoch 24/70
1s - loss: 6.0511e-07
Epoch 25/70
1s - loss: 4.6814e-07
Epoch 26/70
2s - loss: 3.6839e-07
Epoch 27/70
1s - loss: 2.9624e-07
Epoch 28/70
1s - loss: 2.4424e-07
Epoch 29/70
1s - loss: 2.0650e-07
Epoch 30/70
1s - loss: 1.7911e-07
Epoch 31/70
2s - loss

## Predicting floors

In [12]:
start_time = time.time()

# Define the model
model_2 = Sequential()
model_2.add(Dense(50, input_dim=521, activation='relu'))
model_2.add(Dense(80, activation='relu'))
model_2.add(Dense(50, activation='relu'))
model_2.add(Dense(5, activation='softmax'))
model_2.compile(loss='binary_crossentropy', optimizer='adam')

# Train the model
model_2.fit(
    X_train2,
    y_train2,
    epochs=100,
    shuffle=True,
    verbose=2
)

predictions2 = np.round(model_2.predict(X_test2))

# accuracy
print("Accuracy of predicting floors = ",accuracy_score(y_test2,predictions2))

print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Epoch 1/100
1s - loss: 0.2696
Epoch 2/100
1s - loss: 0.1431
Epoch 3/100
1s - loss: 0.0946
Epoch 4/100
1s - loss: 0.0614
Epoch 5/100
1s - loss: 0.0397
Epoch 6/100
1s - loss: 0.0284
Epoch 7/100
1s - loss: 0.0207
Epoch 8/100
1s - loss: 0.0171
Epoch 9/100
1s - loss: 0.0139
Epoch 10/100
1s - loss: 0.0129
Epoch 11/100
2s - loss: 0.0119
Epoch 12/100
2s - loss: 0.0098
Epoch 13/100
2s - loss: 0.0081
Epoch 14/100
2s - loss: 0.0075
Epoch 15/100
2s - loss: 0.0066
Epoch 16/100
2s - loss: 0.0080
Epoch 17/100
3s - loss: 0.0095
Epoch 18/100
3s - loss: 0.0080
Epoch 19/100
3s - loss: 0.0080
Epoch 20/100
2s - loss: 0.0055
Epoch 21/100
2s - loss: 0.0099
Epoch 22/100
2s - loss: 0.0085
Epoch 23/100
2s - loss: 0.0043
Epoch 24/100
2s - loss: 0.0024
Epoch 25/100
2s - loss: 0.0024
Epoch 26/100
2s - loss: 0.0036
Epoch 27/100
2s - loss: 0.0140
Epoch 28/100
2s - loss: 0.0089
Epoch 29/100
2s - loss: 0.0038
Epoch 30/100
2s - loss: 0.0024
Epoch 31/100
1s - loss: 0.0015
Epoch 32/100
1s - loss: 0.0033
Epoch 33/100
1s -

In [13]:
predictions = np.hstack((predictions1, predictions2)) 
y_test = np.hstack((y_test1, y_test2))  
# accuracy
print("Total Accuracy = ",accuracy_score(y_test,predictions))

print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Total Accuracy =  0.8596033402922756
--- Run time: 3.41 mins ---


## Predicting longitude and latitude

In [24]:
start_time = time.time()

# Define the model
model_3 = Sequential()
model_3.add(Dense(50, input_dim=523, activation='relu'))
model_3.add(Dense(80, activation='relu'))
model_3.add(Dense(50, activation='relu'))
model_3.add(Dense(2, activation='linear'))
model_3.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
model_3.fit(
    X_train3,
    y_train3,
    epochs=100,
    shuffle=True,
    verbose=2
)

predictions3 = (model_3.predict(X_test3))

# accuracy
print("RMSE of predicting LONGTITUDE = ", mean_squared_error(y_test3[:,0],predictions3[:,0])**(0.5))
print("RMSE of predicting LATITUDE = ", mean_squared_error(y_test3[:,1],predictions3[:,1])**(0.5))


print("--- Run time: %s mins ---" % np.round(((time.time() - start_time)/60),2))

Epoch 1/100
1s - loss: 4890.1547
Epoch 2/100
1s - loss: 677.9568
Epoch 3/100
1s - loss: 413.2666
Epoch 4/100
1s - loss: 305.4853
Epoch 5/100
1s - loss: 247.4825
Epoch 6/100
1s - loss: 206.2158
Epoch 7/100
2s - loss: 177.8666
Epoch 8/100
1s - loss: 158.9924
Epoch 9/100
1s - loss: 144.6003
Epoch 10/100
1s - loss: 129.9483
Epoch 11/100
1s - loss: 120.8270
Epoch 12/100
1s - loss: 111.2803
Epoch 13/100
1s - loss: 102.1603
Epoch 14/100
1s - loss: 95.0603
Epoch 15/100
1s - loss: 89.6231
Epoch 16/100
1s - loss: 84.2001
Epoch 17/100
1s - loss: 80.5839
Epoch 18/100
1s - loss: 77.3686
Epoch 19/100
1s - loss: 72.7160
Epoch 20/100
1s - loss: 69.2387
Epoch 21/100
1s - loss: 67.0740
Epoch 22/100
1s - loss: 64.6244
Epoch 23/100
1s - loss: 63.5106
Epoch 24/100
1s - loss: 60.4678
Epoch 25/100
1s - loss: 58.7425
Epoch 26/100
1s - loss: 55.5861
Epoch 27/100
1s - loss: 54.2645
Epoch 28/100
1s - loss: 52.4753
Epoch 29/100
1s - loss: 53.4318
Epoch 30/100
1s - loss: 51.9022
Epoch 31/100
1s - loss: 49.8759
Epo