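Load the data: `trainingData.csv` is the full training set, and the dataset's `ValidationData.csv` file is kept aside as the test set.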

In [11]:
import pandas as pd

# Read both CSV files in one pass. Note that the file named
# "ValidationData.csv" is bound to `test_df`: it is used as the test set,
# while the validation set is carved out of the training file later on.
whole_train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'uji_wifi/UJIndoorLoc/trainingData.csv',
        'uji_wifi/UJIndoorLoc/ValidationData.csv',
    )
)

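Train/validation split: samples recorded by users with `USERID` <= 13 form the training set, and samples from the remaining users form the validation set.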

In [12]:
# Split by user rather than by row: every sample recorded by a user with
# USERID <= 13 goes to the training set, all other users go to validation.
train_mask = whole_train_df['USERID'].le(13)
train_df, val_df = whole_train_df.loc[train_mask], whole_train_df.loc[~train_mask]

# Sanity check on the resulting sizes (rows, columns).
print(train_df.shape, val_df.shape)

(15647, 529) (4290, 529)


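Data preparation: extract the 520 signal-strength feature columns and the building, floor, longitude and latitude targets as NumPy arrays, then replace the placeholder value 100 in the features with -110.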

In [13]:
import numpy as np

def extract_features_and_targets(df, n_features=520, missing_marker=100, missing_fill=-110):
    """Split a UJIndoorLoc-style frame into a feature matrix and target arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first ``n_features`` columns are the signal readings and
        which has "BUILDINGID", "FLOOR", "LONGITUDE" and "LATITUDE" columns.
    n_features : int
        Number of leading columns used as features.
    missing_marker : int
        Feature value to be replaced. NOTE(review): presumably the dataset's
        "access point not detected" placeholder - confirm against the
        dataset documentation.
    missing_fill : int
        Value written in place of ``missing_marker``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, building, floor, longitude, latitude)``.
    """
    # copy=True is essential: without it `to_numpy()` may hand back a view of
    # the frame's own buffer, so the in-place replacement below would either
    # silently modify `df` or fail on a read-only array (pandas Copy-on-Write).
    X = df.iloc[:, 0:n_features].to_numpy(copy=True)
    X[X == missing_marker] = missing_fill

    building = df["BUILDINGID"].to_numpy(dtype=np.int64)
    floor = df["FLOOR"].to_numpy(dtype=np.int64)
    longitude = df["LONGITUDE"].to_numpy()
    latitude = df["LATITUDE"].to_numpy()
    return X, building, floor, longitude, latitude


# Same extraction for the three splits; variable names are unchanged so the
# cells below keep working.
train_X, train_building, train_floor, train_long, train_lat = extract_features_and_targets(train_df)
val_X, val_building, val_floor, val_long, val_lat = extract_features_and_targets(val_df)
test_X, test_building, test_floor, test_long, test_lat = extract_features_and_targets(test_df)

Evaluation metrics

In [14]:
from sklearn.metrics import mean_squared_error


def accuracy(pred_Y, true_Y):
    """Return the fraction of predictions that exactly match the true labels.

    Parameters
    ----------
    pred_Y : array-like
        Predicted labels.
    true_Y : array-like
        Ground-truth labels, same shape as ``pred_Y``.

    Returns
    -------
    numpy.float64
        Share of matching positions, between 0 and 1 (``nan`` for empty input).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Coerce to arrays first: with plain Python lists `==` compares the lists
    # as a whole and yields a single bool instead of an element-wise mask.
    pred = np.asarray(pred_Y)
    true = np.asarray(true_Y)
    if pred.shape != true.shape:
        raise ValueError(
            "pred_Y and true_Y must have the same shape, got "
            f"{pred.shape} and {true.shape}"
        )
    return np.mean(pred == true)

def distance_rmse(pred_long, pred_lat, true_long, true_lat):
    """Root-mean-square of the 2-D positioning error.

    For every sample the squared Euclidean distance between the predicted
    and the true (longitude, latitude) point is computed; the function
    returns the square root of the average of those squared distances.
    """
    d_long = pred_long - true_long
    d_lat = pred_lat - true_lat
    squared_error = d_long**2 + d_lat**2
    n_samples = len(squared_error)
    return np.sqrt(np.sum(squared_error) / n_samples)

def dist_mean_error(pred_long, pred_lat, true_long, true_lat):
    """Mean 2-D positioning error.

    Computes the Euclidean distance between each predicted and true
    (longitude, latitude) point and returns the sum of those distances
    divided by the number of predictions (``len(pred_long)``).
    """
    d_long = pred_long - true_long
    d_lat = pred_lat - true_lat
    per_sample_dist = np.sqrt(d_long**2 + d_lat**2)
    n_samples = len(pred_long)
    return np.sum(per_sample_dist) / n_samples

Training of the k-NN models for building and floor classification

In [None]:
from sklearn.neighbors import KNeighborsClassifier

def knn_weight(d):
    """Inverse-square distance weight, used as the `weights` callable of the k-NN models.

    A small constant is added to the distance before squaring so that a
    distance of exactly zero does not cause a division by zero.
    """
    shifted = d + 0.000001
    return 1 / shifted ** 2

# Two independent 5-nearest-neighbour classifiers on the same features, both
# using the custom inverse-square distance weighting: one predicts the
# building, the other the floor.
building_knn_clf = KNeighborsClassifier(weights=knn_weight, n_neighbors=5)
building_knn_clf.fit(train_X, train_building)

floor_knn_clf = KNeighborsClassifier(weights=knn_weight, n_neighbors=5)
# Left as the cell's last expression so the notebook displays the fitted model.
floor_knn_clf.fit(train_X, train_floor)

Evaluation on the validation set

In [24]:
# Classify the held-out validation samples with each model.
pred_building = building_knn_clf.predict(val_X)
pred_floor = floor_knn_clf.predict(val_X)

# Score first, report afterwards.
val_building_acc = accuracy(pred_building, val_building)
val_floor_acc = accuracy(pred_floor, val_floor)
print('Val building accuracy : ', val_building_acc)
print('Val floor accuracy : ', val_floor_acc)

Val building accuracy :  0.9731934731934732
Val floor accuracy :  0.9072261072261072


Training of the k-NN models for longitude/latitude estimation

In [17]:
from sklearn.neighbors import KNeighborsRegressor

# One 3-nearest-neighbour regressor per coordinate, both using the custom
# inverse-square distance weighting.
long_knn_reg = KNeighborsRegressor(weights=knn_weight, n_neighbors=3)
long_knn_reg.fit(train_X, train_long)

lat_knn_reg = KNeighborsRegressor(weights=knn_weight, n_neighbors=3)
# Left as the cell's last expression so the notebook displays the fitted model.
lat_knn_reg.fit(train_X, train_lat)

KNeighborsRegressor(n_neighbors=3,
                    weights=<function knn_weight at 0x7efedaaec170>)

Evaluation on the validation set

In [18]:
# Predict each coordinate of the validation samples with its own regressor.
pred_long = long_knn_reg.predict(val_X)
pred_lat = lat_knn_reg.predict(val_X)

# Per-axis RMSE. The square root is taken explicitly rather than through
# `squared=False`: that keyword was deprecated and then removed from
# scikit-learn's mean_squared_error, so this form runs on old and new
# releases alike. Arguments follow the documented (y_true, y_pred) order.
long_rmse = np.sqrt(mean_squared_error(val_long, pred_long))
lat_rmse = np.sqrt(mean_squared_error(val_lat, pred_lat))

# Errors on the combined 2-D position.
dist_rmse = distance_rmse(pred_long, pred_lat, val_long, val_lat)
dist_me = dist_mean_error(pred_long, pred_lat, val_long, val_lat)

print('Val longitude RMSE : ', long_rmse)
print('Val latitude RMSE : ', lat_rmse)
print('Val distance RMSE : ', dist_rmse)
print('Val distance ME : ', dist_me)

Val longitude RMSE :  13.157371208395501
Val latitude RMSE :  8.522246644395064
Val distance RMSE :  15.676259279031392
Val distance ME :  8.9558595722621


Evaluation on the test set

In [19]:
# Classify the test samples. Note that this rebinds `pred_building` and
# `pred_floor`, which previously held the validation predictions.
pred_building = building_knn_clf.predict(test_X)
pred_floor = floor_knn_clf.predict(test_X)

# Score first, report afterwards.
test_building_acc = accuracy(pred_building, test_building)
test_floor_acc = accuracy(pred_floor, test_floor)
print('Test building accuracy : ', test_building_acc)
print('Test floor accuracy : ', test_floor_acc)

Test building accuracy :  0.9954995499549955
Test floor accuracy :  0.891989198919892


In [20]:
# Predict each coordinate of the test samples with its own regressor. This
# rebinds `pred_long` / `pred_lat`, which previously held the validation
# predictions.
pred_long = long_knn_reg.predict(test_X)
pred_lat = lat_knn_reg.predict(test_X)

# Per-axis RMSE. The square root is taken explicitly rather than through
# `squared=False`: that keyword was deprecated and then removed from
# scikit-learn's mean_squared_error, so this form runs on old and new
# releases alike. Arguments follow the documented (y_true, y_pred) order.
long_rmse = np.sqrt(mean_squared_error(test_long, pred_long))
lat_rmse = np.sqrt(mean_squared_error(test_lat, pred_lat))

# Errors on the combined 2-D position.
dist_rmse = distance_rmse(pred_long, pred_lat, test_long, test_lat)
dist_me = dist_mean_error(pred_long, pred_lat, test_long, test_lat)


print('Test longitude RMSE : ', long_rmse)
print('Test latitude RMSE : ', lat_rmse)
print('Test distance RMSE : ', dist_rmse)
print('Test distance ME : ', dist_me)


Test longitude RMSE :  12.278969871721623
Test latitude RMSE :  10.25358185612303
Test distance RMSE :  15.997157309686704
Test distance ME :  9.25949772687459
