First, we load the data.

In [1]:
import pandas as pd

# Read both CSV files of the dataset. The file named ValidationData.csv is
# kept aside as the held-out test set (test_df); the validation set used
# during training is carved out of trainingData.csv in a later cell.
whole_train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'uji_wifi/UJIndoorLoc/trainingData.csv',
        'uji_wifi/UJIndoorLoc/ValidationData.csv',
    )
)

We split the training data into training and validation sets using the USERID field.

In [51]:
# Split by user rather than by row: every sample recorded by a user with
# USERID <= 13 goes to the training set, all other users form the
# validation set.
train_mask = whole_train_df['USERID'].le(13)
train_df, val_df = whole_train_df.loc[train_mask], whole_train_df.loc[~train_mask]
print(train_df.shape, val_df.shape)

(15647, 529) (4290, 529)


We put the data into NumPy arrays. In the raw data, a missing AP measurement is encoded as the value 100; we replace it with an RSSI value of -110 dBm.

In [52]:
import numpy as np

def _features_and_targets(df):
    """Convert one split of the dataset into NumPy feature/target arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        A split whose first 520 columns hold the per-AP RSSI readings and
        which has BUILDINGID, FLOOR, LONGITUDE and LATITUDE columns.

    Returns
    -------
    tuple
        ``(X, building, floor, longitude, latitude)`` where ``X`` holds the
        first 520 columns with every value equal to 100 replaced by -110,
        ``building`` and ``floor`` are int64 label arrays, and
        ``longitude`` / ``latitude`` are the coordinate columns.
    """
    # Explicit copy: to_numpy() may hand back a view of the DataFrame's data
    # (read-only under pandas Copy-on-Write), so the in-place replacement
    # below must never operate on the frame's own buffer.
    X = df.iloc[:, 0:520].to_numpy(copy=True)
    # 100 stands for "no measurement"; map it to -110 dBm.
    X[X == 100] = -110
    return (
        X,
        df["BUILDINGID"].to_numpy(dtype=np.int64),
        df["FLOOR"].to_numpy(dtype=np.int64),
        df["LONGITUDE"].to_numpy(),
        df["LATITUDE"].to_numpy(),
    )


train_X, train_building, train_floor, train_long, train_lat = _features_and_targets(train_df)
val_X, val_building, val_floor, val_long, val_lat = _features_and_targets(val_df)
test_X, test_building, test_floor, test_long, test_lat = _features_and_targets(test_df)

Some helper functions that we will use to evaluate our models.

In [53]:
from sklearn.metrics import mean_squared_error


def accuracy(pred_Y, true_Y):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    pred_Y : array-like
        Predicted labels.
    true_Y : array-like
        Ground-truth labels, same length as ``pred_Y``.

    Returns
    -------
    float
        Number of element-wise matches divided by ``len(true_Y)``.
    """
    # Coerce to arrays so that plain Python lists are compared element-wise;
    # `list == list` would yield a single bool and a silently wrong score.
    pred_Y = np.asarray(pred_Y)
    true_Y = np.asarray(true_Y)
    return np.sum(pred_Y == true_Y) / len(true_Y)

def distance_rmse(pred_long, pred_lat, true_long, true_lat):
    """Root-mean-square of the 2-D Euclidean error between two point sets.

    Each argument is an array of coordinates; the squared distance between
    predicted and true position is averaged over all samples and the square
    root of that average is returned.
    """
    d_long = pred_long - true_long
    d_lat = pred_lat - true_lat
    squared_error = d_long**2 + d_lat**2
    mean_sq = np.sum(squared_error) / len(squared_error)
    return np.sqrt(mean_sq)

def dist_mean_error(pred_long, pred_lat, true_long, true_lat):
    """Mean 2-D Euclidean distance between predicted and true positions.

    Each argument is an array of coordinates; the per-sample distance is
    summed and divided by the number of predictions.
    """
    d_long = pred_long - true_long
    d_lat = pred_lat - true_lat
    per_sample_dist = np.sqrt(d_long**2 + d_lat**2)
    n_samples = len(pred_long)
    return np.sum(per_sample_dist) / n_samples

We use a simple MLP with only one hidden layer and batch normalization.

In [54]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLPClf(nn.Module):
    """Multi-layer perceptron with one hidden layer and batch normalization.

    Architecture: Linear(num_inputs, hidden_size) -> BatchNorm1d ->
    leaky ReLU -> Linear(hidden_size, num_outputs). The forward pass returns
    the raw outputs of the last linear layer (no softmax is applied).

    Parameters
    ----------
    num_outputs : int
        Size of the output layer.
    num_inputs : int
        Number of input features (default 520, the value previously
        hard-coded).
    hidden_size : int
        Width of the hidden layer (default 256, the value previously
        hard-coded).
    """

    def __init__(self, num_outputs=1, num_inputs=520, hidden_size=256):
        super().__init__()

        # Attribute names are kept so existing state_dict keys still match.
        self.linear1 = nn.Linear(num_inputs, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)

        self.final = nn.Linear(hidden_size, num_outputs)

    def forward(self, x):
        """Map a (batch, num_inputs) tensor to (batch, num_outputs) scores."""
        x = F.leaky_relu(self.bn1(self.linear1(x)))

        x = self.final(x)
        return x

Below is the function we will use to train our model.

In [55]:
import time
import copy

def train(model, train_loader, val_loader, optimizer, criterion, scheduler, num_epochs, patience=10, transform=None):
    """Train ``model`` with early stopping on the validation loss.

    Each epoch runs a training pass over ``train_loader`` followed by a
    validation pass over ``val_loader``. The reported loss of a pass is the
    mean of the per-batch losses. After each validation pass the scheduler
    is stepped with the validation loss, and the weights with the lowest
    validation loss seen so far are remembered.

    Parameters
    ----------
    model : torch.nn.Module
        Model to train; batches are moved to the device of its parameters.
    train_loader, val_loader : iterable of (inputs, targets)
        Batch providers; ``len()`` must give the number of batches.
    optimizer : torch.optim.Optimizer
    criterion : callable
        ``criterion(outputs, targets)`` returning a scalar loss tensor.
    scheduler
        Object whose ``step(val_loss)`` is called once per epoch.
    num_epochs : int
        Maximum number of epochs.
    patience : int
        Stop after this many consecutive epochs without a new best
        validation loss.
    transform : callable, optional
        Applied to the inputs of training batches only, after they have been
        moved to the device.

    Returns
    -------
    torch.nn.Module
        ``model`` with the best-validation-loss weights loaded (unchanged
        weights if no epoch was run).
    """
    data_loaders = {'train': train_loader, 'val': val_loader}
    data_lengths = {'train': len(train_loader), 'val': len(val_loader)}

    # Follow the model's device instead of assuming CUDA; a model without
    # parameters falls back to the previous behaviour (CUDA).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    best_val_loss = None
    # Snapshot the current weights so the final restore is always defined,
    # even when num_epochs == 0.
    best_model_wts = copy.deepcopy(model.state_dict())
    epochs_no_improve = 0
    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}')
        epoch_start = time.time()
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) enables training mode, train(False) is eval().
            model.train(is_train)
            running_loss = 0.0
            for inputs, targets in data_loaders[phase]:
                inputs, targets = inputs.to(device), targets.to(device)
                if is_train and transform is not None:
                    inputs = transform(inputs)

                # No autograd graph is needed during validation.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs).squeeze(-1)
                    loss = criterion(outputs, targets)

                if is_train:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item()

            epoch_loss = running_loss/data_lengths[phase]
            if is_train:
                print(f'Training loss : {epoch_loss}')
                print(f'Learning rate : {optimizer.param_groups[0]["lr"]}')
            else:
                scheduler.step(epoch_loss)
                print(f'Validation loss : {epoch_loss}')
                if best_val_loss is None or epoch_loss < best_val_loss:
                    epochs_no_improve = 0
                    best_val_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                else:
                    epochs_no_improve += 1
                    if epochs_no_improve >= patience:
                        print('Early stopping')
                        model.load_state_dict(best_model_wts)
                        return model

        print(f'Epoch duration : {time.time() - epoch_start}')
    model.load_state_dict(best_model_wts)
    return model

The following function will be used to make predictions with our trained model.

In [56]:
def predict(model, X):
    """Run ``model`` on raw RSSI readings and return its outputs as NumPy.

    Parameters
    ----------
    model : torch.nn.Module
        Trained model; it is switched to evaluation mode.
    X : numpy.ndarray
        Raw (un-normalized) RSSI values; ``normalize_rssi`` is applied here.

    Returns
    -------
    numpy.ndarray
        Model outputs with a trailing singleton dimension squeezed away.
    """
    model.eval()
    # Follow the model's device instead of assuming CUDA; a model without
    # parameters falls back to the previous behaviour (CUDA).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')
    with torch.no_grad():
        inputs = torch.tensor(normalize_rssi(X), dtype=torch.float32).to(device)
        outputs = model(inputs).squeeze(-1)
    # Tensors produced under no_grad carry no graph, so no detach() is needed.
    return outputs.cpu().numpy()

Input normalization gives us better results.

In [57]:
def normalize_rssi(rssi):
    """Linearly rescale RSSI values so that -110 maps to 0 and 0 maps to 1."""
    shifted = rssi + 110
    return shifted / 110

We add white Gaussian noise to input data to reduce overfitting.

In [58]:
class AddGaussianNoise:
    """Callable transform that adds Gaussian noise to a tensor.

    Calling the object returns ``tensor + N(0, 1) * std + mean`` where the
    noise has the same shape as ``tensor``.

    Parameters
    ----------
    mean : float
        Offset added to the noise.
    std : float
        Scale applied to the standard-normal noise.
    """

    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        # Sample directly on the input's device: no host-to-GPU copy per
        # batch, and the transform also works for CPU tensors.
        noise = torch.randn(tensor.size(), device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return f'{self.__class__.__name__}(mean={self.mean}, std={self.std})'

In [59]:
from torch.utils.data import Dataset, DataLoader, TensorDataset


# Data loaders for the building classifier: normalized RSSI features paired
# with the building id of each sample. Only the training loader shuffles.
train_dataset = TensorDataset(
    torch.tensor(normalize_rssi(train_X), dtype=torch.float32),
    torch.tensor(train_building),
)
val_dataset = TensorDataset(
    torch.tensor(normalize_rssi(val_X), dtype=torch.float32),
    torch.tensor(val_building),
)

train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

Training of the building classifier :

In [60]:
from torch.optim import lr_scheduler
import torch.optim as optim

# Building classifier: 3 output classes, trained with cross-entropy.
building_clf = MLPClf(num_outputs=3).cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(building_clf.parameters(), lr=0.001)
# Divide the learning rate by 10 after 10 epochs without improvement of the
# validation loss.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases - confirm
# against the installed version.
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.1,
    patience=10,
    verbose=True,
)

building_clf = train(
    model=building_clf,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=scheduler,
    num_epochs=200,
    patience=20,
    transform=AddGaussianNoise(0.0, 0.1),
)

Epoch 1
Training loss : 0.12450788999960674
Learning rate : 0.001
Validation loss : 0.019261302578824517
Epoch duration : 1.6629669666290283
Epoch 2
Training loss : 0.05315937665934727
Learning rate : 0.001
Validation loss : 0.013369251083692206
Epoch duration : 1.550142765045166
Epoch 3
Training loss : 0.048001321214364796
Learning rate : 0.001
Validation loss : 0.010435926489555962
Epoch duration : 1.957895278930664
Epoch 4
Training loss : 0.04567119433749013
Learning rate : 0.001
Validation loss : 0.010584441272358698
Epoch duration : 1.6323180198669434
Epoch 5
Training loss : 0.03816487384218026
Learning rate : 0.001
Validation loss : 0.010306759979250594
Epoch duration : 1.5441865921020508
Epoch 6
Training loss : 0.039542515875726214
Learning rate : 0.001
Validation loss : 0.01148200830870979
Epoch duration : 1.5804731845855713
Epoch 7
Training loss : 0.03701384346629304
Learning rate : 0.001
Validation loss : 0.013008972057244083
Epoch duration : 1.798020362854004
Epoch 8
Trainin

Evaluation of the building classifier on the validation set.

In [61]:
# The classifier outputs one score per building; the index of the highest
# score is the predicted building id.
val_building_scores = predict(building_clf, val_X)
pred_building = val_building_scores.argmax(axis=1)

print('Validation building accuracy : ', accuracy(pred_building, val_building))

Validation building accuracy :  0.9983682983682983


Training of the floor classifier :

In [62]:
from torch.utils.data import Dataset, DataLoader, TensorDataset


# Data loaders for the floor classifier: same normalized RSSI features as
# before, now paired with the floor label of each sample.
train_dataset = TensorDataset(
    torch.tensor(normalize_rssi(train_X), dtype=torch.float32),
    torch.tensor(train_floor),
)
val_dataset = TensorDataset(
    torch.tensor(normalize_rssi(val_X), dtype=torch.float32),
    torch.tensor(val_floor),
)

train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

# Floor classifier: 5 output classes, trained with cross-entropy.
floor_clf = MLPClf(num_outputs=5).cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(floor_clf.parameters(), lr=0.001)
# NOTE(review): `verbose` is deprecated in recent PyTorch releases - confirm
# against the installed version.
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.1,
    patience=10,
    verbose=True,
)

floor_clf = train(
    model=floor_clf,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=scheduler,
    num_epochs=200,
    patience=20,
    transform=AddGaussianNoise(0.0, 0.1),
)

Epoch 1
Training loss : 0.6665984622346677
Learning rate : 0.001
Validation loss : 0.3451508802964407
Epoch duration : 1.493243932723999
Epoch 2
Training loss : 0.4123702911826653
Learning rate : 0.001
Validation loss : 0.2820905656279886
Epoch duration : 1.5758042335510254
Epoch 3
Training loss : 0.3256432318348226
Learning rate : 0.001
Validation loss : 0.2584413898122661
Epoch duration : 1.709512710571289
Epoch 4
Training loss : 0.2719575717924087
Learning rate : 0.001
Validation loss : 0.2486893957042519
Epoch duration : 1.4719984531402588
Epoch 5
Training loss : 0.22938896122017527
Learning rate : 0.001
Validation loss : 0.22199852622168906
Epoch duration : 1.5245661735534668
Epoch 6
Training loss : 0.2082682261622049
Learning rate : 0.001
Validation loss : 0.21510299536235192
Epoch duration : 1.4582386016845703
Epoch 7
Training loss : 0.18778171814311811
Learning rate : 0.001
Validation loss : 0.23434145556872382
Epoch duration : 1.4348077774047852
Epoch 8
Training loss : 0.17716

Evaluation of the floor classifier on the validation set.

In [65]:
# The classifier outputs one score per floor; the index of the highest score
# is the predicted floor.
val_floor_scores = predict(floor_clf, val_X)
pred_floor = val_floor_scores.argmax(axis=1)

print('Validation floor accuracy : ', accuracy(pred_floor, val_floor))

Validation floor accuracy :  0.9282051282051282


Finally, we evaluate our classifiers on the test set.

In [73]:
# Building prediction on the held-out test set: index of the highest score.
test_building_scores = predict(building_clf, test_X)
pred_building = test_building_scores.argmax(axis=1)

print('Test building accuracy : ', accuracy(pred_building, test_building))

Test building accuracy :  0.9990999099909991


In [68]:
# Floor prediction on the held-out test set: index of the highest score.
test_floor_scores = predict(floor_clf, test_X)
pred_floor = test_floor_scores.argmax(axis=1)

print('Test floor accuracy : ', accuracy(pred_floor, test_floor))

Test floor accuracy :  0.9297929792979298
