# Indoor localization

An indoor positioning system (IPS) is a system to locate objects or people inside a building using radio waves, magnetic fields, acoustic signals, or other sensory information collected by mobile devices. There are several commercial systems on the market, but there is no standard for an IPS system.

IPSes use different technologies, including distance measurement to nearby anchor nodes (nodes with known positions, e.g., WiFi access points), magnetic positioning, dead reckoning. They either actively locate mobile devices and tags or provide ambient location or environmental context for devices to get sensed.

According to the [report](https://www.marketsandmarkets.com/Market-Reports/indoor-positioning-navigation-ipin-market-989.html), the global indoor location market size is expected to grow from USD 7.11 Billion in 2017 to USD 40.99 Billion by 2022, at a Compound Annual Growth Rate (CAGR) of 42.0% during the forecast period. Hassle-free navigation, improved decision-making, and increased adoption of connected devices are boosting the growth of the indoor location market across the globe.

In this problem, you are going to use signals from seven different wi-fi access points to define in which room the user is located.

In [2]:
import pandas
import numpy as np
import xgboost
from sklearn.metrics import accuracy_score, f1_score

Loading the data and breaking it into training and cross-validation sets.

In [3]:
train_set = pandas.read_csv('train_set.csv')
cv_set = pandas.read_csv('cv_set.csv')

train_data = train_set[['wifi'+str(i) for i in range(1, len(train_set.columns) - 1)]]
train_labels = train_set['room']
cv_data = cv_set[['wifi'+str(i) for i in range(1, len(cv_set.columns) - 1)]]
cv_labels = cv_set['room']

In [4]:
print(train_data[:10])
print(train_labels[:10])


   wifi1  wifi2  wifi3  wifi4  wifi5  wifi6  wifi7
0    -68    -57    -61    -65    -71    -85    -85
1    -63    -60    -60    -67    -76    -85    -84
2    -61    -60    -68    -62    -77    -90    -80
3    -65    -61    -65    -67    -69    -87    -84
4    -61    -63    -58    -66    -74    -87    -82
5    -62    -60    -66    -68    -80    -86    -91
6    -65    -59    -61    -67    -72    -86    -81
7    -63    -57    -61    -65    -73    -84    -84
8    -66    -60    -65    -62    -70    -85    -83
9    -67    -60    -59    -61    -71    -86    -91
0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: room, dtype: int64


In [5]:
print(cv_data[:10])
print(cv_labels[:10])

   wifi1  wifi2  wifi3  wifi4  wifi5  wifi6  wifi7
0    -64    -56    -61    -66    -71    -82    -81
1    -63    -65    -60    -63    -77    -81    -87
2    -64    -55    -63    -66    -76    -88    -83
3    -65    -60    -59    -63    -76    -86    -82
4    -67    -61    -62    -67    -77    -83    -91
5    -61    -59    -65    -63    -74    -89    -87
6    -63    -56    -63    -65    -72    -82    -89
7    -66    -59    -64    -68    -68    -97    -83
8    -67    -57    -64    -71    -75    -89    -87
9    -63    -57    -59    -67    -71    -82    -93
0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: room, dtype: int64


In [6]:
train_labels.describe()
# train_data.sample(5, random_state = 0)

count    1603.000000
mean        2.503431
std         1.109130
min         1.000000
25%         2.000000
50%         3.000000
75%         3.000000
max         4.000000
Name: room, dtype: float64

### Training XGBoost regressor

In [7]:
dtrain = xgboost.DMatrix(train_data, label=train_labels)
dtest = xgboost.DMatrix(cv_data)

In [8]:
model = xgboost.XGBClassifier()

In [9]:
m = model.fit(train_data,train_labels)
preds = m.predict(cv_data)
print('--Accuracy score:\t{metric:.4f}'.format(metric=accuracy_score(y_pred=preds, y_true=cv_labels)))
print('--F1 score:\t{metric:.4f}'.format(metric=f1_score(y_pred=preds, y_true=cv_labels, average = 'macro')))

--Accuracy score:	0.9824
--F1 score:	0.9810


  if diff:


### Tuning hyperparameters

In [12]:
import hyperopt
from hyperopt import hp, Trials, fmin, tpe
from hyperopt.pyll import scope
from sklearn.model_selection import KFold, cross_val_score
from time import sleep

In [13]:
class Model(object):
    

    def __init__(self):
        self._model = None
        self._best_params = None
        self._best_score = None

    def fit(self, x_train, y_train):
        """
        Fit the model
        """

        
        space = {
            'eta':
               scope.float(hp.qloguniform('dtree_eta', np.log(1e-4), np.log(1), 1e-5)),
#             'gamma':
#                scope.int(hp.qloguniform('dtree_gamma', np.log(1), np.log(1000), 1)),
            'max_depth':
                scope.int(hp.qloguniform('dtree_max_depth', np.log(1), np.log(100), 1)),
#             'min_child_weight': 
#                 scope.int(hp.qloguniform('min_child_weight', np.log(1), np.log(100), 1)),
            'subsample':
                scope.float(hp.qloguniform('dtree_subsample', np.log(0.5), np.log(1), 1e-2)),
            'lambda':
                scope.int(hp.qloguniform('dtree_lambda', np.log(1e-2), np.log(100), 1)),
            'alpha':
            
                scope.int(hp.qloguniform('dtree_alpha', np.log(1e-2), np.log(100), 1))
            
            }
        # fit the model
        trials = Trials()
        max_evals = 100
        best = fmin(
            fn=lambda params: self.train(x_train=x_train, 
                                         y_train=y_train, 
                                         seed=1234
                                        ),
            space=space,
            algo=tpe.suggest,
            max_evals=max_evals,
            trials=trials,
            rstate=np.random.RandomState(1234),
        )

        self._best_params = hyperopt.space_eval(space, best)
        self._best_score = -trials.best_trial['result']['loss']
        print('Best params:\n{}'.format(self._best_params))

        self._model = xgboost.XGBClassifier(seed=1234, **dict(**self._best_params))
        self._model.fit(x_train, y_train)

    @staticmethod
    def train(x_train, y_train, **kwargs):
        """
        Function to optimize - random forest model trained and tested with hyperparameters

        :param kwargs: hyperparameters for the sklearn.ensemble.RandomForestClassifier
        """
        
        train_data = xgboost.DMatrix(x_train, label=y_train)
        
        rf = xgboost.XGBClassifier(**kwargs)
        cv = KFold(n_splits=3, shuffle=True, random_state=1234)
        metric = cross_val_score(rf, x_train, y_train, cv=cv,
                                 scoring='neg_log_loss',
                                 )
        return metric[1]

    def predict(self, x_test):
        return self.model.predict(x_test)

    def predict_proba(self, x_test):
        return self.model.predict_proba(x_test)

    @property
    def model(self):
        return self._model

    @property
    def best_params(self):
        return self._best_params

    @property
    def best_score(self):
        return self._best_score

In [14]:
model = Model()
model.fit(
    x_train=train_data,
    y_train=train_labels)

Best params:
{'alpha': 0, 'eta': 0.30800000000000005, 'lambda': 0, 'max_depth': 2, 'subsample': 0.85}


In [15]:
proba = model.predict_proba(x_test=cv_data)
preds = model.predict(x_test=cv_data)

print('--Accuracy:\t{metric:.4f}'.format(metric=accuracy_score(y_pred=preds, y_true=cv_labels)))
print('--F1 score:\t{metric:.4f}'.format(metric=f1_score(y_pred=preds, y_true=cv_labels, average = 'macro')))

--Accuracy:	0.9824
--F1 score:	0.9810


  if diff:
