# Bluetooth RSSI - Walk datasets

## Python imports

In [39]:
import csv
import numpy as np
import os.path
import pandas as pd
import pickle
import re

## Parameters

In [40]:
# Which phone's recording to process: True -> Pixel, False -> Xiaomi.
PIXEL_DATA = True

# Input files for the selected phone: the raw Bluetooth scan log and the
# tab-separated table of walk segments (begin / end / label).
# NOTE(review): an older comment here referred to "dataset.csv" (unified "atrio"
# label) and "dataset-no-merge.csv"; those names do not match the paths below.
DATASET_FILE, WALK_TIMES = (
  (
    '../dados_indoor_location/Pixel/Bluetooth/1228187371660504_teste_final.csv',
    '../dados_indoor_location/walks/Walk_Pixel.tsv',
  )
  if PIXEL_DATA else
  (
    '../dados_indoor_location/Xiaomi/Bluetooth/97082214722162_teste_final.csv',
    '../dados_indoor_location/walks/Walk_Xiaomi.tsv',
  )
)

# Original note: possible values are KNN, DECISION_TREE, RANDOM_FOREST.
# NOTE(review): not referenced by any cell visible in this notebook.
MODEL_TYPE = 'RANDOM_FOREST-generic'

# Beacons kept by the dataset filter (compared against lower-cased beacon ids).
# "ef3b3dd2c001" is deliberately left out (ignored).
BEACON_LIST = [
  "6859b8e6126b", "c69294c41e47", "e7b2d23d89ec", "e9bdcc7d8fe6",
  "ef3b3dd2a002", "ef3b3dd2a003", "ef3b3dd2a005", "ef3b3dd2a006",
  "ef3b3dd2a008", "ef3b3dd2a009", "ef3b3dd2a011", "ef3b3dd2a012",
  "ef3b3dd2a013", "ef3b3dd2a014", "ef3b3dd2a015", "ef3b3dd2a016",
  "ef3b3dd2a018", "ef3b3dd2a019", "ef3b3dd2a020",
  "ef3b3dd2e001", "ef3b3dd2e003", "ef3b3dd2e004", "ef3b3dd2e006",
  "ef3b3dd2e007", "ef3b3dd2e008", "ef3b3dd2e009",
  "f80332eda645",
]

# When True, every walk label containing "AT_" is collapsed into the label "A".
MERGE_ATRIO_LABELS = False

# Placeholder RSSI written into the pivot table where a beacon has no reading.
BEACON_NOT_VISIBLE_VALUE = -200

# Width of one time bin in nanoseconds (1_000_000_000 = one second).
# Other values that were tried: 5_000_000_000, 2_000_000_000, 500_000_000.
TIME_GRANULARITY = 1_000_000_000

## Read dataset 

In [41]:
# The scan log has no header row; each line is (timestamp, beacon id, RSSI).
COLS = ['time', 'beacon', 'rssi']
TYPES = {'time': np.int64, 'beacon': 'string', 'rssi': np.int8}

raw = pd.read_csv(DATASET_FILE, header=None, names=COLS, dtype=TYPES)

# Bucket timestamps into TIME_GRANULARITY-wide bins, then shift so that the
# bin of the first row in the file becomes 0.
time_bins = raw['time'] // TIME_GRANULARITY

df = (
    raw
    .assign(time=time_bins - time_bins[0],
            beacon=raw['beacon'].str.lower())
    # Keep only the beacons we care about.
    .loc[lambda frame: frame['beacon'].isin(BEACON_LIST)]
    .sort_values(by=['time', 'beacon'])
    # One row per (time bin, beacon): the mean RSSI seen in that bin.
    .groupby(by=['time', 'beacon'])
    .mean()
    .reset_index()
)
RSSI_DATA = df
df

Unnamed: 0,time,beacon,rssi
0,0,ef3b3dd2e001,-89.00
1,0,ef3b3dd2e004,-90.00
2,1,e7b2d23d89ec,-104.00
3,1,e9bdcc7d8fe6,-87.75
4,1,ef3b3dd2a008,-102.00
...,...,...,...
6016,383,ef3b3dd2a016,-92.00
6017,383,ef3b3dd2e001,-103.00
6018,383,ef3b3dd2e003,-93.25
6019,383,ef3b3dd2e006,-95.00


## Read walk data

In [42]:
# Walk timestamps are written as "M:SS". The pattern is a raw string so that
# "\d" reaches the regex engine instead of being an invalid string escape.
time_re = re.compile(r'^(\d+):(\d+)$')


def _walk_time_to_seconds(stamp):
    """Convert an "M:SS" walk timestamp (e.g. '3:07') into whole seconds.

    Raises:
        ValueError: if `stamp` does not match `time_re`.
    """
    m = time_re.match(stamp)
    if m is None:
        raise ValueError(f'Invalid walk time {stamp!r}: expected M:SS')
    return int(m.group(1)) * 60 + int(m.group(2))


# Number of time bins per second; converts walk times to the same bin unit
# that is used for the RSSI 'time' column.
bins_per_second = 1_000_000_000 / TIME_GRANULARITY

times = []
with open(WALK_TIMES, newline='') as csv_input:
    for row in csv.reader(csv_input, delimiter='\t'):
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            continue
        # Columns 1..3 hold begin, end and label; column 0 is not used here.
        (begin, end, label) = row[1:4]
        print(begin, end, label)
        begin = _walk_time_to_seconds(begin) * bins_per_second
        end = _walk_time_to_seconds(end) * bins_per_second
        if MERGE_ATRIO_LABELS and "AT_" in label:
            label = "A"
        times.append((begin, end, label))

df = pd.DataFrame(times, columns=['begin', 'end', 'label'])
WALK_TIMES_DATA = df
df

0:00 0:19 SS
0:20 0:40 TMA
0:41 0:54 DC
0:55 1:11 DG
1:12 1:31 TS
1:32 1:47 AH
1:48 2:34 SA
2:35 3:06 DF
3:07 3:24 CN
3:24 3:36 SN
3:37 3:55 ES
3:56 4:12 GL
4:13 4:27 HA
4:28 4:39 AT_I1
4:40 4:51 AT_M
4:52 5:05 AT_CH
5:06 5:16 AT_S
5:17 5:36 AT_I2
5:37 5:47 AT_CA
5:48 6:04 AT_O1
6:05 6:13 AT_O2


Unnamed: 0,begin,end,label
0,0.0,19.0,SS
1,20.0,40.0,TMA
2,41.0,54.0,DC
3,55.0,71.0,DG
4,72.0,91.0,TS
5,92.0,107.0,AH
6,108.0,154.0,SA
7,155.0,186.0,DF
8,187.0,204.0,CN
9,204.0,216.0,SN


## Map labels 

In [43]:
def get_label_for_item(x, walk_times):
    """Return the walk label whose [begin, end] interval contains x['time'].

    Both interval ends are inclusive. Rows of `walk_times` are scanned in
    order, so when intervals overlap the first matching row wins. Returns the
    string 'NONE' when no interval contains the time.
    """
    moment = x['time']
    intervals = zip(walk_times['begin'], walk_times['end'], walk_times['label'])
    return next(
        (name for start, stop, name in intervals if start <= moment <= stop),
        'NONE',
    )
# Tag every RSSI row with the walk segment its time bin falls into, then
# discard the rows that belong to no segment.
walk_labels = RSSI_DATA.apply(get_label_for_item, axis=1, args=(WALK_TIMES_DATA,))
RSSI_DATA['label'] = walk_labels
RSSI_DATA = RSSI_DATA[walk_labels != 'NONE']
RSSI_DATA

Unnamed: 0,time,beacon,rssi,label
0,0,ef3b3dd2e001,-89.00,SS
1,0,ef3b3dd2e004,-90.00,SS
2,1,e7b2d23d89ec,-104.00,SS
3,1,e9bdcc7d8fe6,-87.75,SS
4,1,ef3b3dd2a008,-102.00,SS
...,...,...,...,...
5801,373,ef3b3dd2e001,-101.00,AT_O2
5802,373,ef3b3dd2e003,-88.50,AT_O2
5803,373,ef3b3dd2e006,-77.00,AT_O2
5804,373,ef3b3dd2e008,-89.25,AT_O2


In [44]:
# Number of RSSI rows available for each walk label.
df = RSSI_DATA.groupby(by=['label']).size()
print(f'{df.count()} distinct labels')
df

21 distinct labels


label
AH       148
AT_CA    242
AT_CH    304
AT_I1    205
AT_I2    452
AT_M     214
AT_O1    397
AT_O2    185
AT_S     245
CN       303
DC       267
DF       351
DG       288
ES       280
GL       190
HA       186
SA       668
SN       223
SS       166
TMA      325
TS       167
dtype: int64

In [45]:
# Number of RSSI rows available for each beacon.
df = RSSI_DATA.groupby(by=['beacon']).size()
print(f'{df.count()} distinct beacons')
df

27 distinct beacons


beacon
6859b8e6126b    138
c69294c41e47    193
e7b2d23d89ec    259
e9bdcc7d8fe6    233
ef3b3dd2a002    283
ef3b3dd2a003    162
ef3b3dd2a005    262
ef3b3dd2a006    250
ef3b3dd2a008    263
ef3b3dd2a009    280
ef3b3dd2a011    265
ef3b3dd2a012    308
ef3b3dd2a013    240
ef3b3dd2a014    267
ef3b3dd2a015    223
ef3b3dd2a016    217
ef3b3dd2a018    206
ef3b3dd2a019    193
ef3b3dd2a020    218
ef3b3dd2e001     93
ef3b3dd2e003    274
ef3b3dd2e004     54
ef3b3dd2e006    168
ef3b3dd2e007    227
ef3b3dd2e008    171
ef3b3dd2e009    136
f80332eda645    223
dtype: int64

## Derive pivot table

In [46]:
# Wide layout: one row per (time bin, label) and one column per beacon.
# Cells with no reading get BEACON_NOT_VISIBLE_VALUE.
rssi_wide = pd.pivot_table(RSSI_DATA,
                           values='rssi',
                           index=['time', 'label'],
                           columns=['beacon'],
                           fill_value=BEACON_NOT_VISIBLE_VALUE)

# pivot_table only creates columns for beacons that actually occur in the
# data. Reindex to the full BEACON_LIST so the output always has the same
# beacon columns in the same order, whichever phone's recording is processed;
# a beacon that was never seen becomes a column of BEACON_NOT_VISIBLE_VALUE.
# (The data was already restricted to BEACON_LIST, so nothing is dropped.)
rssi_wide = rssi_wide.reindex(columns=BEACON_LIST,
                              fill_value=BEACON_NOT_VISIBLE_VALUE)

# Record which phone produced these rows.
rssi_wide['source'] = 'Pixel' if PIXEL_DATA else 'Xiaomi'

# Turn the (time, label) index back into ordinary leading columns.
RSSI_DATA = rssi_wide.reset_index()
RSSI_DATA

beacon,time,label,6859b8e6126b,c69294c41e47,e7b2d23d89ec,e9bdcc7d8fe6,ef3b3dd2a002,ef3b3dd2a003,ef3b3dd2a005,ef3b3dd2a006,...,ef3b3dd2a020,ef3b3dd2e001,ef3b3dd2e003,ef3b3dd2e004,ef3b3dd2e006,ef3b3dd2e007,ef3b3dd2e008,ef3b3dd2e009,f80332eda645,source
0,0,SS,-200.0,-200.0,-200.00,-200.000000,-200.000000,-200.0,-200.000000,-200.000000,...,-200.000000,-89.00,-200.0,-90.00,-200.00,-200.0,-200.00,-200.0,-200.0,Pixel
1,1,SS,-200.0,-200.0,-104.00,-87.750000,-200.000000,-200.0,-200.000000,-200.000000,...,-200.000000,-81.40,-200.0,-80.60,-200.00,-200.0,-200.00,-200.0,-200.0,Pixel
2,2,SS,-200.0,-200.0,-200.00,-81.800000,-200.000000,-200.0,-200.000000,-200.000000,...,-200.000000,-77.75,-200.0,-90.00,-200.00,-200.0,-200.00,-200.0,-200.0,Pixel
3,3,SS,-200.0,-200.0,-200.00,-84.800000,-200.000000,-200.0,-200.000000,-200.000000,...,-200.000000,-83.60,-102.0,-79.80,-200.00,-200.0,-200.00,-200.0,-200.0,Pixel
4,4,SS,-200.0,-200.0,-200.00,-82.666667,-200.000000,-200.0,-200.000000,-200.000000,...,-200.000000,-86.00,-200.0,-84.75,-200.00,-200.0,-200.00,-200.0,-200.0,Pixel
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,369,AT_O2,-99.0,-103.0,-85.00,-96.000000,-67.000000,-100.5,-79.500000,-90.750000,...,-87.500000,-200.00,-93.5,-200.00,-86.80,-200.0,-85.00,-94.0,-94.5,Pixel
370,370,AT_O2,-101.0,-200.0,-79.25,-200.000000,-65.666667,-90.0,-79.500000,-95.333333,...,-91.500000,-200.00,-90.0,-200.00,-82.75,-200.0,-89.00,-200.0,-200.0,Pixel
371,371,AT_O2,-200.0,-104.0,-78.25,-99.000000,-70.500000,-200.0,-80.000000,-92.000000,...,-91.333333,-200.00,-93.8,-200.00,-83.00,-200.0,-91.50,-100.0,-93.0,Pixel
372,372,AT_O2,-200.0,-100.0,-77.00,-93.666667,-69.250000,-200.0,-200.000000,-85.000000,...,-95.750000,-200.00,-89.0,-200.00,-84.80,-200.0,-91.00,-95.0,-200.0,Pixel


## Write derived walk data (ground truth + pivot table)

In [47]:
# One tab-separated output file per phone, with a header row and no index column.
if PIXEL_DATA:
    TSV_OUTPUT_FILE = 'rssi-walk-Pixel.tsv'
else:
    TSV_OUTPUT_FILE = 'rssi-walk-Xiaomi.tsv'
RSSI_DATA.to_csv(TSV_OUTPUT_FILE, index=False, header=True, sep='\t')