# Bluetooth RSSI - Walk datasets

## Python imports

In [10]:
import csv
import numpy as np
import os.path
import pandas as pd
import pickle
import re

## Parameters

In [11]:
# Selects which phone's capture is processed: True -> Pixel, False -> Redmi.
PIXEL_DATA = False

# Raw Bluetooth scan log for the selected phone.
# NOTE(review): the original comment here mentioned "dataset.csv" (unified "atrio"
# label) and "dataset-no-merge.csv"; neither file is referenced by this notebook —
# presumably a leftover, confirm before relying on it.
DATASET_FILE = (
    'raw_data/Pixel/Bluetooth/1228187371660504_teste_final.csv'
    if PIXEL_DATA
    else 'raw_data/Redmi/Bluetooth/97082214722162_teste_final.csv'
)

# Tab-separated walk schedule (begin, end, label) recorded alongside that capture.
WALK_TIMES = (
    'raw_data/Pixel/Bluetooth/walk-times.tsv'
    if PIXEL_DATA
    else 'raw_data/Redmi/Bluetooth/walk-times.tsv'
)

# Beacons kept in the dataset (lower-case identifiers); readings from any other
# device are dropped when the raw log is loaded.
BEACON_LIST = [
    '6859b8e6126b', 'c69294c41e47', 'e7b2d23d89ec', 'e9bdcc7d8fe6',
    'ef3b3dd2a002', 'ef3b3dd2a003', 'ef3b3dd2a005', 'ef3b3dd2a006',
    'ef3b3dd2a008', 'ef3b3dd2a009', 'ef3b3dd2a011', 'ef3b3dd2a012',
    'ef3b3dd2a013', 'ef3b3dd2a014', 'ef3b3dd2a015', 'ef3b3dd2a016',
    'ef3b3dd2a018', 'ef3b3dd2a019', 'ef3b3dd2a020',
    # 'ef3b3dd2c001' is deliberately left out (THIS ONE IS IGNORED).
    'ef3b3dd2e001', 'ef3b3dd2e003', 'ef3b3dd2e004', 'ef3b3dd2e006',
    'ef3b3dd2e007', 'ef3b3dd2e008', 'ef3b3dd2e009',
    'f80332eda645',
]

# When True, every walk label containing "AT_" is collapsed into the single label "A".
MERGE_ATRIO_LABELS = False

# Value written into the pivot table for a beacon with no reading in a time bin.
BEACON_NOT_VISIBLE_VALUE = -200

# Width of one time bin, in nanoseconds (1 s here).
# Other values that were tried: 5_000_000_000, 2_000_000_000, 500_000_000.
TIME_GRANULARITY = 1_000_000_000

## Read dataset 

In [12]:
# The raw log has no header row: one reading per line as (time, beacon, rssi).
COLS = ['time', 'beacon', 'rssi']
TYPES = {'time': np.int64, 'beacon': 'string', 'rssi': np.int8}

df = pd.read_csv(DATASET_FILE, header=None, names=COLS, dtype=TYPES)

# Bucket the timestamps into bins of TIME_GRANULARITY and rebase them so that the
# first row of the file falls in bin 0 (done before filtering, on purpose).
time_bins = df['time'] // TIME_GRANULARITY
df['time'] = time_bins - time_bins[0]

# Beacon identifiers are compared in lower case against BEACON_LIST.
df['beacon'] = df['beacon'].str.lower()

# Keep only known beacons and average all readings of a beacon within a time bin.
df = (
    df[df['beacon'].isin(BEACON_LIST)]
    .sort_values(by=['time', 'beacon'])
    .groupby(by=['time', 'beacon'])
    .mean()
    .reset_index()
)
RSSI_DATA = df
df

Unnamed: 0,time,beacon,rssi
0,0,e7b2d23d89ec,-96.000000
1,0,e9bdcc7d8fe6,-80.666667
2,0,ef3b3dd2a009,-98.500000
3,0,ef3b3dd2a011,-96.000000
4,0,ef3b3dd2a013,-98.000000
...,...,...,...
9097,550,ef3b3dd2a011,-87.000000
9098,550,ef3b3dd2a012,-89.000000
9099,550,ef3b3dd2a013,-76.000000
9100,550,ef3b3dd2a019,-85.000000


## Read walk data

In [13]:
# Load the walk schedule: every TSV row carries a begin time, an end time (both
# written as "minutes:seconds") and a location label in columns 1-3; column 0 is
# not used. The result is a frame with columns begin / end / label, where begin
# and end are expressed in time bins (floats) so they can be compared with the
# binned 'time' column of the RSSI data.

# Raw string: in a plain literal "\d" is an invalid escape sequence, which newer
# Python versions report with a warning.
time_re = re.compile(r'^(\d+):(\d+)$')

# Number of time bins per second (schedule times are in seconds, bins are
# TIME_GRANULARITY nanoseconds wide).
bins_per_second = 1_000_000_000 / TIME_GRANULARITY

times = []
with open(WALK_TIMES, newline='') as csv_input:
    for row in csv.reader(csv_input, delimiter='\t'):
        if not row:
            # csv.reader yields an empty list for a blank line; nothing to parse.
            continue
        (begin, end, label) = row[1:4]
        print(begin, end, label)

        bounds = []
        for stamp in (begin, end):
            m = time_re.match(stamp)
            if m is None:
                # Fail with the offending value instead of an AttributeError on None.
                raise ValueError(
                    f'{WALK_TIMES}: expected a "minutes:seconds" time, got {stamp!r}'
                )
            seconds = int(m.group(1)) * 60 + int(m.group(2))
            bounds.append(seconds * bins_per_second)

        if MERGE_ATRIO_LABELS and "AT_" in label:
            label = "A"
        times.append((bounds[0], bounds[1], label))

df = pd.DataFrame(times, columns=['begin', 'end', 'label'])
WALK_TIMES_DATA = df
df

0:00 0:20 SS
0:21 0:40 TMA
0:41 1:00 DC
1:01 1:17 DG
1:17 1:50 TS
1:51 2:05 AH
2:06 2:49 SA
2:50 3:40 DF
3:41 4:00 CN
4:01 4:12 SN
4:13 4:28 ES
4:29 5:04 GL
5:05 5:30 HA
5:31 5:45 AT_I1
5:46 6:23 AT_M
6:24 6:50 AT_CH
6:51 7:18 AT_S
7:19 7:48 AT_I2
7:49 8:07 AT_CA
8:08 8:33 AT_O1
8:34 8:58 AT_O2


Unnamed: 0,begin,end,label
0,0.0,20.0,SS
1,21.0,40.0,TMA
2,41.0,60.0,DC
3,61.0,77.0,DG
4,77.0,110.0,TS
5,111.0,125.0,AH
6,126.0,169.0,SA
7,170.0,220.0,DF
8,221.0,240.0,CN
9,241.0,252.0,SN


## Map labels 

In [14]:
def get_label_for_item(x, walk_times):
    """Return the walk label whose interval contains the reading's time.

    Parameters:
        x: a mapping-like row; only ``x['time']`` is read.
        walk_times: DataFrame with ``begin``, ``end`` and ``label`` columns.

    Returns:
        The ``label`` of the first row (in frame order) for which
        ``begin <= x['time'] <= end`` — both bounds inclusive — or the string
        ``'NONE'`` when no interval matches.
    """
    moment = x['time']
    intervals = zip(walk_times['begin'], walk_times['end'], walk_times['label'])
    # First matching interval wins, so a time shared by two adjacent intervals
    # is attributed to the earlier row.
    return next(
        (name for start, stop, name in intervals if moment >= start and moment <= stop),
        'NONE',
    )
# Tag every reading with the walk label active at its time bin ...
row_labels = RSSI_DATA.apply(
    lambda reading: get_label_for_item(reading, WALK_TIMES_DATA),
    axis=1,
)
RSSI_DATA['label'] = row_labels

# ... and drop the readings that fall outside every walk interval.
inside_walk = RSSI_DATA['label'] != 'NONE'
RSSI_DATA = RSSI_DATA[inside_walk]
RSSI_DATA

Unnamed: 0,time,beacon,rssi,label
0,0,e7b2d23d89ec,-96.000000,SS
1,0,e9bdcc7d8fe6,-80.666667,SS
2,0,ef3b3dd2a009,-98.500000,SS
3,0,ef3b3dd2a011,-96.000000,SS
4,0,ef3b3dd2a013,-98.000000,SS
...,...,...,...,...
8926,538,ef3b3dd2e006,-88.333333,AT_O2
8927,538,ef3b3dd2e007,-92.250000,AT_O2
8928,538,ef3b3dd2e008,-81.500000,AT_O2
8929,538,ef3b3dd2e009,-92.750000,AT_O2


In [15]:
# Sanity check: number of labelled readings per walk label.
df = RSSI_DATA.groupby(by=['label']).size()
n_labels = len(df)
print(n_labels, 'distinct labels')
df

21 distinct labels


label
AH       207
AT_CA    367
AT_CH    563
AT_I1    257
AT_I2    562
AT_M     737
AT_O1    524
AT_O2    515
AT_S     511
CN       309
DC       350
DF       791
DG       303
ES       220
GL       467
HA       389
SA       726
SN       260
SS       228
TMA      294
TS       351
dtype: int64

In [16]:
# Sanity check: number of labelled readings per beacon.
df = RSSI_DATA.groupby(by=['beacon']).size()
n_beacons = len(df)
print(n_beacons, 'distinct beacons')
df

27 distinct beacons


beacon
6859b8e6126b     59
c69294c41e47    315
e7b2d23d89ec    407
e9bdcc7d8fe6    380
ef3b3dd2a002    435
ef3b3dd2a003    101
ef3b3dd2a005    419
ef3b3dd2a006    427
ef3b3dd2a008    352
ef3b3dd2a009    417
ef3b3dd2a011    421
ef3b3dd2a012    412
ef3b3dd2a013    433
ef3b3dd2a014    421
ef3b3dd2a015    384
ef3b3dd2a016    299
ef3b3dd2a018    347
ef3b3dd2a019    345
ef3b3dd2a020    367
ef3b3dd2e001    176
ef3b3dd2e003    426
ef3b3dd2e004     86
ef3b3dd2e006    238
ef3b3dd2e007    322
ef3b3dd2e008    318
ef3b3dd2e009    253
f80332eda645    371
dtype: int64

## Derive pivot table

In [17]:
# Reshape to one row per (time, label) with one column per beacon; a beacon with
# no reading in that time bin gets BEACON_NOT_VISIBLE_VALUE.
source_name = 'Pixel' if PIXEL_DATA else 'Redmi'
pivoted = pd.pivot_table(
    RSSI_DATA,
    index=['time', 'label'],
    columns=['beacon'],
    values='rssi',
    fill_value=BEACON_NOT_VISIBLE_VALUE,
)
# Record which phone produced the rows, then turn time/label back into columns.
pivoted['source'] = source_name
RSSI_DATA = pivoted.reset_index()
RSSI_DATA

beacon,time,label,6859b8e6126b,c69294c41e47,e7b2d23d89ec,e9bdcc7d8fe6,ef3b3dd2a002,ef3b3dd2a003,ef3b3dd2a005,ef3b3dd2a006,...,ef3b3dd2a020,ef3b3dd2e001,ef3b3dd2e003,ef3b3dd2e004,ef3b3dd2e006,ef3b3dd2e007,ef3b3dd2e008,ef3b3dd2e009,f80332eda645,source
0,0,SS,-200.0,-200.000000,-96.000000,-80.666667,-200.000000,-200.000000,-200.00,-200.000000,...,-200.000000,-82.666667,-96.333333,-94.666667,-200.000000,-200.000000,-200.000000,-200.000000,-200.000000,Redmi
1,1,SS,-200.0,-200.000000,-98.250000,-79.666667,-200.000000,-200.000000,-200.00,-200.000000,...,-200.000000,-83.333333,-96.000000,-92.500000,-200.000000,-200.000000,-200.000000,-200.000000,-200.000000,Redmi
2,2,SS,-200.0,-200.000000,-200.000000,-80.333333,-99.250000,-200.000000,-200.00,-96.333333,...,-200.000000,-80.600000,-98.333333,-84.333333,-200.000000,-200.000000,-200.000000,-200.000000,-200.000000,Redmi
3,3,SS,-200.0,-101.000000,-97.500000,-76.600000,-95.666667,-200.000000,-200.00,-95.000000,...,-200.000000,-76.200000,-97.000000,-84.400000,-200.000000,-200.000000,-200.000000,-100.000000,-200.000000,Redmi
4,4,SS,-200.0,-200.000000,-90.500000,-74.666667,-95.333333,-200.000000,-200.00,-102.000000,...,-200.000000,-74.750000,-96.000000,-80.200000,-200.000000,-200.000000,-200.000000,-200.000000,-200.000000,Redmi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,534,AT_O2,-99.0,-200.000000,-81.333333,-90.333333,-69.000000,-86.333333,-71.00,-73.000000,...,-83.000000,-200.000000,-85.500000,-200.000000,-97.000000,-98.000000,-82.333333,-85.500000,-94.500000,Redmi
457,535,AT_O2,-97.0,-98.000000,-86.200000,-92.000000,-68.000000,-94.500000,-73.00,-73.333333,...,-88.500000,-200.000000,-83.400000,-200.000000,-86.500000,-92.000000,-86.250000,-88.666667,-93.000000,Redmi
458,536,AT_O2,-200.0,-97.000000,-87.250000,-91.250000,-68.500000,-90.666667,-73.00,-72.500000,...,-88.333333,-200.000000,-86.600000,-200.000000,-84.000000,-97.500000,-84.200000,-93.333333,-88.333333,Redmi
459,537,AT_O2,-98.0,-94.333333,-81.000000,-91.250000,-71.800000,-94.000000,-71.25,-74.666667,...,-200.000000,-200.000000,-87.250000,-200.000000,-87.333333,-86.285714,-86.000000,-93.000000,-90.750000,Redmi


## Write derived walk data (ground truth + pivot table)

In [18]:
# Export the labelled pivot table as a tab-separated file with a header row and
# no index column: PW.tsv for the Pixel capture, RW.tsv for the Redmi capture.
TSV_OUTPUT_FILE = 'datasets/brssi/PW.tsv' if PIXEL_DATA else 'datasets/brssi/RW.tsv'
# to_csv does not create missing directories, so make sure the target folder
# exists (no-op when it is already there).
os.makedirs(os.path.dirname(TSV_OUTPUT_FILE), exist_ok=True)
RSSI_DATA.to_csv(TSV_OUTPUT_FILE, sep='\t', header=True, index=False)