In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
dataset_name = 'LIB1' # LIB1 # LIB2


# Output folder for the generated CSV files (note the trailing slash).
work_path = './' + dataset_name + '/'

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this cell on dataset pickles obtained from a trusted source.
# The context manager closes the handle even if unpickling raises.
with open('./raw_datasets/LIB/' + dataset_name + '/dataset.pickle', 'rb') as file:
    dataframe = pickle.load(file)


# Constant columns expected by the target schema.
dataframe['coord_z'] = 'NULL'
dataframe['building_id'] = 'library'
dataframe['site'] = 'main_room'
dataframe['tile'] = 'single_tile'  # placeholder, overwritten by the logical tiles below
dataframe['user_id'] = 1
dataframe['device_id'] = 1
dataframe['epoch'] = 'NULL'

wap_columns = [x for x in dataframe.columns if 'WAP' in x]

# Replace the value 100 with 'NULL' in the WAP readings only, so that a
# coordinate or floor id that happens to equal 100 is never blanked out.
# NOTE(review): 100 is presumably the dataset's "access point not detected"
# marker - confirm against the dataset documentation.
dataframe[wap_columns] = dataframe[wap_columns].replace(100, 'NULL')

dataframe = dataframe[wap_columns + ['coord_x', 'coord_y', 'coord_z', 'floor_id', 'building_id', 'site', 'tile', 'user_id', 'device_id', 'epoch', 'set']]
# The former index becomes the first column; it is renamed to 'fingerprint_id'
# by the positional column assignment below.
dataframe = dataframe.reset_index()

dataframe.columns = ['fingerprint_id'] + wap_columns + ['coord_x', 'coord_y', 'coord_z', 'floor', 'building', 'site', 'tile', 'user_id', 'device_id', 'epoch', 'set']


# Assigning the logical tiles to the training set fingerprints.
# Building and floor are deliberately left out of the grouping key, since the
# tile names need not be distinct among the different floors.
group_tiles = dataframe[dataframe['set'] != 'test'][['coord_x', 'coord_y', 'set']].groupby(['coord_x', 'coord_y']).count()
# One tile per distinct (coord_x, coord_y) pair, numbered in the order of the
# grouped index.
coords_map_tile = {coords: 'tile_' + str(number) for number, coords in enumerate(group_tiles.index)}
tile_count = len(coords_map_tile)

# Only training fingerprints receive a tile; every other row is marked 'NULL'.
# Zipping the three needed columns avoids building a full row Series per record.
tilevals = [
    coords_map_tile[(coord_x, coord_y)] if set_name == 'training' else 'NULL'
    for coord_x, coord_y, set_name in zip(dataframe['coord_x'], dataframe['coord_y'], dataframe['set'])
]

dataframe['tile'] = tilevals


dataframe

Unnamed: 0,fingerprint_id,WAP000,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,...,coord_y,coord_z,floor,building,site,tile,user_id,device_id,epoch,set
0,0,-84,-53,-58,-85,-84,-85,-58,-86,-52,...,29.207532,,3,library,main_room,tile_23,1,1,,training
1,1,-82,-52,-56,,-86,-83,-55,-87,-50,...,29.207532,,3,library,main_room,tile_23,1,1,,training
2,2,-83,-51,-60,-90,-83,-81,-58,-90,-52,...,29.207532,,3,library,main_room,tile_23,1,1,,training
3,3,-84,-50,-57,-89,-84,-81,-56,-89,-52,...,29.207532,,3,library,main_room,tile_23,1,1,,training
4,4,-82,-53,-56,-90,-88,-83,-58,-91,-52,...,29.207532,,3,library,main_room,tile_23,1,1,,training
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3691,3691,,,,,,-92,-83,,-87,...,29.207532,,5,library,main_room,,1,1,,test
3692,3692,,,,,,-85,,,,...,29.207532,,5,library,main_room,,1,1,,test
3693,3693,,,,,,-84,-84,-82,-87,...,29.207532,,5,library,main_room,,1,1,,test
3694,3694,,,,,,-85,,-78,-88,...,29.207532,,5,library,main_room,,1,1,,test


In [3]:
# Generating the file places.csv

# One row per distinct (building, floor, site) combination found in the
# fingerprints. 'floor_number' duplicates 'floor'; the area columns are
# exported as the 'NULL' placeholder.
# NOTE(review): 2.65 is presumably the room/floor height in metres - confirm.
places = (
    dataframe[['building', 'floor', 'site']]
    .drop_duplicates()
    .reset_index(drop=True)
    .assign(floor_number=lambda frame: frame['floor'])
    [['building', 'floor', 'floor_number', 'site']]
    .assign(
        site_height=2.65,
        site_area='NULL',
        floor_height=2.65,
        floor_area='NULL',
        building_area='NULL',
    )
    # Every column is stored as plain Python objects before export.
    .astype(object)
)

places.to_csv(work_path + '/places.csv', sep=',', na_rep='NULL', index=False)

places

Unnamed: 0,building,floor,floor_number,site,site_height,site_area,floor_height,floor_area,building_area
0,library,3,3,main_room,2.65,,2.65,,
1,library,5,5,main_room,2.65,,2.65,,


In [4]:
# Generating the file tessellations.csv

# One row per distinct (building, floor, site, tile) combination. All tiles are
# tagged as 'logical' and their four corner coordinates are exported as the
# 'NULL' placeholder. Rows whose tile is 'NULL' are dropped; the row labels of
# the source frame are kept (the index is not reset).
tessellations = (
    dataframe[['building', 'floor', 'site', 'tile']]
    .drop_duplicates()
    .assign(
        tessellation_type='logical',
        coord_a_x='NULL',
        coord_a_y='NULL',
        coord_b_x='NULL',
        coord_b_y='NULL',
        coord_c_x='NULL',
        coord_c_y='NULL',
        coord_d_x='NULL',
        coord_d_y='NULL',
    )
    .loc[lambda frame: frame['tile'] != 'NULL']
    # Every column is stored as plain Python objects before export.
    .astype(object)
)


tessellations.to_csv(work_path + '/tessellations.csv', sep=',', na_rep='NULL', index=False)

tessellations

Unnamed: 0,building,floor,site,tile,tessellation_type,coord_a_x,coord_a_y,coord_b_x,coord_b_y,coord_c_x,coord_c_y,coord_d_x,coord_d_y
0,library,3,main_room,tile_23,logical,,,,,,,,
6,library,3,main_room,tile_15,logical,,,,,,,,
12,library,3,main_room,tile_7,logical,,,,,,,,
18,library,3,main_room,tile_22,logical,,,,,,,,
24,library,3,main_room,tile_14,logical,,,,,,,,
30,library,3,main_room,tile_6,logical,,,,,,,,
36,library,3,main_room,tile_21,logical,,,,,,,,
42,library,3,main_room,tile_13,logical,,,,,,,,
48,library,3,main_room,tile_5,logical,,,,,,,,
54,library,3,main_room,tile_20,logical,,,,,,,,


In [5]:
# Generating the file fingerprints.csv

# The tile name is taken directly from `dataframe`, so this cell needs no join
# against the tessellations table and does not depend on that cell having run.
fingerprints = dataframe[['fingerprint_id', 'coord_x', 'coord_y', 'coord_z', 'building', 'floor', 'site', 'tile', 'user_id', 'device_id', 'epoch', 'set']].copy()

# A user id of 0 is exported as the 'NULL' placeholder.
fingerprints['user_id'] = [x if x != 0 else 'NULL' for x in fingerprints['user_id']]
# Only fingerprints of the 'training' set are flagged as part of the radio map.
fingerprints['is_radio_map'] = fingerprints['set'] == 'training'
fingerprints['preceded_by'] = 'NULL'
fingerprints['followed_by'] = 'NULL'
fingerprints['notes'] = 'NULL'

# Every column is stored as plain Python objects before export.
fingerprints = fingerprints.astype(object)


fingerprints.to_csv(work_path + '/fingerprints.csv', sep=',', na_rep='NULL', index=False)

fingerprints

Unnamed: 0,fingerprint_id,coord_x,coord_y,coord_z,building,floor,site,tile,user_id,device_id,epoch,set,is_radio_map,preceded_by,followed_by,notes
0,0,12.9045,29.2075,,library,3,main_room,tile_23,1,1,,training,True,,,
1,1,12.9045,29.2075,,library,3,main_room,tile_23,1,1,,training,True,,,
2,2,12.9045,29.2075,,library,3,main_room,tile_23,1,1,,training,True,,,
3,3,12.9045,29.2075,,library,3,main_room,tile_23,1,1,,training,True,,,
4,4,12.9045,29.2075,,library,3,main_room,tile_23,1,1,,training,True,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3691,3691,12.9045,29.2075,,library,5,main_room,,1,1,,test,False,,,
3692,3692,12.9045,29.2075,,library,5,main_room,,1,1,,test,False,,,
3693,3693,12.9045,29.2075,,library,5,main_room,,1,1,,test,False,,,
3694,3694,12.9045,29.2075,,library,5,main_room,,1,1,,test,False,,,


In [6]:
# Generating the file wifi_obs.csv

# The fingerprint id followed by one column per access point, all stored as
# plain Python objects before export.
wap_names = [name for name in dataframe.columns if 'WAP' in name]
wifi_obs = dataframe[['fingerprint_id'] + wap_names].astype(object)

# Access-point columns are exported as "AP-<name>-NULL"; any other column keeps
# its name.
wifi_obs = wifi_obs.rename(columns=lambda name: "AP-" + name + "-NULL" if "WAP" in name else name)

wifi_obs.to_csv(work_path + '/wifi_obs.csv', sep=',', na_rep='NULL', index=False)

wifi_obs

Unnamed: 0,fingerprint_id,AP-WAP000-NULL,AP-WAP001-NULL,AP-WAP002-NULL,AP-WAP003-NULL,AP-WAP004-NULL,AP-WAP005-NULL,AP-WAP006-NULL,AP-WAP007-NULL,AP-WAP008-NULL,...,AP-WAP187-NULL,AP-WAP188-NULL,AP-WAP189-NULL,AP-WAP190-NULL,AP-WAP191-NULL,AP-WAP192-NULL,AP-WAP193-NULL,AP-WAP194-NULL,AP-WAP195-NULL,AP-WAP196-NULL
0,0,-84,-53,-58,-85,-84,-85,-58,-86,-52,...,,,,,,,,,,
1,1,-82,-52,-56,,-86,-83,-55,-87,-50,...,,,,,,,,,,
2,2,-83,-51,-60,-90,-83,-81,-58,-90,-52,...,,,,,,,,,,
3,3,-84,-50,-57,-89,-84,-81,-56,-89,-52,...,,,,,,,,,,
4,4,-82,-53,-56,-90,-88,-83,-58,-91,-52,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3691,3691,,,,,,-92,-83,,-87,...,,,,,,,,,,
3692,3692,,,,,,-85,,,,...,,,,,,,-69,,,
3693,3693,,,,,,-84,-84,-82,-87,...,,,,,,,-72,,,
3694,3694,,,,,,-85,,-78,-88,...,,,,,,,-72,,,
