In [1]:
import pandas as pd
import numpy as np
import pickle
import math

In [2]:
# Name of the dataset being converted. It selects the input folder under
# ./raw_datasets/ and the folder the generated CSV files are written to.
dataset_name = 'UTS'


work_path = './' + dataset_name + '/'

# NOTE(security): pickle.load can execute arbitrary code while deserialising,
# so only run this on a dataset.pickle obtained from a trusted source.
# The context manager guarantees the file is closed even if unpickling raises.
with open('./raw_datasets/' + dataset_name + '/dataset.pickle', 'rb') as file:
    dataframe = pickle.load(file)


# Columns that are filled with constants here. The string 'NULL' is the
# missing-value marker used in the generated files; user and device ids are
# both set to 1 for every row.
dataframe['coord_z'] = 'NULL'
dataframe['tile'] = 'NULL'
dataframe['user_id'] = 1
dataframe['device_id'] = 1
dataframe['epoch'] = 'NULL'
dataframe['site'] = 'NULL'

# Signal columns are recognised by the substring 'WAP' in their name.
wap_columns = [x for x in dataframe.columns if 'WAP' in x]

# Replace the value 100 with 'NULL' in the WAP columns only, so that a
# coordinate, floor or id that happens to equal 100 is never turned into 'NULL'.
# NOTE(review): 100 is presumably the "access point not detected" marker of the
# raw dataset -- confirm against the dataset documentation.
if wap_columns:
    dataframe[wap_columns] = dataframe[wap_columns].replace(100, 'NULL')

# Keep only the WAP columns followed by the metadata columns, in a fixed order,
# and turn the old index into the first regular column.
metadata_columns = ['coord_x', 'coord_y', 'coord_z', 'floor_id', 'building_id', 'site', 'tile', 'user_id', 'device_id', 'epoch', 'set']
dataframe = dataframe[wap_columns + metadata_columns].reset_index()

# Positional rename: the column created by reset_index() becomes
# 'fingerprint_id', and 'floor_id' / 'building_id' become 'floor' / 'building'.
dataframe.columns = ['fingerprint_id'] + wap_columns + ['coord_x', 'coord_y', 'coord_z', 'floor', 'building', 'site', 'tile', 'user_id', 'device_id', 'epoch', 'set']



# Assign a logical tile to every training fingerprint.
# Each distinct (coord_x, coord_y) pair becomes one tile name. Building and
# floor are deliberately left out of the grouping: tile names do not have to be
# distinct across floors.
# NOTE(review): the tile names are enumerated over every row whose set is not
# 'test', but only rows whose set is exactly 'training' receive a tile below.
# If the data contains any other set value, confirm this is intended.
group_tiles = dataframe[dataframe['set'] != 'test'][['coord_x', 'coord_y', 'set']].groupby(['coord_x', 'coord_y']).count()

# Tiles are numbered in the order of the grouped index.
coords_map_tile = {
    coords: 'tile_' + str(number)
    for number, coords in enumerate(group_tiles.index)
}
tile_count = len(coords_map_tile)

# Iterate over just the three columns that are needed; dataframe.iterrows()
# would build a Series containing every WAP column for each row.
tilevals = [
    coords_map_tile[(coord_x, coord_y)] if set_name == 'training' else 'NULL'
    for coord_x, coord_y, set_name in zip(dataframe['coord_x'], dataframe['coord_y'], dataframe['set'])
]

dataframe['tile'] = tilevals


dataframe

Unnamed: 0,fingerprint_id,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,...,coord_y,coord_z,floor,building,site,tile,user_id,device_id,epoch,set
0,0,,,,,,,,,,...,31.0272,,11,0,,tile_439,1,1,,training
1,1,,,,,,,,,,...,31.0272,,11,0,,tile_439,1,1,,training
2,2,,,,,,,,,,...,31.0272,,11,0,,tile_439,1,1,,training
3,3,,,,,,,,,,...,30.7985,,11,0,,tile_427,1,1,,training
4,4,,,,,,,,,,...,30.7985,,11,0,,tile_427,1,1,,training
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9491,9491,,,,,,,,,,...,23.1212,,0,0,,,1,1,,test
9492,9492,,,,,,,,,,...,23.2927,,0,0,,,1,1,,test
9493,9493,,,,,,,,,,...,24.2186,,0,0,,,1,1,,test
9494,9494,,,,,,,,,,...,24.1157,,0,0,,,1,1,,test


In [3]:
# Generating the file places.csv
# One output row per distinct (building, floor, site) combination found in
# the fingerprints.

place_keys = (
    dataframe[['building', 'floor', 'site']]
    .drop_duplicates()
    .reset_index(drop=True)
)

places = pd.DataFrame({
    'building': place_keys['building'],
    'floor': place_keys['floor'],
    # floor_number is a plain copy of the floor identifier.
    'floor_number': place_keys['floor'],
    'site': place_keys['site'],
    'site_height': 'NULL',
    'site_area': 'NULL',
    # Same constant for every floor (presumably metres -- TODO confirm unit and source).
    'floor_height': 3.7,
    'floor_area': 'NULL',
    'building_area': 'NULL',
})

# Store every column as generic Python objects before writing.
places = places.astype(object)

places.to_csv(work_path + '/places.csv', sep=',', na_rep='NULL', index=False)

places

Unnamed: 0,building,floor,floor_number,site,site_height,site_area,floor_height,floor_area,building_area
0,0,11,11,,,,3.7,,
1,0,10,10,,,,3.7,,
2,0,9,9,,,,3.7,,
3,0,8,8,,,,3.7,,
4,0,7,7,,,,3.7,,
5,0,6,6,,,,3.7,,
6,0,5,5,,,,3.7,,
7,0,4,4,,,,3.7,,
8,0,3,3,,,,3.7,,
9,0,2,2,,,,3.7,,


In [4]:
# Generating the file tessellations.csv
# One output row per distinct (building, floor, site, tile) combination that
# actually has a tile; rows whose tile is 'NULL' are left out.

unique_tiles = dataframe[['building', 'floor', 'site', 'tile']].drop_duplicates()
tessellations = unique_tiles[unique_tiles['tile'] != 'NULL'].copy()

tessellations['tessellation_type'] = 'logical'

# No corner coordinates are written for these tiles, so all eight corner
# fields carry the 'NULL' marker.
for corner_column in (
    'coord_a_x', 'coord_a_y',
    'coord_b_x', 'coord_b_y',
    'coord_c_x', 'coord_c_y',
    'coord_d_x', 'coord_d_y',
):
    tessellations[corner_column] = 'NULL'

tessellations['site'] = 'NULL'

# Store every column as generic Python objects before writing.
tessellations = tessellations.astype(object)

tessellations.to_csv(work_path + '/tessellations.csv', sep=',', na_rep='NULL', index=False)

tessellations

Unnamed: 0,building,floor,site,tile,tessellation_type,coord_a_x,coord_a_y,coord_b_x,coord_b_y,coord_c_x,coord_c_y,coord_d_x,coord_d_y
0,0,11,,tile_439,logical,,,,,,,,
3,0,11,,tile_427,logical,,,,,,,,
5,0,11,,tile_426,logical,,,,,,,,
8,0,11,,tile_440,logical,,,,,,,,
11,0,11,,tile_444,logical,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9056,0,15,,tile_557,logical,,,,,,,,
9067,0,15,,tile_516,logical,,,,,,,,
9075,0,15,,tile_475,logical,,,,,,,,
9085,0,15,,tile_429,logical,,,,,,,,


In [5]:
# Generating the file fingerprints.csv
# One output row per fingerprint, carrying its position, place, tile and
# bookkeeping columns.
# The former 'tile_id' column (a left merge against the tessellations table)
# was removed: it was dropped again by the very next column selection, so it
# never reached the output file.

fingerprints = dataframe[['fingerprint_id', 'coord_x', 'coord_y', 'coord_z', 'building', 'floor', 'site', 'tile', 'user_id', 'device_id', 'epoch', 'set']].copy()

# A user id of 0 is written as 'NULL'.
fingerprints['user_id'] = [x if x != 0 else 'NULL' for x in fingerprints['user_id']]

# Only fingerprints of the 'training' set are flagged as radio-map entries.
fingerprints['is_radio_map'] = fingerprints['set'] == 'training'

fingerprints['preceded_by'] = 'NULL'
fingerprints['followed_by'] = 'NULL'
fingerprints['notes'] = 'NULL'
fingerprints['site'] = 'NULL'

# Store every column as generic Python objects before writing.
for col in fingerprints.columns:
    fingerprints[col] = fingerprints[col].astype(object)


fingerprints.to_csv(work_path + '/fingerprints.csv', sep=',', na_rep='NULL', index=False)

fingerprints

Unnamed: 0,fingerprint_id,coord_x,coord_y,coord_z,building,floor,site,tile,user_id,device_id,epoch,set,is_radio_map,preceded_by,followed_by,notes
0,0,40.7871,31.0272,,0,11,,tile_439,1,1,,training,True,,,
1,1,40.7871,31.0272,,0,11,,tile_439,1,1,,training,True,,,
2,2,40.7871,31.0272,,0,11,,tile_439,1,1,,training,True,,,
3,3,39.4464,30.7985,,0,11,,tile_427,1,1,,training,True,,,
4,4,39.4464,30.7985,,0,11,,tile_427,1,1,,training,True,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9491,9491,85.0452,23.1212,,0,0,,,1,1,,test,False,,,
9492,9492,87.8801,23.2927,,0,0,,,1,1,,test,False,,,
9493,9493,90.0977,24.2186,,0,0,,,1,1,,test,False,,,
9494,9494,92.201,24.1157,,0,0,,,1,1,,test,False,,,


In [6]:
# Generating the file wifi_obs.csv
# One output row per fingerprint: its id followed by one column per WAP column
# of the source frame.

signal_columns = [name for name in dataframe.columns if 'WAP' in name]

# Select the id and signal columns in their final order and store every
# column as generic Python objects.
wifi_obs = dataframe[['fingerprint_id'] + signal_columns].astype(object)

# Wrap each WAP column name as "AP-<name>-NULL"; fingerprint_id keeps its name.
renamed_columns = []
for name in wifi_obs.columns:
    if "WAP" in name:
        renamed_columns.append("AP-" + name + "-NULL")
    else:
        renamed_columns.append(name)
wifi_obs.columns = renamed_columns

wifi_obs.to_csv(work_path + '/wifi_obs.csv', sep=',', na_rep='NULL', index=False)

wifi_obs

Unnamed: 0,fingerprint_id,AP-WAP001-NULL,AP-WAP002-NULL,AP-WAP003-NULL,AP-WAP004-NULL,AP-WAP005-NULL,AP-WAP006-NULL,AP-WAP007-NULL,AP-WAP008-NULL,AP-WAP009-NULL,...,AP-WAP580-NULL,AP-WAP581-NULL,AP-WAP582-NULL,AP-WAP583-NULL,AP-WAP584-NULL,AP-WAP585-NULL,AP-WAP586-NULL,AP-WAP587-NULL,AP-WAP588-NULL,AP-WAP589-NULL
0,0,,,,,,,,,,...,,,,,,,,,,
1,1,,,,,,,,,,...,,,,,,,,,,
2,2,,,,,,,,,,...,,,,,,,,,,
3,3,,,,,,,,,,...,,,,,,,,,,
4,4,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9491,9491,,,,,,,,,,...,,,-88,-90,,,,,,
9492,9492,,,,,,,,,,...,,,,,,,,,,
9493,9493,,,,,,,,,,...,,,,,,,,,,
9494,9494,,,,,,,,,,...,,,,,,,,,,
