In [14]:
import pandas as pd
import numpy as np
import pickle
import math

In [17]:
dataset_name = 'SIM001' # SIM001 # SIM002 # SIM003 # SIM004 # SIM005 # SIM006 # SIM007 # SIM008 # SIM009 # SIM010 

# Folder that receives the CSV files generated by the cells below.
work_path = './' + dataset_name + '/'

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load dataset pickles that come from a trusted source.
# The context manager closes the handle even when unpickling fails.
with open('./raw_datasets/SIM/' + dataset_name + '/dataset.pickle', 'rb') as file:
    dataframe = pickle.load(file)


# Constant metadata columns; 'NULL' marks values that are not available.
dataframe['coord_z'] = 'NULL'
dataframe['building_id'] = 'laboratory'
dataframe['site'] = 'main_room'
dataframe['tile'] = 'NULL'
dataframe['user_id'] = 1
dataframe['device_id'] = 1
dataframe['epoch'] = 'NULL'

# Replace the value 100 by 'NULL' in the access-point columns only, so that a
# genuine coordinate or floor value of 100 is never overwritten.
# NOTE(review): 100 is presumably the "AP not detected" placeholder - confirm.
wap_columns = [x for x in dataframe.columns if 'WAP' in x]
dataframe[wap_columns] = dataframe[wap_columns].replace(100, 'NULL')

# Keep the access-point readings followed by the metadata, in a fixed order.
dataframe = dataframe[wap_columns + ['coord_x', 'coord_y', 'coord_z', 'floor_id', 'building_id', 'site', 'tile', 'user_id', 'device_id', 'epoch', 'set']]

# reset_index() moves the previous index into the first column, which is then
# renamed (positionally) to 'fingerprint_id'; 'floor_id' and 'building_id'
# become 'floor' and 'building'.
dataframe = dataframe.reset_index()
dataframe.columns = ['fingerprint_id'] + wap_columns + ['coord_x', 'coord_y', 'coord_z', 'floor', 'building', 'site', 'tile', 'user_id', 'device_id', 'epoch', 'set']


# Assigning the grid tiles to the training set fingerprints

def get_tile_id(row):
    """Return the grid tile identifier of a fingerprint row.

    Parameters
    ----------
    row : mapping
        Must provide 'set'; for training rows it must also provide the
        numeric 'coord_x' and 'coord_y'.

    Returns
    -------
    str
        '<floor(coord_x)>_<floor(coord_y)>' when row['set'] is 'training',
        otherwise the placeholder 'NULL'.
    """
    # Only training fingerprints are assigned to a tile.
    if row['set'] != 'training':
        return 'NULL'

    cell_x = int(math.floor(row['coord_x']))
    cell_y = int(math.floor(row['coord_y']))
    return "_".join([str(cell_x), str(cell_y)])

# Evaluate get_tile_id once per row: training rows receive their grid cell id,
# every other row receives 'NULL'.
dataframe['tile'] = dataframe.apply(get_tile_id, axis=1)

dataframe

Unnamed: 0,fingerprint_id,WAP000,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,coord_x,coord_y,coord_z,floor,building,site,tile,user_id,device_id,epoch,set
0,0,-56,-64,-69,-71,-67,-68,-70,-70,0.000000,0.000000,,0,laboratory,main_room,0_0,1,1,,training
1,1,-55,-66,-70,-75,-67,-71,-71,-74,0.000000,1.000000,,0,laboratory,main_room,0_1,1,1,,training
2,2,-54,-63,-71,-75,-64,-65,-69,-75,0.000000,2.000000,,0,laboratory,main_room,0_2,1,1,,training
3,3,-53,-63,-74,-69,-64,-68,-72,-71,0.000000,3.000000,,0,laboratory,main_room,0_3,1,1,,training
4,4,-57,-65,-72,-75,-66,-70,-72,-71,0.000000,4.000000,,0,laboratory,main_room,0_4,1,1,,training
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11705,11705,-75,-71,-66,-67,-71,-72,-66,-57,49.578696,15.812447,,0,laboratory,main_room,,1,1,,test
11706,11706,-71,-67,-69,-64,-72,-69,-62,-57,49.974595,16.115297,,0,laboratory,main_room,,1,1,,test
11707,11707,-70,-71,-69,-65,-69,-72,-62,-58,49.204610,17.079679,,0,laboratory,main_room,,1,1,,test
11708,11708,-73,-73,-65,-64,-69,-69,-66,-52,49.617586,18.047484,,0,laboratory,main_room,,1,1,,test


In [18]:
# Generating the file places.csv
# One row per distinct (building, floor, site) combination found in the
# fingerprints, completed with constant geometry attributes.

places = (
    dataframe[['building', 'floor', 'site']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# 'floor_number' duplicates 'floor' and is placed right after it.
places = places.assign(floor_number=places['floor'])[['building', 'floor', 'floor_number', 'site']]

places = places.assign(
    site_height=8,
    site_area=1000,
    floor_height=8,
    floor_area=1000,
    building_area='NULL',
)

# Store every column as generic Python objects before exporting.
places = places.astype(object)

places.to_csv(work_path + '/places.csv', sep=',', na_rep='NULL', index=False)

places

Unnamed: 0,building,floor,floor_number,site,site_height,site_area,floor_height,floor_area,building_area
0,laboratory,0,0,main_room,8,1000,8,1000,


In [19]:
# Generating the file tessellations.csv


def generate_tile_coords(row, coord_index):
    """Return one corner coordinate of the unit square centred on a tile.

    The tile id stored in row['tile'] has the form '<x>_<y>'. The square has
    the four corners a, b, c, d laid out as
    a = (x - 0.5, y - 0.5), b = (x + 0.5, y - 0.5),
    c = (x + 0.5, y + 0.5), d = (x - 0.5, y + 0.5),
    flattened as [ax, ay, bx, by, cx, cy, dx, dy].

    Parameters
    ----------
    row : mapping
        Must provide 'tile' (a '<x>_<y>' string or the placeholder 'NULL').
    coord_index : int
        Position of the requested value in the flattened corner list.

    Returns
    -------
    float or str
        The requested coordinate, or 'NULL' when the row has no tile.
    """
    tile = row['tile']
    if tile == 'NULL':
        return 'NULL'

    parts = tile.split('_')
    centre_x = int(parts[0])
    centre_y = int(parts[1])

    low_x, high_x = centre_x - 0.5, centre_x + 0.5
    low_y, high_y = centre_y - 0.5, centre_y + 0.5

    # Flattened corners in the order a, b, c, d (x first, then y).
    corners = (low_x, low_y, high_x, low_y, high_x, high_y, low_x, high_y)
    return corners[coord_index]
    

# One row per distinct (building, floor, site, tile) combination.
tessellations = dataframe[['building', 'floor', 'site', 'tile']].copy().drop_duplicates()
tessellations['tessellation_type'] = 'grid'

# Corner columns, listed in the same order as the flattened corner list
# returned by generate_tile_coords (index 0 .. 7).
corner_columns = [
    'coord_a_x', 'coord_a_y',
    'coord_b_x', 'coord_b_y',
    'coord_c_x', 'coord_c_y',
    'coord_d_x', 'coord_d_y',
]
for corner_index, corner_column in enumerate(corner_columns):
    tessellations[corner_column] = tessellations.apply(
        generate_tile_coords, axis=1, args=(corner_index,)
    )

# Rows without a tile (placeholder 'NULL') are not part of the tessellation.
has_tile = tessellations['tile'] != 'NULL'
tessellations = tessellations[has_tile]

tessellations['site'] = 'NULL'

# Store every column as generic Python objects before exporting.
tessellations = tessellations.astype(object)

tessellations.to_csv(work_path + '/tessellations.csv', sep=',', na_rep='NULL', index=False)

tessellations

Unnamed: 0,building,floor,site,tile,tessellation_type,coord_a_x,coord_a_y,coord_b_x,coord_b_y,coord_c_x,coord_c_y,coord_d_x,coord_d_y
0,laboratory,0,,0_0,grid,-0.5,-0.5,0.5,-0.5,0.5,0.5,-0.5,0.5
1,laboratory,0,,0_1,grid,-0.5,0.5,0.5,0.5,0.5,1.5,-0.5,1.5
2,laboratory,0,,0_2,grid,-0.5,1.5,0.5,1.5,0.5,2.5,-0.5,2.5
3,laboratory,0,,0_3,grid,-0.5,2.5,0.5,2.5,0.5,3.5,-0.5,3.5
4,laboratory,0,,0_4,grid,-0.5,3.5,0.5,3.5,0.5,4.5,-0.5,4.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1066,laboratory,0,,50_16,grid,49.5,15.5,50.5,15.5,50.5,16.5,49.5,16.5
1067,laboratory,0,,50_17,grid,49.5,16.5,50.5,16.5,50.5,17.5,49.5,17.5
1068,laboratory,0,,50_18,grid,49.5,17.5,50.5,17.5,50.5,18.5,49.5,18.5
1069,laboratory,0,,50_19,grid,49.5,18.5,50.5,18.5,50.5,19.5,49.5,19.5


In [5]:
# Generating the file adjacences.csv
#
# Output columns:
# tile_1_building, tile_1_floor, tile_1_site, tile_1_tile, tile_2_building, tile_2_floor, tile_2_site, tile_2_tile, walkable, cost
#
# Two tiles are adjacent when their grid indices differ by at most one along
# each axis (8-neighbourhood). A neighbour is emitted only when it is itself a
# tile of the tessellation, so the grid extent is taken from the data instead
# of being hard-coded and no row ever points to a tile that does not exist.

all_tiles = tessellations['tile'].drop_duplicates().values
known_tiles = set(all_tiles)

adjacences = []
for tile in all_tiles:
    tile_parts = tile.split('_')
    x_cell = int(tile_parts[0])
    y_cell = int(tile_parts[1])

    for mod1 in (-1, 0, 1):
        for mod2 in (-1, 0, 1):
            # A tile is not adjacent to itself.
            if mod1 == 0 and mod2 == 0:
                continue

            neighbour_tile = str(x_cell + mod1) + "_" + str(y_cell + mod2)
            if neighbour_tile in known_tiles:
                # walkable is always 1 and cost is left unspecified ('NULL').
                adjacences.append(['laboratory', 0, 'NULL', tile, 'laboratory', 0, 'NULL', neighbour_tile, 1, 'NULL'])


adjacences = pd.DataFrame(adjacences, columns=['tile_1_building', 'tile_1_floor', 'tile_1_site', 'tile_1_tile', 'tile_2_building', 'tile_2_floor', 'tile_2_site', 'tile_2_tile', 'walkable', 'cost'])

adjacences.to_csv(work_path + '/adjacences.csv', sep=',', na_rep='NULL', index=False)

adjacences

Unnamed: 0,tile_1_building,tile_1_floor,tile_1_site,tile_1_tile,tile_2_building,tile_2_floor,tile_2_site,tile_2_tile,walkable,cost
0,laboratory,0,,0_0,laboratory,0,,0_1,1,
1,laboratory,0,,0_0,laboratory,0,,1_0,1,
2,laboratory,0,,0_0,laboratory,0,,1_1,1,
3,laboratory,0,,0_1,laboratory,0,,0_0,1,
4,laboratory,0,,0_1,laboratory,0,,0_2,1,
...,...,...,...,...,...,...,...,...,...,...
8135,laboratory,0,,50_19,laboratory,0,,50_18,1,
8136,laboratory,0,,50_19,laboratory,0,,50_20,1,
8137,laboratory,0,,50_20,laboratory,0,,49_19,1,
8138,laboratory,0,,50_20,laboratory,0,,49_20,1,


In [6]:
# Generating the file fingerprints.csv
#
# The former 'tile_id' column (built by merging with the tessellations table)
# was discarded by the very next statement, so the merge has been removed: the
# exported columns are unchanged and this cell now depends on `dataframe` only.

fingerprints = dataframe[['fingerprint_id', 'coord_x', 'coord_y', 'coord_z', 'building', 'floor', 'site', 'tile', 'user_id', 'device_id', 'epoch', 'set']].copy()

# A user id of 0 is exported as 'NULL'.
fingerprints['user_id'] = [x if x != 0 else 'NULL' for x in fingerprints['user_id']]

# Fingerprints of the training set are flagged as radio-map fingerprints.
fingerprints['is_radio_map'] = [x == 'training' for x in fingerprints['set']]

fingerprints['preceded_by'] = 'NULL'
fingerprints['followed_by'] = 'NULL'
fingerprints['notes'] = 'NULL'
fingerprints['site'] = 'NULL'

# Store every column as generic Python objects before exporting.
fingerprints = fingerprints.astype(object)


fingerprints.to_csv(work_path + '/fingerprints.csv', sep=',', na_rep='NULL', index=False)

fingerprints

Unnamed: 0,fingerprint_id,coord_x,coord_y,coord_z,building,floor,site,tile,user_id,device_id,epoch,set,is_radio_map,preceded_by,followed_by,notes
0,0,0,0,,laboratory,0,,0_0,1,1,,training,True,,,
1,1,0,1,,laboratory,0,,0_1,1,1,,training,True,,,
2,2,0,2,,laboratory,0,,0_2,1,1,,training,True,,,
3,3,0,3,,laboratory,0,,0_3,1,1,,training,True,,,
4,4,0,4,,laboratory,0,,0_4,1,1,,training,True,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11705,11705,49.7101,15.9894,,laboratory,0,,,1,1,,test,False,,,
11706,11706,49.1506,16.7645,,laboratory,0,,,1,1,,test,False,,,
11707,11707,49.1665,17.242,,laboratory,0,,,1,1,,test,False,,,
11708,11708,49.0437,18.6727,,laboratory,0,,,1,1,,test,False,,,


In [7]:
# Generating the file wifi_obs.csv
# One row per fingerprint: its id followed by the reading of every
# access-point column (columns whose name contains 'WAP').

wifi_obs = dataframe[['fingerprint_id'] + [name for name in dataframe.columns if 'WAP' in name]]

# Store every column as generic Python objects before exporting.
wifi_obs = wifi_obs.astype(object)

# Access-point columns are exported as 'AP-<original name>-NULL';
# 'fingerprint_id' keeps its name.
wifi_obs = wifi_obs.rename(columns=lambda name: "AP-" + name + "-NULL" if "WAP" in name else name)

wifi_obs.to_csv(work_path + '/wifi_obs.csv', sep=',', na_rep='NULL', index=False)

wifi_obs

Unnamed: 0,fingerprint_id,AP-WAP000-NULL,AP-WAP001-NULL,AP-WAP002-NULL,AP-WAP003-NULL,AP-WAP004-NULL,AP-WAP005-NULL,AP-WAP006-NULL,AP-WAP007-NULL
0,0,-54,-64,-70,-72,-68,-69,-71,-74
1,1,-56,-65,-68,-74,-62,-70,-77,-74
2,2,-58,-65,-70,-72,-63,-70,-72,-73
3,3,-54,-63,-70,-76,-62,-68,-70,-74
4,4,-58,-66,-69,-73,-60,-67,-73,-70
...,...,...,...,...,...,...,...,...,...
11705,11705,-71,-75,-68,-65,-71,-69,-68,-61
11706,11706,-75,-71,-64,-64,-73,-70,-65,-59
11707,11707,-72,-70,-65,-67,-68,-69,-65,-57
11708,11708,-72,-72,-68,-62,-68,-72,-64,-56
