In [1]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder

data_dir = Path('./data')
data_dir.mkdir(parents=True, exist_ok=True)

# Scans whose RSSI (%) is at or below this value are discarded.
rssi_threshold = 40

# The CSV has no header row, so columns are addressed by position:
# [0] bssid, [1] rssi, [2] timestamp, [3] position, [4] rp
df_signal = pd.read_csv(data_dir / 'signal_data.csv', header=None)
# Convert the rssi (%) column from string to a numeric dtype.
df_signal[1] = pd.to_numeric(df_signal[1])
# .copy() detaches the filtered rows from the original frame, so later
# column assignments on df_signal do not raise SettingWithCopyWarning.
df_signal = df_signal[df_signal[1] > rssi_threshold].copy()

# Sorted unique values of each key column, computed after the RSSI filter.
bssid_set = np.unique(df_signal[0])
rp_set = np.unique(df_signal[4])
timestamp_set = np.unique(df_signal[2])

print(rp_set)

['2층 입구' 'LLOYD' 'ZARA' 'ZIOZIA' '버스정류장' '서점']


In [2]:
rp_encoder = LabelEncoder()
# NOTE(review): bssid_encoder is not used in the cells visible here; it is
# kept in case a later cell relies on it.
bssid_encoder = LabelEncoder()

# Map the string rp labels to integer codes and persist the fitted classes.
rp_encoder.fit(rp_set)
np.save(data_dir / 'classes.npy', rp_encoder.classes_)

# Encode only while column 4 still holds the raw labels. On a re-run the
# column already contains integer codes, which the encoder would reject as
# unseen labels, so the guard makes this cell safe to execute repeatedly.
if df_signal[4].isin(rp_set).all():
    # Work on an explicit copy so the column assignment never targets a
    # slice of another frame (avoids SettingWithCopyWarning).
    df_signal = df_signal.copy()
    df_signal[4] = rp_encoder.transform(df_signal[4])

In [3]:
# scan_dict maps timestamp -> {bssid: rssi, ..., 'rp': rp}.
# df_signal columns: [0] bssid, [1] rssi, [2] timestamp, [3] position, [4] rp
# Every scan starts with rssi 0 for each known bssid; observed readings then
# overwrite the zeros, and 'rp' is appended as the last key.
scan_dict = defaultdict(lambda: dict.fromkeys(bssid_set, 0))

# itertuples avoids building a Series per row (as iterrows does) while
# visiting the rows in the same order, so later rows still win on duplicates.
signal_rows = df_signal[[0, 1, 2, 4]].itertuples(index=False, name=None)
for bssid, rssi, timestamp, rp in signal_rows:
    scan = scan_dict[timestamp]
    scan[bssid] = rssi
    scan['rp'] = rp

# from_dict yields one column per timestamp; transpose to one row per scan.
train_data = pd.DataFrame.from_dict(scan_dict).transpose()

# index=False: the timestamp index is not written to the CSV.
train_data.to_csv(data_dir / 'train_data.csv', index=False)
train_data

Unnamed: 0,00:07:79:0a:15:be,00:0f:00:2b:4f:6c,00:1d:93:89:d5:13,00:e0:4b:d7:04:c4,00:e1:6d:a4:62:e0,00:e1:6d:a4:ba:40,04:0e:3c:fd:f6:d5,04:5e:a4:d4:99:13,04:92:26:6b:13:a0,04:92:26:6b:13:a4,...,fc:0a:81:9a:64:41,fc:0a:81:9a:96:90,fc:0a:81:9a:96:91,fc:0a:81:9a:98:a0,fc:0a:81:9a:98:a1,fc:0a:81:9a:98:a2,fe:cb:ac:34:26:8f,fe:cb:bc:34:26:8f,fe:cb:bc:34:27:5a,rp
2020-11-26 21:30:22.159452,0,0,0,0,0,0,60,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-11-26 21:30:26.219957,0,0,0,0,0,0,53,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-11-26 21:30:30.297193,0,0,0,0,0,0,72,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-11-26 21:30:34.352415,53,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-11-26 21:30:38.379748,65,0,0,0,0,0,0,0,0,0,...,0,0,0,0,50,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-11-26 22:07:25.946249,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
2020-11-26 22:07:30.208701,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
2020-11-26 22:07:34.378276,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
2020-11-26 22:07:38.520357,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
