# Overview
- from_nikaido_make_Dataset.ipynb からfork
- 24site以外から情報を抽出する

In [1]:
import subprocess
cmd = "git rev-parse --short HEAD"
hash = subprocess.check_output(cmd.split()).strip().decode('utf-8')
print(hash)

7def5a2


# Const

In [2]:
NB = '028'
# N_BSSID = 300

DIR_TRAIN = './../data_ignore/input/train/'
DIR_TEST = './../data_ignore/input/test/'
DIR_WIFI = './../data_ignore/input/wifi/'
PATH_SUB = './../data_ignore/input/sample_submission.csv'
PATH_99_SUB = './../data/input/floor_99per_acc_sub.csv'
DIR_SAVE_IGNORE = f'./../data_ignore/nb/{NB}/'
DIR_SAVE = f'./../data/nb/{NB}/'

# Import everything I need:)

In [3]:
import os
import gc
import time
import yaml
import types
import random
import pickle
import builtins
import numpy as np
import pandas as pd
import seaborn as sns
from icecream import ic
import matplotlib.pyplot as plt
from dataclasses import dataclass
from ipdb import set_trace as st
# from tqdm import tqdm
from fastprogress import progress_bar, master_bar
from glob import glob
from loguru import logger
from collections import OrderedDict

# sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import KFold

# pytorch
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

# Function

In [4]:
def imports():
    for name, val in globals().items():
        # module imports
        if isinstance(val, types.ModuleType):
            yield name, val

            # functions / callables
        if hasattr(val, '__call__'):
            yield name, val


def noglobal(f):
    '''
    ref: https://gist.github.com/raven38/4e4c3c7a179283c441f575d6e375510c
    '''
    return types.FunctionType(f.__code__,
                              dict(imports()),
                              f.__name__,
                              f.__defaults__,
                              f.__closure__
                              )


def comp_metric(xhat, yhat, fhat, x, y, f):
    intermediate = np.sqrt(np.power(xhat-x, 2) + np.power(yhat-y, 2)) + 15 * np.abs(fhat-f)
    return intermediate.sum()/xhat.shape[0]

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

In [5]:
@dataclass
class ReadData:
    acce: np.ndarray
    acce_uncali: np.ndarray
    gyro: np.ndarray
    gyro_uncali: np.ndarray
    magn: np.ndarray
    magn_uncali: np.ndarray
    ahrs: np.ndarray
    wifi: np.ndarray
    ibeacon: np.ndarray
    waypoint: np.ndarray


def read_data_file(data_filename):
    acce = []
    acce_uncali = []
    gyro = []
    gyro_uncali = []
    magn = []
    magn_uncali = []
    ahrs = []
    wifi = []
    ibeacon = []
    waypoint = []

    with open(data_filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    for line_data in lines:
        line_data = line_data.strip()
        if not line_data or line_data[0] == '#':
            continue

        line_data = line_data.split('\t')

        if line_data[1] == 'TYPE_ACCELEROMETER':
            acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':
            acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_GYROSCOPE':
            gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':
            gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_MAGNETIC_FIELD':
            magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':
            magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_ROTATION_VECTOR':
            ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_WIFI':
            sys_ts = line_data[0]
            ssid = line_data[2]
            bssid = line_data[3]
            rssi = line_data[4]
            lastseen_ts = line_data[6]
            wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]
            wifi.append(wifi_data)
            continue

        if line_data[1] == 'TYPE_BEACON':
            ts = line_data[0]
            uuid = line_data[2]
            major = line_data[3]
            minor = line_data[4]
            rssi = line_data[6]
            ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi]
            ibeacon.append(ibeacon_data)
            continue

        if line_data[1] == 'TYPE_WAYPOINT':
            waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])

    acce = np.array(acce)
    acce_uncali = np.array(acce_uncali)
    gyro = np.array(gyro)
    gyro_uncali = np.array(gyro_uncali)
    magn = np.array(magn)
    magn_uncali = np.array(magn_uncali)
    ahrs = np.array(ahrs)
    wifi = np.array(wifi)
    ibeacon = np.array(ibeacon)
    waypoint = np.array(waypoint)

    return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)

# Preparation

<br>

set

In [6]:
# seed_everything(config['globals']['seed'])
pd.set_option('display.max_rows', 500)

if not os.path.exists(DIR_SAVE_IGNORE):
    os.makedirs(DIR_SAVE_IGNORE)
if not os.path.exists(DIR_SAVE):
    os.makedirs(DIR_SAVE)

<br>

load dataset

In [7]:
with open(f'{DIR_WIFI}train_all.pkl', 'rb') as f:
    df_train = pickle.load( f)
with open(f'{DIR_WIFI}test_all.pkl', 'rb') as f:
    df_test = pickle.load( f)

sample_submission = pd.read_csv(PATH_SUB)

# Dataseet 作成
- from_nikaido_make_Dataset.ipynb を参考にした
- site内で指定回以上登場するWiFiを抽出
- 100とかにしたかったが対象となるWiFiが少なすぎる。testのwaypointに紐づくWiFiの件数が少ない

In [8]:
base_path = './../data_ignore/input/'
ssubm = pd.read_csv('from_nikaido_real_timestamp_from_wifi_sample_submission.csv')
ssubm_df = ssubm["site_path_timestamp"].apply(lambda x: pd.Series(x.split("_")))
used_buildings = sorted(ssubm_df[0].value_counts().index.tolist())
floor_map = {"B2":-2.0, "B1":-1.0, "F1":0.0, "F2": 1.0, "F3":2.0, "F4":3.0, "F5":4.0, "F6":5.0, "F7":6.0,"F8":7.0, "F9":8.0,
             "1F":0.0, "2F":1.0, "3F":2.0, "4F":3.0, "5F":4.0, "6F":5.0, "7F":6.0, "8F": 7.0, "9F":8.0}

In [9]:
# site_path_time_df = realtime_sample_submission_df['site_path_timestamp'].str.split('_').apply(lambda x: pd.Series(x))
ssubm_df.columns = ['site_id', 'path_id', 'waypoint_timestamp']
ssubm_df['site_path_timestamp'] = ssubm['site_path_timestamp']
ssubm_df.head(3)

Unnamed: 0,site_id,path_id,waypoint_timestamp,site_path_timestamp
0,5a0546857ecc773753327266,046cfa46be49fc10834815c6,1578474563646,5a0546857ecc773753327266_046cfa46be49fc1083481...
1,5a0546857ecc773753327266,046cfa46be49fc10834815c6,1578474572654,5a0546857ecc773753327266_046cfa46be49fc1083481...
2,5a0546857ecc773753327266,046cfa46be49fc10834815c6,1578474578963,5a0546857ecc773753327266_046cfa46be49fc1083481...


In [10]:
train_buildings = sorted(glob(f'./../data_ignore/input/train/*'))
train_buildings = [dir_.split('/')[-1] for dir_ in train_buildings]

In [14]:
# %%time

# TODO 全体の登場回数ではなく、waypointの近くの登場回数で絞るべき
WIFI_USE_COUT = 10

bssid = dict()

# for building in train_buildings[:3]:
for building in progress_bar(train_buildings):
    #break
    folders = sorted(glob(os.path.join(base_path,'train/'+building+'/*')))
    print(building)
    wifi = list()
    for folder in folders:
        if folder.split("/")[-1] in floor_map.keys():
            floor = floor_map[folder.split('/')[-1]]
            files = glob(os.path.join(folder, "*.txt"))
            for file in files:
                with open(file, encoding='utf-8') as f:
                    txt = f.readlines()
                    for e, line in enumerate(txt):
                        tmp = line.strip().split()
                        if tmp[1] == "TYPE_WIFI":
                            wifi.append(tmp)
    if len(wifi) == 0:
        continue
    df = pd.DataFrame(wifi)
    value_counts = df[3].value_counts()
    top_bssid = value_counts[value_counts > WIFI_USE_COUT].index.tolist() # WIFI_USE_COUT回以上登場するBSSIDのみを対象にする。
    print(len(top_bssid))
    bssid[building] = top_bssid
    del df
    del wifi
    gc.collect()

5a0546857ecc773753327266
3276
5c3c44b80379370013e0fd2b
3003
5cd56865eb294480de7167b6
657
5cd56b5ae2acfd2d33b58544
478
5cd56b5ae2acfd2d33b58546
697
5cd56b5ae2acfd2d33b58548
199
5cd56b5ae2acfd2d33b58549
1096
5cd56b5ae2acfd2d33b5854a
1278
5cd56b61e2acfd2d33b58d20
612
5cd56b63e2acfd2d33b591c2
845
5cd56b64e2acfd2d33b59246
498
5cd56b64e2acfd2d33b592b3
459
5cd56b64e2acfd2d33b5932f
1856
5cd56b67e2acfd2d33b596bd
265
5cd56b6ae2acfd2d33b59c90
184
5cd56b6ae2acfd2d33b59ccb
5203
5cd56b6ae2acfd2d33b59ccc
2085
5cd56b6be2acfd2d33b59d1f
166
5cd56b6ee2acfd2d33b5a247
398
5cd56b6fe2acfd2d33b5a386
542
5cd56b70e2acfd2d33b5a44e
370
5cd56b70e2acfd2d33b5a552
510
5cd56b75e2acfd2d33b5af29
355
5cd56b76e2acfd2d33b5b0be
674
5cd56b77e2acfd2d33b5b22b
964
5cd56b77e2acfd2d33b5b310
574
5cd56b79e2acfd2d33b5b74e
367
5cd56b79e2acfd2d33b5b77c
491
5cd56b7de2acfd2d33b5c14b
369
5cd56b83e2acfd2d33b5cab0
2149
5cd56b86e2acfd2d33b5cf97
695
5cd56b89e2acfd2d33b5d61e
214
5cd56b89e2acfd2d33b5d759
982
5cd56b89e2acfd2d33b5d75a
82
5cd56b8

In [15]:
@noglobal
def create_wifi_df(path_id):
    wifi = list()
    
    with open(f'./../data_ignore/input/test/{path_id}.txt', encoding='utf-8') as f:
        txt = f.readlines()

    for line in txt:
        line = line.strip().split()
        if line[1] == "TYPE_WIFI":
            wifi.append(line)
            
    return pd.DataFrame(np.array(wifi), columns=['timestamp', 'data_type', 'ssid', 'bssid', 'rssi', 'frequency', 'last_seen_timestamp'])
            

In [16]:
# %%time
wifi_base_df_counts_dfs = []
for path_id, path_df in progress_bar(ssubm_df.groupby('path_id')):
    
    site_id = path_df.iloc[0]['site_id']
    bssids = bssid[site_id]
    
    wifi_base_df = create_wifi_df(path_id)
    wifi_base_df = wifi_base_df[wifi_base_df['bssid'].isin(bssids)]
    
    wifi_base_df_counts = wifi_base_df['bssid'].value_counts().reset_index()
    wifi_base_df_counts = wifi_base_df_counts.rename(columns={'index':'bssid', 'bssid':'count'})
    wifi_base_df_counts['site_id'] = site_id
    wifi_base_df_counts_dfs.append(wifi_base_df_counts)
    
#     break

In [17]:
wifi_base_df_counts_df = pd.concat(wifi_base_df_counts_dfs).reset_index(drop=True)
wifi_base_df_counts_df.head()

Unnamed: 0,bssid,count,site_id
0,98d67fadac518296992afddd24e97a2855af9472,46,5da1389e4db8ce0c98bd0547
1,2c0de2d831941a1ed5ed2873805e9f20bcc6776f,46,5da1389e4db8ce0c98bd0547
2,ea26617bc19a2b6a0a3798c1f3aa1de25442e716,46,5da1389e4db8ce0c98bd0547
3,c83a81634993ed9d129ca0176cdd01ccc9a8788d,46,5da1389e4db8ce0c98bd0547
4,4a2924201ab4aaf5aedc8ca8f485bf1b343028cd,46,5da1389e4db8ce0c98bd0547


In [18]:
test_bssid = dict()
for building in progress_bar(used_buildings):
    print(building)
    wifi_base_df_counts_df_unisite = wifi_base_df_counts_df[wifi_base_df_counts_df['site_id'] == building]
    wifi_base_df_counts_df_unisite_groupby = wifi_base_df_counts_df_unisite.groupby('bssid').sum().reset_index()
    top_bssid = wifi_base_df_counts_df_unisite_groupby[wifi_base_df_counts_df_unisite_groupby['count'] >= WIFI_USE_COUT]['bssid'].tolist()
    test_bssid[building] = top_bssid
    print(len(top_bssid))

5a0546857ecc773753327266
2310
5c3c44b80379370013e0fd2b
993
5d27075f03f801723c2e360f
988
5d27096c03f801723c31e5e0
891
5d27097f03f801723c320d97
1032
5d27099f03f801723c32511d
535
5d2709a003f801723c3251bf
816
5d2709b303f801723c327472
1123
5d2709bb03f801723c32852c
1456
5d2709c303f801723c3299ee
4106
5d2709d403f801723c32bd39
1482
5d2709e003f801723c32d896
875
5da138274db8ce0c98bbd3d2
267
5da1382d4db8ce0c98bbe92e
1737
5da138314db8ce0c98bbf3a0
879
5da138364db8ce0c98bc00f1
472
5da1383b4db8ce0c98bc11ab
1045
5da138754db8ce0c98bca82f
1015
5da138764db8ce0c98bcaa46
1157
5da1389e4db8ce0c98bd0547
372
5da138b74db8ce0c98bd4774
2258
5da958dd46f8266d0737457b
2770
5dbc1d84c1eb61796cf7c010
3452
5dc8cea7659e181adb076a3f
2260


## train作成

In [21]:
TIME_DIFF_THRESHOLD = 8000 # 抽出対象とする最大時間（ミリ秒）
WIFI_NUM = 80 # WiFi抽出件数
WIFI_MIN_NUM = 10 # waypointに紐づくWiFiの最低数（この数以下の場合は学習データから除外する）
DUMMY_RECORD = [9999999999999,	'DUMMY', 'NONE', 'NONE', -999, 0, 9999999999999] # waypointに紐づくWiFi数が100件に満たない場合に穴埋めするためのダミー


train_wifi_count = []
train_wifitime_diffs = []
train_time_diffs = []
# rows = []
columns = [f'ssid_{i}' for i in range(WIFI_NUM)] + [f'bssid_{i}' for i in range(WIFI_NUM)] + [f'rssi_{i}' for i in range(WIFI_NUM)] + [f'frequency_{i}' for i in range(WIFI_NUM)] +  ['wp_tmestamp', 'x', 'y', 'floor', 'floor_str', 'path_id', 'site_id']

# for building in progress_bar(train_buildings[:3]):
for building in progress_bar(train_buildings):
# for building in progress_bar(used_buildings[:2]):
#     bssids = test_bssid[building]

    rows = []
    for folder in sorted(glob(os.path.join(base_path,'train', building +'/*'))):
        floor_str = folder.split('/')[-1]
        if folder.split("/")[-1] in floor_map.keys():
            floor = floor_map[floor_str]

            for file in glob(os.path.join(folder, "*.txt")):
                wifi = list()
                waypoint = list()

                with open(file, encoding='utf-8') as f:
                    txt = f.readlines()

                for line in txt:
                    line = line.strip().split()
                    if line[1] == "TYPE_WAYPOINT":
                        waypoint.append(line)
                    if line[1] == "TYPE_WIFI":
                        wifi.append(line)

                if len(wifi) <= 0:
                    continue

                wifi_base_df = pd.DataFrame(np.array(wifi), columns=['timestamp', 'data_type', 'ssid', 'bssid', 'rssi', 'frequency', 'last_seen_timestamp'])
                wifi_timestamps = wifi_base_df['timestamp'].unique()
                waypoint = np.array(waypoint)


                for wifi_timestamp in wifi_timestamps:

                    wifi_time_uni = wifi_base_df[wifi_base_df['timestamp'] == wifi_timestamp]

                    diff = np.long(wifi_timestamp) - waypoint[:, 0].astype(np.long)
    #                 diff = np.where(diff < 0, 1e16, diff)

                    min_idx = np.abs(diff).argmin()
                    way_time = waypoint[min_idx, 0]
                    x_ = waypoint[min_idx, 2]
                    y_ = waypoint[min_idx, 3]

                    train_time_diffs.extend((wifi_time_uni['last_seen_timestamp'].astype(int) - np.long(way_time)).to_list())
                    train_wifitime_diffs.append(np.min(diff))

                    diff_abs = np.abs(wifi_time_uni['last_seen_timestamp'].astype(int) - np.long(way_time))
                    wifi_time_uni = wifi_time_uni[diff_abs <= TIME_DIFF_THRESHOLD]
                    train_wifi_count.append(len(wifi_time_uni))

                    if len(wifi_time_uni) < WIFI_MIN_NUM:
                        continue

                    if len(wifi_time_uni) < WIFI_NUM:
                        dummy_count = WIFI_NUM - len(wifi_time_uni)
                        dummy_df = pd.DataFrame(np.tile(DUMMY_RECORD, (dummy_count, 1)), 
                                                columns=['timestamp', 'data_type', 'ssid', 'bssid', 'rssi', 'frequency', 'last_seen_timestamp'])
                        wifi_time_uni = pd.concat([wifi_time_uni, dummy_df])

                    wifi_time_uni = wifi_time_uni.head(WIFI_NUM)

                    row = np.concatenate([wifi_time_uni['ssid'].to_numpy(),
                                          wifi_time_uni['bssid'].to_numpy(), 
                                          wifi_time_uni['rssi'].to_numpy(), 
                                          wifi_time_uni['frequency'].to_numpy(), 
                                          [way_time, # waypoint timestamp
                                          x_, # x
                                          y_, # y
                                          floor, # floor number
                                          floor_str, # floor string
                                          file.split('/')[-1].split('.')[0], # path_id
                                          building]
                                         ])
                    rows.append(row)
    train_df = pd.DataFrame(rows, columns=columns)
    train_df.to_csv(f'{DIR_SAVE_IGNORE}{building}_train_wifi_num_{WIFI_NUM}.csv', index=False)

                    

## test作成

In [22]:
ssubm_df_diff = pd.read_csv('from_nikaido_test_path_timediff.csv')

In [23]:
ssubm_df_diff[ssubm_df_diff['path_id'] == path_id]['time_diff'].values[0]

1578465315137

In [24]:
rows = []
test_wifi_count = []
test_time_diffs = []
# for path_id, path_df in progress_bar(ssubm_df.groupby('path_id')):
for path_id, path_df in progress_bar(ssubm_df.groupby('path_id')):
    site_id = path_df.iloc[0]['site_id']
    bssids = test_bssid[site_id]
    
    wifi_base_df = create_wifi_df(path_id)
    wifi_base_df = wifi_base_df[wifi_base_df['bssid'].isin(bssids)]
    
    wifi_timestamps = wifi_base_df['timestamp'].unique()
    time_diff = ssubm_df_diff[ssubm_df_diff['path_id'] == path_id]['time_diff'].values[0]
    
    for i, path_row in path_df.iterrows():
        wifi_df = wifi_base_df.copy()
        site_path_timestamp = path_row['site_path_timestamp']
        waypoint_timestamp = path_row['waypoint_timestamp']
        building = path_row['site_id']
        path_id = path_row['path_id']
        
        min_idx = np.abs(wifi_timestamps.astype(np.long) + time_diff - np.long(waypoint_timestamp)).argmin()
        
        wifi_time_uni = wifi_base_df[wifi_base_df['timestamp'] == wifi_timestamps[min_idx]]
        diff_abs = np.abs(wifi_time_uni['last_seen_timestamp'].astype(int) - np.long(waypoint_timestamp))
        test_time_diffs.extend((wifi_time_uni['last_seen_timestamp'].astype(int) - np.long(waypoint_timestamp)).to_list())

        wifi_time_uni = wifi_time_uni[diff_abs <= TIME_DIFF_THRESHOLD]        

        test_wifi_count.append(len(wifi_time_uni))
        if len(wifi_time_uni) < WIFI_MIN_NUM:
            continue
        if len(wifi_time_uni) < WIFI_NUM:
            dummy_count = WIFI_NUM - len(wifi_time_uni)
            dummy_df = pd.DataFrame(np.tile(DUMMY_RECORD, (dummy_count, 1)), 
                                    columns=['timestamp', 'data_type', 'ssid', 'bssid', 'rssi', 'frequency', 'last_seen_timestamp'])
            wifi_time_uni = pd.concat([wifi_time_uni, dummy_df])

        wifi_time_uni = wifi_time_uni.head(WIFI_NUM)
        
        row = np.concatenate([wifi_time_uni['ssid'].to_numpy(),
                              wifi_time_uni['bssid'].to_numpy(), 
                              wifi_time_uni['rssi'].to_numpy(), 
                              wifi_time_uni['frequency'].to_numpy(), 
                              [waypoint_timestamp, # waypoint timestamp
#                               x_, # x
#                               y_, # y
#                               floor, # floor number
#                               floor_str, # floor string
                              path_id, # path_id
                              building]
                             ])
        rows.append(row)
        
columns = [f'ssid_{i}' for i in range(WIFI_NUM)] + [f'bssid_{i}' for i in range(WIFI_NUM)] + [f'rssi_{i}' for i in range(WIFI_NUM)] + [f'frequency_{i}' for i in range(WIFI_NUM)] +  ['wp_tmestamp', 'path_id', 'site_id']
test_df = pd.DataFrame(rows, columns=columns)

In [25]:
test_df.head()

Unnamed: 0,ssid_0,ssid_1,ssid_2,ssid_3,ssid_4,ssid_5,ssid_6,ssid_7,ssid_8,ssid_9,...,frequency_73,frequency_74,frequency_75,frequency_76,frequency_77,frequency_78,frequency_79,wp_tmestamp,path_id,site_id
0,3c1e7602176e050694e3a5cf8ba5f6f725e3ec51,5c072340f8e500f7e62819ab82bb8998ecd0ef4e,a4e38996343460efde1140975529e97c9f9aa60b,d0af9d9c2709796ee07a0432de0e26298a64e3e8,a4e38996343460efde1140975529e97c9f9aa60b,da39a3ee5e6b4b0d3255bfef95601890afd80709,208255716aecce985d8f7a2bb8117b4dee2d53c5,da39a3ee5e6b4b0d3255bfef95601890afd80709,07b466ea3782f19a25830149eb173cfc5c049246,da39a3ee5e6b4b0d3255bfef95601890afd80709,...,0,0,0,0,0,0,0,1573190310706,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547
1,5c072340f8e500f7e62819ab82bb8998ecd0ef4e,d0af9d9c2709796ee07a0432de0e26298a64e3e8,3c1e7602176e050694e3a5cf8ba5f6f725e3ec51,da39a3ee5e6b4b0d3255bfef95601890afd80709,a4e38996343460efde1140975529e97c9f9aa60b,208255716aecce985d8f7a2bb8117b4dee2d53c5,a4e38996343460efde1140975529e97c9f9aa60b,da39a3ee5e6b4b0d3255bfef95601890afd80709,07b466ea3782f19a25830149eb173cfc5c049246,25055861b712f167ba58320e846a4aa70f811204,...,0,0,0,0,0,0,0,1573190314744,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547
2,d0af9d9c2709796ee07a0432de0e26298a64e3e8,3c1e7602176e050694e3a5cf8ba5f6f725e3ec51,f2921f9fa47c704da162c06fc348a98e83464d71,5c072340f8e500f7e62819ab82bb8998ecd0ef4e,da39a3ee5e6b4b0d3255bfef95601890afd80709,a4e38996343460efde1140975529e97c9f9aa60b,a4e38996343460efde1140975529e97c9f9aa60b,208255716aecce985d8f7a2bb8117b4dee2d53c5,f2921f9fa47c704da162c06fc348a98e83464d71,fae4d5efee5c89ade8adbdf72d3e888074d22a29,...,0,0,0,0,0,0,0,1573190323222,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547
3,3c1e7602176e050694e3a5cf8ba5f6f725e3ec51,a4e38996343460efde1140975529e97c9f9aa60b,f2921f9fa47c704da162c06fc348a98e83464d71,da39a3ee5e6b4b0d3255bfef95601890afd80709,d0af9d9c2709796ee07a0432de0e26298a64e3e8,5c072340f8e500f7e62819ab82bb8998ecd0ef4e,a4e38996343460efde1140975529e97c9f9aa60b,208255716aecce985d8f7a2bb8117b4dee2d53c5,da39a3ee5e6b4b0d3255bfef95601890afd80709,07b466ea3782f19a25830149eb173cfc5c049246,...,0,0,0,0,0,0,0,1573190336238,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547
4,3c1e7602176e050694e3a5cf8ba5f6f725e3ec51,d0af9d9c2709796ee07a0432de0e26298a64e3e8,5c072340f8e500f7e62819ab82bb8998ecd0ef4e,da39a3ee5e6b4b0d3255bfef95601890afd80709,208255716aecce985d8f7a2bb8117b4dee2d53c5,f2921f9fa47c704da162c06fc348a98e83464d71,a4e38996343460efde1140975529e97c9f9aa60b,a4e38996343460efde1140975529e97c9f9aa60b,da39a3ee5e6b4b0d3255bfef95601890afd80709,f2921f9fa47c704da162c06fc348a98e83464d71,...,0,0,0,0,0,0,0,1573190347830,00ff0c9a71cc37a2ebdd0f05,5da1389e4db8ce0c98bd0547


In [26]:
test_df['site_path_timestamp'] = test_df['site_id'] + '_' + test_df['path_id'] +'_' + test_df['wp_tmestamp']

In [27]:
test_df = test_df.sort_values('site_path_timestamp').reset_index(drop=True)

In [28]:
test_df.to_csv(f'{DIR_SAVE_IGNORE}test_wifi_num_{WIFI_NUM}.csv', index=False)