In [1]:
import numpy as np
import pandas as pd
import os

# Step up to the parent directory so the relative 'data/...' paths used by the
# cells below resolve from there (presumably the project root -- confirm).
# NOTE(review): the target is relative to the current working directory, so
# running this cell a second time in the same kernel moves up one more level.
os.chdir('../')

In [2]:
def parse_csv(path, columns, sep=';', chunk_size=5, head_size=4, tail_columns_count=0):
    """Flatten a delimited text file with repeated field groups into a DataFrame.

    Every line is expected to hold ``head_size`` leading fields, followed by
    zero or more groups of ``chunk_size`` fields, followed by
    ``tail_columns_count`` trailing fields. One output row is produced per
    group: the line's head fields followed by that group's fields. The
    trailing fields are not included in the output. All values are kept as
    the strings read from the file.

    Parameters
    ----------
    path : str or path-like
        Text file to read.
    columns : list of str
        Column names for the resulting frame (``head_size + chunk_size`` names).
    sep : str
        Field separator used on every line.
    chunk_size : int
        Number of fields in each repeated group.
    head_size : int
        Number of leading fields shared by all groups of a line.
    tail_columns_count : int
        Number of fields expected after the last complete group.

    Returns
    -------
    pandas.DataFrame
        One row per group found in the file.

    Raises
    ------
    ValueError
        If the number of fields after the head does not leave exactly
        ``tail_columns_count`` fields once split into groups.
    """
    data = []
    with open(path) as file:
        for line in file:
            # Strip the line terminator first; otherwise it stays glued to the
            # last field of the line and leaks into the last group's final column.
            measurement = line.rstrip('\r\n').split(sep)
            head = measurement[:head_size]
            rest = measurement[head_size:]

            if len(rest) % chunk_size != tail_columns_count:
                # ValueError is a subclass of Exception, so callers that catch
                # Exception keep working.
                raise ValueError('Row size incorrect for line: ' + line)

            # Only complete groups are emitted; the tail fields are left out.
            for start in range(0, len(rest) - tail_columns_count, chunk_size):
                data.append(head + rest[start:start + chunk_size])

    return pd.DataFrame(data, columns=columns)

# Convert WiFi files into Parquet format

In [5]:
# Column names for WiFi.txt rows; the separator and group sizes are left at
# parse_csv's defaults.
settings = dict(
    columns=[
        'Epoch time [ms]', 'Ignore1', 'Ignore2', 'Unknown',
        'BSSID', 'SSID', 'RSSI', 'Frequency [MHz]', 'Capabilities',
    ],
)

# Parse the validation split first and show it before writing it out.
df = parse_csv('data/validate/WiFi.txt', **settings)
display(df)
df.to_parquet('data/validate/WiFi.parquet')

# The remaining splits are converted without being displayed.
for src, dst in (
    ('data/train/WiFi.txt', 'data/train/WiFi.parquet'),
    ('data/test/WiFi.txt', 'data/test/WiFi.parquet'),
):
    parse_csv(src, **settings).to_parquet(dst)

Unnamed: 0,Epoch time [ms],Ignore1,Ignore2,Unknown,BSSID,SSID,RSSI,Frequency [MHz],Capabilities
0,1497426495755,94062335923,25013238017,11,70:50:af:2d:ed:d9,SKY484B1,-36,2462,[WPA2-PSK-CCMP][WPS][ESS]
1,1497426495755,94062335923,25013238017,11,74:44:01:f9:12:4c,virginmedia7072875,-66,2437,[WPA-PSK-CCMP+TKIP][WPA2-PSK-CCMP+TKIP][WPS][ESS]
2,1497426495755,94062335923,25013238017,11,22:03:d8:03:39:62,BTWifi-X,-66,2462,[WPA-EAP-CCMP+TKIP][WPA2-EAP-CCMP+TKIP-preauth...
3,1497426495755,94062335923,25013238017,11,1c:a5:32:a2:fd:1b,EXT2-VM7640780,-72,2437,[WPA2-PSK-CCMP][ESS]
4,1497426495755,94062335923,25013238017,11,00:03:d8:03:39:62,BTHub3-PHSH,-65,2462,[WPA-PSK-CCMP+TKIP][WPA2-PSK-CCMP+TKIP][WPS][ESS]
...,...,...,...,...,...,...,...,...,...
1486473,1499462768865,39302909359643,225472967673,8,92:4d:4a:2a:f5:7c,BTWifi-X,-82,2462,[WPA2-EAP-CCMP-preauth][ESS]
1486474,1499462768865,39302909359643,225472967673,8,64:0f:29:0d:f7:b1,Albion Dental,-70,2432,[WPA2-PSK-CCMP][ESS]
1486475,1499462768865,39302909359643,225472967673,8,92:4d:4a:2a:f5:7f,BTWifi-with-FON,-82,2462,[ESS]
1486476,1499462768865,39302909359643,225472967673,8,64:0f:29:0d:f7:b4,BTOpenzone,-70,2432,[ESS]


In [6]:
# Convert GPS files into Parquet format

In [7]:
# Parsing options for GPS.txt: space-separated lines with three head fields,
# groups of four fields, and one trailing field after the last group.
settings = dict(
    columns=[
        'Epoch time [ms]', 'Ignore1', 'Ignore2',
        'ID', 'SNR', 'Azimuth [degrees]', 'Elevation [degrees]',
    ],
    head_size=3,
    chunk_size=4,
    sep=' ',
    tail_columns_count=1,
)

# Parse the validation split first and show it before writing it out.
df = parse_csv('data/validate/GPS.txt', **settings)
display(df)
df.to_parquet('data/validate/GPS.parquet')

# The remaining splits are converted without being displayed.
for src, dst in (
    ('data/train/GPS.txt', 'data/train/GPS.parquet'),
    ('data/test/GPS.txt', 'data/test/GPS.parquet'),
):
    parse_csv(src, **settings).to_parquet(dst)

Unnamed: 0,Epoch time [ms],Ignore1,Ignore2,ID,SNR,Azimuth [degrees],Elevation [degrees]
0,1497426497900,96207437485,25013238017,168,27.0,109.0,23.0
1,1497426498504,96810820297,25013238017,17,36.0,61.0,33.0
2,1497426498504,96810820297,25013238017,168,27.0,109.0,23.0
3,1497426499510,97817632276,25013238017,15,22.0,179.0,51.0
4,1497426499510,97817632276,25013238017,17,37.0,61.0,33.0
...,...,...,...,...,...,...,...
1516194,1497460951343,19834947491818,-14929687822,169,25.0,132.0,40.0
1516195,1497460952344,19835948809005,-14929687822,10,22.0,99.0,57.0
1516196,1497460952344,19835948809005,-14929687822,169,25.0,132.0,40.0
1516197,1497460953344,19836948478797,-14929687822,10,22.0,99.0,57.0
