# WiFi Localisation Starting Notebook
## Cyberthon 2022 organised by CSIT, DCO and HCI

This notebook is a starting point for exploring the data; using it is optional.

ALT-TAB LABS LLP &copy; 2019 - 2022 All Rights Reserved

In [1]:
# import libraries
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statistics import mean
import math

# feel free to add any library you need

In [2]:
# load training data
# Read the JSON explicitly as UTF-8 so the result does not depend on the
# platform's default text encoding.
with open('wifi_train.json', 'r', encoding='utf-8') as f:
    train_json = json.load(f)

# scanid is forced to str so each value can be used directly as a key into train_json.
df_train = pd.read_csv(
    'train.csv',
    index_col=0,
    dtype={'scanid': str, 'locationX': np.float64, 'locationY': np.float64},
)
scanid, locationX, locationY = (
    df_train[column].values for column in ('scanid', 'locationX', 'locationY')
)
# One (x, y) target tuple per row, in the same order as `scanid`.
location_train = list(zip(locationX, locationY))

In [3]:
# preprocess your data

# Accumulators filled by the loops further down in this cell: one timestamp per
# training scan, and the set of every key found under a scan's 'data' mapping.
timestamp_train, mac_list = [], set()


def rssi_to_distance(rssi):
    """Convert one RSSI reading into a positive, distance-style feature value.

    Evaluates ``10 ** ((27.55 - 20 * log10(2.4e9) + rssi) / 20)``. The result
    increases with ``rssi`` and grows tenfold for every +20 in the input.

    NOTE(review): 27.55 is the constant normally used with a frequency given in
    MHz, whereas 2.4e9 looks like Hz, and ``rssi`` is used with its own sign.
    The output is therefore presumably not a distance in metres; confirm before
    interpreting it physically. The numeric behaviour is intentionally unchanged.

    :param rssi: signal-strength reading (a number).
    :return: float value of the formula above.
    """
    frequency_term = 20 * math.log10(2.4e9)
    exponent = (27.55 - frequency_term + rssi) / 20
    return 10 ** exponent


# Pass 1: record each scan's timestamp and collect every key seen under 'data'.
for scan in scanid:
    record = train_json[scan]
    timestamp_train.append(record['timestamp']['value']['_seconds'])
    mac_list.update(record['data'].keys())

# Freeze the vocabulary in sorted order. Iterating the raw set yields an order
# that depends on string hashing, which can differ between kernel restarts, so
# the feature columns (and any model sensitive to column order) would not be
# reproducible. Later cells iterate `mac_list` and therefore get this same order.
mac_list = sorted(mac_list)

# Pass 2: one column per key, one row per scan, in `scanid` order.
rssi_train = {mac: [] for mac in mac_list}
for scan in scanid:
    readings_by_mac = train_json[scan]['data']
    for mac in mac_list:
        readings = readings_by_mac.get(mac)
        if readings:
            # Average the transformed readings for this key within the scan.
            rssi_train[mac].append(mean([rssi_to_distance(r) for r in readings]))
        else:
            # Key absent from this scan, or present with no readings
            # (mean() of an empty list would raise): fill NA with 0.
            rssi_train[mac].append(0.0)

# Every column must have exactly one value per training row.
assert all(len(column) == len(location_train) for column in rssi_train.values())
features_df = pd.concat(
    [pd.DataFrame({'timestamp': timestamp_train}), pd.DataFrame(rssi_train)],
    axis=1,
)
assert len(features_df) == len(location_train)

In [4]:
# Import only the sklearn submodules used here rather than `from sklearn import *`,
# so it is clear where each name comes from.
from sklearn import ensemble, multioutput, neighbors

# Fixed seed so the randomised extra-trees model produces the same fit on every run.
RANDOM_STATE = 42

# NOTE(review): features_df contains the raw `timestamp` column next to the
# much smaller RSSI-derived columns, with no scaling. For the distance-based
# KNN model the timestamp presumably dominates the metric; confirm this is intended.
model2 = ensemble.ExtraTreesRegressor(
    n_estimators=200, verbose=1, n_jobs=-1, random_state=RANDOM_STATE
)
model3 = neighbors.KNeighborsRegressor(n_neighbors=3, weights='distance', p=30, n_jobs=-1)

# Wrap each estimator so the two targets (locationX, locationY) are handled
# through MultiOutputRegressor, as in the original setup.
model2 = multioutput.MultiOutputRegressor(model2, n_jobs=-1)
model3 = multioutput.MultiOutputRegressor(model3, n_jobs=-1)

model2.fit(features_df, location_train)
model3.fit(features_df, location_train)

MultiOutputRegressor(estimator=KNeighborsRegressor(n_jobs=-1, n_neighbors=3,
                                                   p=30, weights='distance'),
                     n_jobs=-1)

In [5]:
# load test data
# Read the JSON explicitly as UTF-8 so the result does not depend on the
# platform's default text encoding.
with open('wifi_test.json', 'r', encoding='utf-8') as f:
    test_json = json.load(f)

In [6]:
# preprocess your test data

# Read scan ids as strings, matching how train.csv is loaded, so they are used
# as keys into test_json exactly as written in the file.
df_test = pd.read_csv('submission.csv', dtype={'scanid': str})
scanid_test = df_test['scanid'].values

timestamp_test = [
    test_json[scan]['timestamp']['value']['_seconds'] for scan in scanid_test
]

# Build the same columns as the training features by iterating `mac_list`.
# Keys that appear only in the test scans are not in `mac_list` and are ignored.
rssi_test = {mac: [] for mac in mac_list}
for scan in scanid_test:
    readings_by_mac = test_json[scan]['data']
    for mac in mac_list:
        readings = readings_by_mac.get(mac)
        if readings:
            # Average the transformed readings for this key within the scan.
            rssi_test[mac].append(mean([rssi_to_distance(r) for r in readings]))
        else:
            # Key absent from this scan, or present with no readings
            # (mean() of an empty list would raise): fill NA with 0.
            rssi_test[mac].append(0.0)

features_df_test = pd.concat(
    [pd.DataFrame({'timestamp': timestamp_test}), pd.DataFrame(rssi_test)],
    axis=1,
)
# The fitted models expect one row per test scan and the training column layout.
assert len(features_df_test) == len(scanid_test)
assert list(features_df_test.columns) == list(features_df.columns)

In [7]:
# predict
# Run both fitted models on the test features, model2 first and then model3.
prediction2, prediction3 = (
    fitted.predict(features_df_test) for fitted in (model2, model3)
)

In [8]:
# Blend the two models' outputs: model2 contributes 10 %, model3 contributes 90 %.
WEIGHT_MODEL2, WEIGHT_MODEL3 = 0.1, 0.9
prediction = prediction2 * WEIGHT_MODEL2 + prediction3 * WEIGHT_MODEL3

# Column 0 of the prediction array goes to locationX, column 1 to locationY.
df_test['locationX'], df_test['locationY'] = prediction[:, 0], prediction[:, 1]
# Written back to the same file that the scan ids were read from.
df_test.to_csv('submission.csv', index=False)

# Submitting to the CTFSG DataScience Grader
ALT-TAB LABS LLP &copy; 2020-2021 All Rights Reserved

In [9]:
# Connect to graders
import getpass
import os
import random
import sys

# sys.path entries must be directories; inserting the path of the .py file
# itself has no effect. Use the folder that contains pyctfsglib.py, which can
# be overridden with the PYCTFSGLIB_DIR environment variable.
# insert at 1, 0 is the script path (or '' in REPL)
PYCTFSGLIB_DIR = os.environ.get(
    'PYCTFSGLIB_DIR',
    'C:/Users/alien/Documents/PyCharm Projects/Cyberthon 2021',
)
if PYCTFSGLIB_DIR not in sys.path:  # avoid stacking duplicates when the cell is re-run
    sys.path.insert(1, PYCTFSGLIB_DIR)
import pyctfsglib as ctfsg

# Never store the grader token in the notebook. Read it from the
# CTFSG_USER_TOKEN environment variable, or prompt for it without echoing.
# Any token that was previously saved in this file should be treated as
# exposed and replaced.
USER_TOKEN = os.environ.get('CTFSG_USER_TOKEN') or getpass.getpass('CTFSG user token: ')
GRADER_URL = random.choice([
  "http://chals.cyberthon22t.ctf.sg:50401/",
  "http://chals.cyberthon22t.ctf.sg:50402/"
])

grader = ctfsg.DSGraderClient(GRADER_URL, USER_TOKEN)
grader.submitFile('submission.csv')

DSGraderClient: Successfully Connected!
[SERVER] MOTD: CHECK your USER_TOKEN and GRADER_URL HTTP address! I'm WIFI_LOCATION @ed623a2c0b67
ProofOfWork Challenge =>  ('CTFSGRBae014fdfc1fe3f3afa973c7a0fcc32e1', 22)
ProofOfWork Answer Found! =>  4683687


'{"challenge":{"name":"WiFi Triangulation"},"id":"cl2bs7dmwid5r082710r9yy9k","status":"PARTIALLY_CORRECT","multiplier":0.9996,"submittedBy":{"username":"hci-69"},"createdAt":"2022-04-23T11:31:52Z"}'