In this notebook, I share a method for obtaining a more accurate waypoint for each wifi timestamp by linear interpolation.  

If you find any bugs or misunderstandings on my part, please let me know!

In [None]:
import pandas as pd
import numpy as np
import json
import glob
from PIL import Image
import matplotlib.pyplot as plt
from scipy import interpolate

## Read sample data from published dataset

I use [a great dataset](https://www.kaggle.com/kokitanisaka/indoorunifiedwifids) by [@kouki](https://www.kaggle.com/kokitanisaka).
Thanks for sharing.

In [None]:
# Identifiers of the example used throughout this notebook:
# SAMPLE_SITE selects the site directory / per-site training csv,
# SAMPLE_PATH selects one path (trace) inside that site.
SAMPLE_SITE = '5a0546857ecc773753327266'
SAMPLE_PATH = '5e158ecff4c3420006d52164'

In [None]:
# get sample path dataframe from kouki's dataset.
sample = pd.read_csv(f'../input/indoorunifiedwifids/{SAMPLE_SITE}_train.csv')
# Take an explicit copy of the filtered rows: columns are added to path_df
# further down, and doing that on a slice of `sample` makes pandas emit
# SettingWithCopyWarning (and leaves it ambiguous which frame is modified).
path_df = sample[sample['path'] == SAMPLE_PATH].copy()
# show the label columns (everything from 'x' to the last column)
path_df.loc[:, 'x':]

In [None]:
# visualize the waypoints (x, y) attached to the rows of the sample path
plt.scatter(path_df['x'], path_df['y'])

In [None]:
# Compare the number of rows in the sample path with the number of distinct
# x values found among them.
# NOTE(review): only the 'x' column is counted, so two different waypoints
# that share the same x would be counted once — confirm this is intended.
print(f'number of wifi data: {len(path_df)}')
print(f'number of wifi unique data: {path_df["x"].nunique()}')

In this sample path, there are 27 wifi records but only 8 unique waypoint values, because this dataset assigns the given waypoints to the closest wifi data.  
This approach is reasonable, but the x, y values attached to some wifi records are not accurate.  
So I calculate a more accurate waypoint for each wifi timestamp by linear interpolation.

## Read raw data

In [None]:
# https://github.com/location-competition/indoor-location-competition-20/blob/master/io_f.py
from dataclasses import dataclass

@dataclass
class ReadData:
    """Bundle of the per-record-type arrays that ``read_data_file`` parses from one trace file."""

    # Three-axis sensor streams; each row is [timestamp, value, value, value].
    acce: np.ndarray  # TYPE_ACCELEROMETER
    acce_uncali: np.ndarray  # TYPE_ACCELEROMETER_UNCALIBRATED
    gyro: np.ndarray  # TYPE_GYROSCOPE
    gyro_uncali: np.ndarray  # TYPE_GYROSCOPE_UNCALIBRATED
    magn: np.ndarray  # TYPE_MAGNETIC_FIELD
    magn_uncali: np.ndarray  # TYPE_MAGNETIC_FIELD_UNCALIBRATED
    ahrs: np.ndarray  # TYPE_ROTATION_VECTOR
    # TYPE_WIFI rows: [sys_ts, ssid, bssid, rssi, lastseen_ts], kept as strings.
    wifi: np.ndarray
    # TYPE_BEACON rows: [ts, 'uuid_major_minor', rssi], kept as strings.
    ibeacon: np.ndarray
    # TYPE_WAYPOINT rows: [timestamp, x, y].
    waypoint: np.ndarray
        

def read_data_file(data_filename):
    """
    Parse one tab-separated trace file into a ``ReadData`` bundle.

    Blank lines and lines starting with '#' are ignored. Every other line is
    split on tabs; field 1 is the record type and decides which array the
    line goes to. Lines with an unrecognised record type are skipped.

    data_filename: path of the text file to read (opened as UTF-8).
    returns: ReadData whose attributes are numpy arrays, one per record type.
    """
    # The seven three-axis sensor types share one row layout
    # ([timestamp, v0, v1, v2]), so they are routed through a lookup table.
    sensor_rows = {
        'TYPE_ACCELEROMETER': [],
        'TYPE_ACCELEROMETER_UNCALIBRATED': [],
        'TYPE_GYROSCOPE': [],
        'TYPE_GYROSCOPE_UNCALIBRATED': [],
        'TYPE_MAGNETIC_FIELD': [],
        'TYPE_MAGNETIC_FIELD_UNCALIBRATED': [],
        'TYPE_ROTATION_VECTOR': [],
    }
    wifi_rows = []
    beacon_rows = []
    waypoint_rows = []

    with open(data_filename, 'r', encoding='utf-8') as handle:
        raw_lines = handle.readlines()

    for raw in raw_lines:
        record = raw.strip()
        if not record or record[0] == '#':
            continue

        fields = record.split('\t')
        kind = fields[1]

        if kind in sensor_rows:
            sensor_rows[kind].append(
                [int(fields[0]), float(fields[2]), float(fields[3]), float(fields[4])]
            )
        elif kind == 'TYPE_WIFI':
            # sys_ts, ssid, bssid, rssi, lastseen_ts -- all kept as strings
            wifi_rows.append([fields[0], fields[2], fields[3], fields[4], fields[6]])
        elif kind == 'TYPE_BEACON':
            # uuid, major and minor are merged into a single beacon identifier
            beacon_id = '_'.join([fields[2], fields[3], fields[4]])
            beacon_rows.append([fields[0], beacon_id, fields[6]])
        elif kind == 'TYPE_WAYPOINT':
            waypoint_rows.append([int(fields[0]), float(fields[2]), float(fields[3])])

    return ReadData(
        acce=np.array(sensor_rows['TYPE_ACCELEROMETER']),
        acce_uncali=np.array(sensor_rows['TYPE_ACCELEROMETER_UNCALIBRATED']),
        gyro=np.array(sensor_rows['TYPE_GYROSCOPE']),
        gyro_uncali=np.array(sensor_rows['TYPE_GYROSCOPE_UNCALIBRATED']),
        magn=np.array(sensor_rows['TYPE_MAGNETIC_FIELD']),
        magn_uncali=np.array(sensor_rows['TYPE_MAGNETIC_FIELD_UNCALIBRATED']),
        ahrs=np.array(sensor_rows['TYPE_ROTATION_VECTOR']),
        wifi=np.array(wifi_rows),
        ibeacon=np.array(beacon_rows),
        waypoint=np.array(waypoint_rows),
    )

In [None]:
# read raw data
import os

# The floor directory is not known in advance, so it is matched with a wildcard.
path_candidates = glob.glob(f'../input/indoor-location-navigation/train/{SAMPLE_SITE}/*/{SAMPLE_PATH}.txt')
if not path_candidates:
    # fail with a readable message instead of a bare IndexError on [0]
    raise FileNotFoundError(f'no raw trace file found for site {SAMPLE_SITE}, path {SAMPLE_PATH}')
path_file = path_candidates[0]
example = read_data_file(path_file)
# The floor name is the directory that directly contains the trace file;
# os.path is used so this does not depend on the path separator.
floor = os.path.basename(os.path.dirname(path_file))

In [None]:
# visualize waypoint(same as above)
# example.waypoint rows are [timestamp, x, y] (see read_data_file),
# so column 0 is the time axis and columns 1: are the coordinates.
trajectory = example.waypoint
trajectory_timestamp = trajectory[:,0]
trajectory_waypoint = trajectory[:, 1:]
observed_x = trajectory_waypoint[:, 0]
observed_y = trajectory_waypoint[:, 1]

plt.scatter(observed_x, observed_y)

In [None]:
# visualize with map
# json: floor_info.json provides the map width and height
path_json = f'../input/indoor-location-navigation/metadata/{SAMPLE_SITE}/{floor}/floor_info.json'
with open(path_json) as json_file:
    json_data = json.load(json_file)
width = json_data["map_info"]["width"]
height = json_data["map_info"]["height"]

# image: floor plan picture drawn underneath the waypoints
path_img = f'../input/indoor-location-navigation/metadata/{SAMPLE_SITE}/{floor}/floor_image.png'
im = Image.open(path_img)

fig, ax = plt.subplots(1, 1, figsize=(10, 10), dpi=100)
# extent stretches the image over x in [0, width] and y in [0, height]
# so that it shares the coordinate system of the waypoints
ax.imshow(np.asarray(im), extent=(0, width, 0, height))
ax.plot(observed_x, observed_y, linewidth=2, marker='o', markersize=7, color='red', label='observed')
plt.legend()
# zoom in on the trajectory, leaving a fixed margin around it
margin=10
ax.set_xlim(observed_x.min()-margin, observed_x.max()+margin)
ax.set_ylim(observed_y.min()-margin, observed_y.max()+margin)
plt.show()

In [None]:
# get wifi timestamp
wifi = example.wifi
# Column 0 holds the timestamps as strings. Convert to integers BEFORE
# de-duplicating: np.unique on strings sorts lexicographically, which only
# matches numeric order when every timestamp has the same number of digits.
# int64 is requested explicitly so millisecond timestamps cannot overflow
# on platforms where the default integer is 32-bit.
wifi_timestamp = np.unique(wifi[:, 0].astype(np.int64))

## Linear interpolation

In [None]:
def get_waypoint_by_linear_interpolation(
    observed_timestamp: np.ndarray,
    observed_x: np.ndarray,
    observed_y: np.ndarray,
    target_timestamp: np.ndarray,
    delta_time=500,
) -> np.ndarray:
    """
    Estimate the (x, y) position at each target timestamp by linear interpolation.

    x and y are each interpolated as a function of time between consecutive
    observed waypoints. Interpolating over time (instead of fitting y as a
    function of x) keeps the result well defined when two consecutive
    waypoints share the same x, and gives the exact interpolated position
    rather than one snapped to a coarse time grid.

    Parameters
    ----------
    observed_timestamp, observed_x, observed_y:
        Known waypoints; the three arrays must have the same shape and hold
        at least one element. They do not need to be sorted by time.
    target_timestamp:
        Timestamps at which a position is wanted (may be empty).
    delta_time:
        Accepted only for backward compatibility and ignored: the
        interpolation is exact, so no time-grid resolution is needed.

    Returns
    -------
    np.ndarray of shape (len(target_timestamp), 2)
        Row k is the (x, y) estimate for target_timestamp[k]. Targets before
        the first / after the last observed timestamp get the position of the
        first / last waypoint. With a single observed waypoint, every target
        gets that waypoint.

    Raises
    ------
    ValueError
        If the observed arrays differ in shape or are empty.
    """
    observed_timestamp = np.asarray(observed_timestamp, dtype=float).ravel()
    observed_x = np.asarray(observed_x, dtype=float).ravel()
    observed_y = np.asarray(observed_y, dtype=float).ravel()
    target_timestamp = np.asarray(target_timestamp, dtype=float).ravel()

    if not (observed_timestamp.shape == observed_x.shape == observed_y.shape):
        raise ValueError('observed timestamp, x and y must have the same shape')
    if observed_timestamp.size == 0:
        raise ValueError('at least one observed waypoint is required')

    # np.interp expects increasing sample points, so order the waypoints by
    # time; a stable sort keeps the input order of equal timestamps.
    order = np.argsort(observed_timestamp, kind='stable')
    sorted_timestamp = observed_timestamp[order]

    # np.interp clamps to the end values outside the observed time range.
    target_x = np.interp(target_timestamp, sorted_timestamp, observed_x[order])
    target_y = np.interp(target_timestamp, sorted_timestamp, observed_y[order])

    return np.stack([target_x, target_y], axis=1)

In [None]:
# calc waypoint corresponding to wifi timestamp
# observed_* are the recorded waypoints of the path; the targets are the
# unique wifi timestamps. delta_time is left at its default.
wifi_waypoint = get_waypoint_by_linear_interpolation(
    observed_timestamp=trajectory_timestamp, 
    observed_x=observed_x, 
    observed_y=observed_y, 
    target_timestamp=wifi_timestamp, 
)
wifi_waypoint

## Result

In [None]:
# split the interpolated waypoints into their x (column 0) and y (column 1) parts
wifi_x = wifi_waypoint[:,0]
wifi_y = wifi_waypoint[:,1]

In [None]:
# visualize observed waypoint(red) and interpolated wifi-waypoint(blue)
fig, ax = plt.subplots(1, 1, figsize=(10, 10), dpi=100)
# draw the floor image over x in [0, width], y in [0, height]
ax.imshow(np.asarray(im), extent=(0, width, 0, height))
ax.plot(observed_x, observed_y, linewidth=2, marker='o', alpha=1.0, markersize=7, color='red', label='observed')
# the wifi points are semi-transparent so the observed points underneath stay visible
ax.plot(wifi_x, wifi_y, linewidth=2, marker='o', alpha=0.5, markersize=7, color='blue', label='wifi')
plt.legend()
# zoom in on the interpolated points, leaving a fixed margin around them
margin=10
ax.set_xlim(wifi_x.min()-margin, wifi_x.max()+margin)
ax.set_ylim(wifi_y.min()-margin, wifi_y.max()+margin)
plt.show()

In [None]:
# Attach the interpolated coordinates as new columns.
# NOTE(review): the arrays are assigned by position, so this assumes the rows
# of path_df are in the same order as the sorted wifi timestamps and that
# both have the same length — verify before reusing this on other paths.
path_df['wifi_x'] = wifi_x
path_df['wifi_y'] = wifi_y
# show the label columns together with the new interpolated ones
path_df.loc[:, 'x':]

↑  
  
x,y: waypoint before interpolation  
wifi_x,wifi_y: waypoint after interpolation  
  
**NOTE:** Make sure the waypoints are in the same order as the rows they are assigned to. I recommend merging on timestamp rather than relying on row order.

The difference between before and after interpolation is as follows.

In [None]:
# distribution of the x difference: original label minus interpolated value
plt.hist(path_df['x'] - path_df['wifi_x'])

In [None]:
# distribution of the y difference: original label minus interpolated value
plt.hist(path_df['y'] - path_df['wifi_y'])

You can interpolate the waypoint of all wifi data by applying the same process to other paths. Thanks!