# KNN

* WiFi Fingerprint-based Indoor Localization using `kNN` regression.

In [1]:
import os
import sys

PROJECT_HOME = "./../.."
# Add utilities library for Microsoft Indoor Localization 2.0 Dataset
CODE_DIR = os.path.join(PROJECT_HOME, "code")
print("The code directory is located at", CODE_DIR)
sys.path.append(CODE_DIR)

The code directory is located at ./../../code


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from collections import defaultdict
from sklearn.neighbors import KNeighborsRegressor
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
from sklearn.preprocessing import MaxAbsScaler
from tqdm import tqdm

from py_indoor_loc.floor_map import read_floor_data, extract_floor_map_geometries

In [3]:
%matplotlib inline

In [4]:
np.random.seed(1)

In [5]:
FLOOR_MAP_BASE_DIR = "../../data/metadata/"
DATA_IN_DIR = "../../data/wifi_fingerprint_manual/train/"
DATA_OUT_DIR = "../../data/knn_manual/"

## Read Train/Test Assignment

In [6]:
def get_track_filepaths(assignment_df: pd.DataFrame, base_data_dir: str) -> list[str]:
  result = []

  for _, row in assignment_df.iterrows():
    file_path = f"{base_data_dir}{row['site_id']}/{row['floor_id']}/{row['track_id']}_wifi_fingerprint.csv"
    if not os.path.exists(file_path):
      print(f"File {file_path} does not exist.")
      continue
    result.append(file_path)
    
  return result

In [7]:
def get_band(freq):
  d2400 = np.abs(freq - 2400)
  d5000 = np.abs(freq - 5000)
  band = np.zeros_like(freq)
  band[d2400 < d5000] = 2400
  band[d2400 >= d5000] = 5000
  return band


def extract_bssid_set(wifi_fingerprint_df_list: list[pd.DataFrame], min_times: int = 1000) -> set[str]:
  """
  Extract the set of bssid which occurs at least a specified number of times.

  Args:
    wifi_fingerprint_df_list: a list of 

  Refs:
    https://www.kaggle.com/code/devinanzelmo/wifi-features/notebook
  """
  bssid_sample_count = defaultdict(int)

  for df in wifi_fingerprint_df_list:
    df_bssid_sample_count = df["bssid"].value_counts().to_dict()
    for bssid, count in df_bssid_sample_count.items():
      bssid_sample_count[bssid] += count
  
  bssid_set = {k for k, v in bssid_sample_count.items() if v >= min_times}

  return bssid_set


def create_fingerprint_vector(group_data: pd.DataFrame,
                              bssid_vector: np.ndarray,
                              not_seen_rssi: float = -1000) -> np.ndarray:
  v = np.zeros_like(bssid_vector, dtype=np.float32) + not_seen_rssi

  for bssid, rssi in group_data[["bssid", "rssi"]].values:
    v[bssid_vector == bssid] = rssi

  return v


def extract_fingerprint_df(fingerprint_files, 
                           wifi_band=(2400, 5000), 
                           min_samples: int = 0,
                           not_seen_rssi: float = -1000,
                           max_scan_time_gap_ms: float = 2000):
  if isinstance(wifi_band, int):
    wifi_band = {wifi_band}

  wifi_fingerprint_df_list = [pd.read_csv(file) for file in fingerprint_files]

  # Adding frequency band
  for df in wifi_fingerprint_df_list:
    df["freq_band"] = get_band(df["freq"].values)

  wifi_fingerprint_fb_df_list = [
      df[df["freq_band"].isin(wifi_band) & (df["sys_ts"] - df["last_seen_ts"] <= max_scan_time_gap_ms)] for df in wifi_fingerprint_df_list
  ]

  bssid_set = extract_bssid_set(wifi_fingerprint_fb_df_list, min_times=min_samples)
  print(f"The number of BSSIDs with at least {min_samples} samples: {len(bssid_set)}")
  bssid_vector = np.array(list(bssid_set))

  fingerprint_tuples = []
  for df in tqdm(wifi_fingerprint_fb_df_list):
    for (sys_ts, x, y), group_data in df.groupby(["sys_ts", "x", "y"]):
      fingerprint_vector = create_fingerprint_vector(group_data,
                                                     bssid_vector,
                                                     not_seen_rssi=not_seen_rssi)
      fingerprint_tuples.append((x, y, fingerprint_vector))

  print(f"The number of fingerprints: {len(fingerprint_tuples)}")

  return pd.DataFrame(fingerprint_tuples, columns=["x", "y", "v"]), bssid_vector

In [8]:
def translator(test_bssid, train_bssid, unseen_rssi=-100):
  """
  A translator translates the X_test in the test space into the associated vector in the train space.
  """
  def c(X_test):
    X_test_translated = np.zeros((X_test.shape[0], train_bssid.shape[0])) + unseen_rssi
    for i, train_b in enumerate(train_bssid):
      indices = np.where(test_bssid == train_b)[0]

      if len(indices) == 1:
        X_test_translated[:, i] = X_test[:, indices[0]]
    
    return X_test_translated
  
  return c
    

### Read Train/Test Assignment

In [9]:
train_test_assignment = pd.read_csv("../../data/output/train_test_assignment.csv")
train_test_assignment["site_id"] = train_test_assignment["site_id"].astype("category")
train_test_assignment["floor_id"] = train_test_assignment["floor_id"].astype("category")
train_test_assignment["dataset"] = train_test_assignment["dataset"].astype("category")

In [10]:
site_and_floor = train_test_assignment[["site_id", "floor_id"]].drop_duplicates().values

In [11]:
print(f"The number of (site, floor) pairs: {len(site_and_floor)}")

The number of (site, floor) pairs: 981


In [23]:
def read_train_test_data(site_id: str, floor_id: str, train_test_assignment: pd.DataFrame, base_data_dir: str, **kwargs) -> tuple[pd.DataFrame, np.ndarray, pd.DataFrame, np.ndarray] | None:

  ap_not_seen_rssi = float(kwargs.get("ap_not_seen_rssi", "-1000"))
  print(f"[INFO] Reading track files for site_id={site_id}, floor_id={floor_id}")

  track_assignment = train_test_assignment[(train_test_assignment["site_id"] == site_id) & (train_test_assignment["floor_id"] == floor_id)]
  
  if track_assignment.shape[0] <= 1:
    print(f"[WARN] There must be at least 2 tracks, but found {track_assignment.shape[0]}.")
    return None
  
  train_tracks = track_assignment[track_assignment["dataset"] == "train"]
  test_tracks = track_assignment[track_assignment["dataset"] == "test"]
  train_files = get_track_filepaths(train_tracks, base_data_dir)
  test_files = get_track_filepaths(test_tracks, base_data_dir)

  print(f"[INFO] The number of train files: {len(train_files)}")
  print(f"[INFO] The number of test files: {len(test_files)}")
  
  train_fingerprint_df, train_bssid = extract_fingerprint_df(train_files, min_samples=0, not_seen_rssi=ap_not_seen_rssi)
  test_fingerprint_df, test_bssid = extract_fingerprint_df(test_files, min_samples=0, not_seen_rssi=ap_not_seen_rssi)

  return train_fingerprint_df, train_bssid, test_fingerprint_df, test_bssid

## Visualize train/test data with floormap

In [13]:
def visualize_and_save_fig(site_id, floor_id, train_fingerprint_df, test_fingerprint_df):
  
  floor_info, floor_map = read_floor_data(os.path.join(FLOOR_MAP_BASE_DIR, site_id, floor_id))

  transform_func = lambda coords: coords
  (
    floor_polygons,
    store_polygons,
    x_min,
    y_min,
    x_max,
    y_max,
    width_meter,
    height_meter,
  ) = extract_floor_map_geometries(floor_map, floor_info, transform=transform_func)
  fig, ax = plt.subplots(1, 1, figsize=(18, 12))

  for floor_polygon in floor_polygons:
    x, y = floor_polygon.exterior.xy
    _ = ax.plot(x, y, color="green")

  for store_polygon in store_polygons:
    x, y = store_polygon.exterior.xy
    _ = ax.plot(x, y, color="blue")

  _ = ax.set_xlabel("x")
  _ = ax.set_ylabel("y")
  _ = ax.set_title("Floormap")

  train_fingerprint_location = transform_func(train_fingerprint_df[["x", "y"]].values)
  test_fingerprint_location = transform_func(test_fingerprint_df[["x", "y"]].values)
  _ = ax.scatter(train_fingerprint_location[:, 0],
            train_fingerprint_location[:, 1],
            marker="o",
            color="red",
            label="train")
  _ = ax.scatter(test_fingerprint_location[:, 0],
            test_fingerprint_location[:, 1],
            marker="^",
            color="blue",
            label="test")
  _ = ax.legend()

  _ = plt.ticklabel_format(useOffset=False)
  _ = plt.savefig(os.path.join(DATA_OUT_DIR, site_id, floor_id, "floor_map_with_waypoints.png"))

  plt.close()

## `kNN` Regression

In [14]:
def run_knn_regression(train_fingerprint_df, train_bssid, test_fingerprint_df, test_bssid, results) -> bool:

  try:
    X_train = np.vstack(train_fingerprint_df["v"].values.tolist())
    y_train = train_fingerprint_df[["x", "y"]].values

    X_test = np.vstack(test_fingerprint_df["v"].values.tolist())
    y_test = test_fingerprint_df[["x", "y"]].values

    assert len(X_train) == len(y_train)
    assert len(X_test) == len(y_test)

    train_test_translate = translator(test_bssid, train_bssid, unseen_rssi=-1000)
    X_test_translated = train_test_translate(X_test)

    assert X_test_translated.shape[1] == X_train.shape[1]

    print("[INFO] Scaling data using MaxAbsScaler")
    scaler = MaxAbsScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test_translated)
    print("[INFO] PCA: Choosing the number of components.")
    pca = PCA()
    _ = pca.fit(X_train_scaled)
    explained_variance_ratio_csum = np.cumsum(pca.explained_variance_ratio_)
    n_components = np.where(explained_variance_ratio_csum >= 0.9)[0][0] + 1
    print(f"[INFO] PCA: n_components={n_components} -> explained variance ratio: {explained_variance_ratio_csum[n_components - 1]:.2f}")

    pca = PCA(n_components=n_components)
    _ = pca.fit(X_train_scaled)
    X_train_pca = pca.transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)

    print(f"[INFO] Running kNN regressor and reporting errors")
    k_values = [1, 2, 3, 4]
    model_list = []
    error_list = []

    for n_neighbors in k_values:
      knn = KNeighborsRegressor(n_neighbors=n_neighbors, weights="uniform", algorithm="ball_tree")
      knn.fit(X_train_pca, y_train)
      y_pred = knn.predict(X_test_pca)
      errors = np.sqrt(np.sum((y_test - y_pred) ** 2, axis=1))
      
      model_list.append(knn)
      error_list.append(errors)
    

    for k, errors in zip(k_values, error_list):
      results.append({
        "site_id": site_id,
        "floor_id": floor_id,
        "algorithm": f"{k}-NN",
        "error_mean": errors.mean(),
        "error_std": errors.std(),
        "test_size": errors.shape[0]
      })

    return True
  
  except Exception as e:
    print(f"[WARN] Failed to run knn regressions, caused by {str(e)}")
    return False


## Pipeline

In [15]:
import gc

In [33]:
result_frames = []

for site_id, floor_id in tqdm(site_and_floor):
  track_assignment = train_test_assignment[(train_test_assignment["site_id"] == site_id) & (train_test_assignment["floor_id"] == floor_id)]
  if track_assignment.shape[0] <= 1:
    print("[WARN] There must be at least 2 tracks, found {}".format(track_assignment.shape[0]))
    continue

  if os.path.exists(os.path.join(DATA_OUT_DIR, site_id, floor_id, "knn_results.csv")):
    try:
      knn_result_df = pd.read_csv(os.path.join(DATA_OUT_DIR, site_id, floor_id, "knn_results.csv"))
      result_frames.append(knn_result_df)
    except Exception as ignored:
      print(os.path.join(DATA_OUT_DIR, site_id, floor_id, "knn_results.csv"))
    continue
  
  knn_results = []

  print(f"[INFO] Starting to run kNN for site {site_id} / floor {floor_id}")
  print("[INFO] Creating directory to store output data (if not exists)")
  os.makedirs(os.path.join(DATA_OUT_DIR, site_id, floor_id), exist_ok=True)

  print("[INFO] Reading train/test data")
  data_read_result = read_train_test_data(site_id, floor_id, train_test_assignment, DATA_IN_DIR)
  if data_read_result is None:
    continue
  
  train_fingerprint_df, train_bssid, test_fingerprint_df, test_bssid = data_read_result

  print("[INFO] Creating and saving visualization")
  if not os.path.exists(os.path.join(DATA_OUT_DIR, site_id, floor_id, "floor_map_with_waypoints.png")):
    visualize_and_save_fig(site_id, floor_id, train_fingerprint_df, test_fingerprint_df)

  print("[INFO] Running kNN regressor with PCA")
  _ = run_knn_regression(train_fingerprint_df, train_bssid, test_fingerprint_df, test_bssid, knn_results)

  knn_result_df = pd.DataFrame(knn_results)
  knn_result_df.to_csv(os.path.join(DATA_OUT_DIR, site_id, floor_id, "knn_results.csv"), index=False)
  result_frames.append(knn_result_df)

  # Enforce garbage collection
  del train_fingerprint_df
  del train_bssid
  del test_fingerprint_df
  del test_bssid
  gc.collect()

  0%|          | 0/981 [00:00<?, ?it/s]

  9%|▉         | 93/981 [00:00<00:02, 296.86it/s]

[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1


 13%|█▎        | 123/981 [00:00<00:02, 290.02it/s]

[WARN] There must be at least 2 tracks, found 1
[INFO] Starting to run kNN for site 5cd56b8be2acfd2d33b5db68 / floor B1
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd56b8be2acfd2d33b5db68, floor_id=B1
[INFO] The number of train files: 1
[INFO] The number of test files: 3
The number of BSSIDs with at least 0 samples: 4


100%|██████████| 1/1 [00:00<00:00, 379.85it/s]


The number of fingerprints: 2
The number of BSSIDs with at least 0 samples: 2


100%|██████████| 3/3 [00:00<00:00, 470.30it/s]


The number of fingerprints: 3
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[INFO] Scaling data using MaxAbsScaler
[INFO] PCA: Choosing the number of components.
[INFO] PCA: n_components=1 -> explained variance ratio: 1.00
[INFO] Running kNN regressor and reporting errors
[WARN] Failed to run knn regressions, caused by Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 3


 17%|█▋        | 163/981 [00:00<00:03, 225.60it/s]

[WARN] There must be at least 2 tracks, found 1


 19%|█▉        | 188/981 [00:00<00:03, 213.15it/s]

[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[INFO] Starting to run kNN for site 5cd56b96e2acfd2d33b5ef8f / floor F5
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd56b96e2acfd2d33b5ef8f, floor_id=F5
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 100


100%|██████████| 2/2 [00:00<00:00, 43.37it/s]

The number of fingerprints: 48
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate



 22%|██▏       | 211/981 [00:01<00:04, 165.26it/s]

[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1


 25%|██▌       | 249/981 [00:01<00:03, 210.74it/s]

[INFO] Starting to run kNN for site 5cd56ba5e2acfd2d33b60e03 / floor B1
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd56ba5e2acfd2d33b60e03, floor_id=B1
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 87


100%|██████████| 2/2 [00:00<00:00, 51.18it/s]


The number of fingerprints: 43
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[INFO] Starting to run kNN for site 5cd56babe2acfd2d33b61826 / floor F4
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd56babe2acfd2d33b61826, floor_id=F4
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]

The number of fingerprints: 0





The number of BSSIDs with at least 0 samples: 136


100%|██████████| 2/2 [00:00<00:00, 28.70it/s]

The number of fingerprints: 79
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate



 40%|████      | 396/981 [00:01<00:02, 251.21it/s]

[WARN] There must be at least 2 tracks, found 1


 44%|████▎     | 429/981 [00:01<00:02, 261.61it/s]

[WARN] There must be at least 2 tracks, found 1
[INFO] Starting to run kNN for site 5cd56c03e2acfd2d33b69c1f / floor F4
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd56c03e2acfd2d33b69c1f, floor_id=F4
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 301


100%|██████████| 2/2 [00:00<00:00, 31.09it/s]

The number of fingerprints: 61
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate



 47%|████▋     | 461/981 [00:02<00:02, 197.82it/s]

[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[INFO] Starting to run kNN for site 5cd56c17e2acfd2d33b6c161 / floor F2
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd56c17e2acfd2d33b6c161, floor_id=F2
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 64


100%|██████████| 2/2 [00:00<00:00, 62.90it/s]
 50%|████▉     | 487/981 [00:02<00:02, 176.67it/s]

The number of fingerprints: 36
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate


 57%|█████▋    | 555/981 [00:02<00:01, 238.22it/s]

[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[INFO] Starting to run kNN for site 5cd969b839e2fc0b4afe6edc / floor F1
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd969b839e2fc0b4afe6edc, floor_id=F1
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 366


100%|██████████| 2/2 [00:00<00:00, 19.48it/s]


The number of fingerprints: 114
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate
[INFO] Starting to run kNN for site 5cd969b839e2fc0b4afe6edc / floor F2
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd969b839e2fc0b4afe6edc, floor_id=F2
[INFO] The number of train files: 0
[INFO] The number of test files: 3
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 326


100%|██████████| 3/3 [00:00<00:00, 39.75it/s]


The number of fingerprints: 84
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate


 60%|█████▉    | 584/981 [00:03<00:02, 132.50it/s]

[INFO] Starting to run kNN for site 5cd969bb39e2fc0b4afe7079 / floor L5
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd969bb39e2fc0b4afe7079, floor_id=L5
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 104


100%|██████████| 2/2 [00:00<00:00, 19.55it/s]


The number of fingerprints: 129
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate
[INFO] Starting to run kNN for site 5cd969bc39e2fc0b4afe71ac / floor F2
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd969bc39e2fc0b4afe71ac, floor_id=F2
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 204


100%|██████████| 2/2 [00:00<00:00, 36.95it/s]


The number of fingerprints: 50
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate
[INFO] Starting to run kNN for site 5cd969bc39e2fc0b4afe71ac / floor F3
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd969bc39e2fc0b4afe71ac, floor_id=F3
[INFO] The number of train files: 0
[INFO] The number of test files: 3
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 185


100%|██████████| 3/3 [00:00<00:00, 58.57it/s]


The number of fingerprints: 47
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate


 62%|██████▏   | 606/981 [00:03<00:04, 79.24it/s] 

[INFO] Starting to run kNN for site 5cd969c339e2fc0b4afe7775 / floor B1
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd969c339e2fc0b4afe7775, floor_id=B1
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 76


100%|██████████| 2/2 [00:00<00:00, 57.93it/s]
 64%|██████▎   | 623/981 [00:03<00:04, 79.29it/s]

The number of fingerprints: 37
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate
[WARN] There must be at least 2 tracks, found 1
[INFO] Starting to run kNN for site 5cd969c839e2fc0b4afe7ff0 / floor F10
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cd969c839e2fc0b4afe7ff0, floor_id=F10
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 65


100%|██████████| 2/2 [00:00<00:00, 78.12it/s]


The number of fingerprints: 24
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate


 65%|██████▍   | 637/981 [00:04<00:04, 75.22it/s]

[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1


 70%|███████   | 689/981 [00:04<00:02, 125.48it/s]

[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1


 73%|███████▎  | 716/981 [00:04<00:01, 150.68it/s]

[WARN] There must be at least 2 tracks, found 1


 80%|████████  | 786/981 [00:04<00:01, 187.31it/s]

[INFO] Starting to run kNN for site 5cdac625e403deddaf4680d2 / floor F4
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cdac625e403deddaf4680d2, floor_id=F4
[INFO] The number of train files: 0
[INFO] The number of test files: 2
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 201


100%|██████████| 2/2 [00:00<00:00, 29.81it/s]


The number of fingerprints: 50
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate


 82%|████████▏ | 808/981 [00:05<00:01, 137.87it/s]

[INFO] Starting to run kNN for site 5cdac625e403deddaf4680e6 / floor F1
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5cdac625e403deddaf4680e6, floor_id=F1
[INFO] The number of train files: 1
[INFO] The number of test files: 1
The number of BSSIDs with at least 0 samples: 33


100%|██████████| 1/1 [00:00<00:00, 145.01it/s]


The number of fingerprints: 3
The number of BSSIDs with at least 0 samples: 47


100%|██████████| 1/1 [00:00<00:00, 68.63it/s]


The number of fingerprints: 8
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[INFO] Scaling data using MaxAbsScaler
[INFO] PCA: Choosing the number of components.
[INFO] PCA: n_components=2 -> explained variance ratio: 1.00
[INFO] Running kNN regressor and reporting errors
[WARN] Failed to run knn regressions, caused by Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 4
[WARN] There must be at least 2 tracks, found 1


 84%|████████▍ | 826/981 [00:05<00:01, 108.93it/s]

[WARN] There must be at least 2 tracks, found 1
[WARN] There must be at least 2 tracks, found 1
[INFO] Starting to run kNN for site 5d27075f03f801723c2e360f / floor F6
[INFO] Creating directory to store output data (if not exists)
[INFO] Reading train/test data
[INFO] Reading track files for site_id=5d27075f03f801723c2e360f, floor_id=F6
[INFO] The number of train files: 0
[INFO] The number of test files: 3
The number of BSSIDs with at least 0 samples: 0


0it [00:00, ?it/s]


The number of fingerprints: 0
The number of BSSIDs with at least 0 samples: 126


100%|██████████| 3/3 [00:00<00:00, 14.43it/s]
 92%|█████████▏| 900/981 [00:05<00:00, 130.37it/s]

The number of fingerprints: 180
[INFO] Creating and saving visualization
[INFO] Running kNN regressor with PCA
[WARN] Failed to run knn regressions, caused by need at least one array to concatenate


100%|██████████| 981/981 [00:06<00:00, 156.93it/s]


In [36]:
result_df = pd.concat(result_frames)