In [None]:
import os
import json
import numpy as np
import pandas as pd
from pathlib import Path
import multiprocessing
from tqdm import tqdm
import cv2
import math
import glob

In [None]:
# Seed handed to the NumPy random generator that samples candidate grid points.
SEED = 1111

# Floor label -> integer floor index. Two naming conventions for the same
# above-ground floors are listed ("F1" and "1F" both map to 0); basement
# floors get negative indices. Insertion order matters: lookups that go from
# index back to label try the "Fn" spelling before the "nF" spelling.
FLOOR_MAP = {
    # basement floors
    "B2": -2,
    "B1": -1,
    # "F<n>" spelling
    "F1": 0,
    "F2": 1,
    "F3": 2,
    "F4": 3,
    "F5": 4,
    "F6": 5,
    "F7": 6,
    "F8": 7,
    "F9": 8,
    # "<n>F" spelling
    "1F": 0,
    "2F": 1,
    "3F": 2,
    "4F": 3,
    "5F": 4,
    "6F": 5,
    "7F": 6,
    "8F": 7,
    "9F": 8,
}

# Waypoint table, loaded once when this cell runs. generate_grid_point filters
# it by the 'site' and 'floor' columns and reads the 'x' and 'y' columns.
WAYPOINTS_DF = pd.read_csv('/kaggle/input/indoor-supplementals-for-postprocessing/waypoint.csv')

In [None]:
def metadata_dir():
    """Return the directory that holds the per-site, per-floor metadata.

    Returns:
        pathlib.Path: root folder under which files are looked up as
        ``<site>/<floor label>/<file name>``.
    """
    metadata_root = Path('/kaggle/input/indoor-location-navigation/metadata')
    return metadata_root

def floor2strs(floor):
    """Return every floor label in FLOOR_MAP that maps to ``floor``.

    Args:
        floor: integer floor index (a value of FLOOR_MAP).

    Returns:
        list[str]: matching labels in FLOOR_MAP's insertion order; empty when
        no label maps to ``floor``.
    """
    labels = []
    for label, index in FLOOR_MAP.items():
        if index == floor:
            labels.append(label)
    return labels

def get_map_info(site, floor):
    """Read the floor-plan dimensions for one floor of a site.

    Each label returned by ``floor2strs(floor)`` is tried in order and the
    first ``<metadata_dir>/<site>/<label>/floor_info.json`` that exists is
    used.

    Args:
        site: site identifier (directory name under the metadata root).
        floor: integer floor index.

    Returns:
        tuple: ``(height, width)`` taken from the file's ``map_info`` entry.

    Raises:
        FileNotFoundError: if the floor index has no known label, or none of
            the candidate files exists.
    """
    candidates = [
        metadata_dir() / site / floor_str / "floor_info.json"
        for floor_str in floor2strs(floor)
    ]
    json_path = next((path for path in candidates if path.exists()), None)
    if json_path is None:
        # Fail here with the full list of attempts instead of hitting an
        # unbound variable (no labels) or an error for just the last candidate.
        tried = ", ".join(str(path) for path in candidates) or "<no floor labels>"
        raise FileNotFoundError(
            f"floor_info.json not found for site={site!r}, floor={floor!r}; tried: {tried}"
        )

    with open(json_path, "r") as f:
        info = json.load(f)
    map_info = info['map_info']
    return map_info['height'], map_info['width']

def find_nearest_waypoints(xy, waypoints):
    """Return the row of ``waypoints`` closest to ``xy`` (Euclidean distance).

    Args:
        xy: array broadcastable against the rows of ``waypoints``.
        waypoints: 2-D array with one candidate point per row.

    Returns:
        The nearest row of ``waypoints``; on ties the first such row wins.
    """
    offsets = waypoints - xy
    # Squared distances are enough to pick the minimum; no sqrt needed.
    squared_dist = np.square(offsets).sum(axis=1)
    nearest_idx = np.argmin(squared_dist)
    return waypoints[nearest_idx]

def coodinate_to_pixel(x, y, height, width, shape):
    """Map a floor coordinate to a pixel index of an image of ``shape``.

    ``x`` is scaled by ``shape[1] / width``. ``y`` is flipped
    (``1 - y / height``) before being scaled by ``shape[0]``, so ``y = 0``
    lands on the last image row. Both indices are truncated to ``int`` and
    clamped into the valid index range.

    Args:
        x, y: coordinate in the same units as ``width`` / ``height``.
        height, width: extent of the floor in coordinate units.
        shape: image shape; ``shape[0]`` is rows, ``shape[1]`` is columns.

    Returns:
        tuple[int, int]: ``(p_x, p_y)`` = (column index, row index).
    """
    n_rows, n_cols = shape[0], shape[1]

    col = int((x / width) * n_cols)
    row = int((1 - y / height) * n_rows)

    # Clamp: cap at the last index first, then floor at zero.
    last_col = n_cols - 1
    if col > last_col:
        col = last_col
    if col < 0:
        col = 0

    last_row = n_rows - 1
    if row > last_row:
        row = last_row
    if row < 0:
        row = 0

    return col, row

In [None]:
def extract_permitted_area_from_map(site, floor):
    """Build a mask of the area where grid points may be placed on a floor.

    The floor image is looked up as
    ``<metadata_dir>/<site>/<label>/floor_image.png`` for each label returned
    by ``floor2strs(floor)``; the first existing file is used. The mask is
    derived from the image's fourth channel (index 3), so the image must
    have one.

    Args:
        site: site identifier (directory name under the metadata root).
        floor: integer floor index.

    Returns:
        tuple: ``(img, mask)`` where ``img`` is the image as loaded with
        ``cv2.IMREAD_UNCHANGED`` and ``mask`` is the 5x5 box-filtered
        permitted-area mask with the image's height and width.

    Raises:
        FileNotFoundError: if the floor index has no known label, or none of
            the candidate image files exists.
        OSError: if the image file exists but OpenCV cannot decode it.
    """
    candidates = [
        metadata_dir() / site / floor_str / "floor_image.png"
        for floor_str in floor2strs(floor)
    ]
    floor_image_path = next((path for path in candidates if path.exists()), None)
    if floor_image_path is None:
        tried = ", ".join(str(path) for path in candidates) or "<no floor labels>"
        raise FileNotFoundError(
            f"floor_image.png not found for site={site!r}, floor={floor!r}; tried: {tried}"
        )

    img = cv2.imread(str(floor_image_path), cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"could not decode floor image: {floor_image_path}")

    height, width, _ = img.shape
    alpha = img[:, :, 3]

    # soft: 1 where channel 3 is > 1, else 0.
    _, thimg_soft = cv2.threshold(alpha, 1, 1, cv2.THRESH_BINARY)
    # hard: 1 where channel 3 is <= 254, else 0 (inverted threshold).
    _, thimg_hard = cv2.threshold(alpha, 254, 1, cv2.THRESH_BINARY_INV)

    # Zero the one-pixel image border of the soft mask before contour tracing.
    thimg_soft[0, :] = 0
    thimg_soft[height - 1, :] = 0
    thimg_soft[:, 0] = 0
    thimg_soft[:, width - 1] = 0

    # Fill the outer contours of the soft mask (interior holes included, since
    # only external contours are retrieved), skipping regions of area <= 1000.
    mask_img = np.zeros_like(thimg_soft, dtype=np.uint8)
    contours, _ = cv2.findContours(thimg_soft, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    contours = [contour for contour in contours if cv2.contourArea(contour) > 1000]
    cv2.fillPoly(mask_img, contours, 1)

    # Permitted = inside a filled region AND flagged by the hard mask.
    permitted_img = np.minimum(mask_img, thimg_hard)
    # NOTE(review): box-filtering a 0/1 integer mask presumably acts as a
    # local majority vote after rounding - confirm against OpenCV's rounding.
    return img, cv2.blur(permitted_img, (5, 5))

In [None]:
def generate_grid_point(args, n_samples=10000, min_waypoint_dist=5.0, min_grid_dist=2.5):
    """Sample extra grid points on one floor by rejection sampling.

    Candidates are drawn uniformly over ``[0, width] x [0, height]`` (the
    extent from ``get_map_info``). A candidate is accepted when

    * its pixel in the permitted-area mask equals 1,
    * it lies farther than ``min_waypoint_dist`` from the nearest waypoint of
      this site/floor in ``WAYPOINTS_DF`` (skipped if there are none), and
    * it lies farther than ``min_grid_dist`` from every point accepted so far.

    The generator is re-seeded with ``SEED`` on every call, so the result for
    a given floor is deterministic.

    Args:
        args: ``(site, floor)`` pair, packed into one argument so the function
            can be mapped over a multiprocessing pool.
        n_samples: number of random candidates to draw.
        min_waypoint_dist: minimum allowed distance to an existing waypoint,
            in floor-coordinate units.
        min_grid_dist: minimum allowed distance between accepted points, in
            floor-coordinate units.

    Returns:
        pandas.DataFrame with columns ``x``, ``y``, ``site``, ``floor`` (one
        row per accepted point), or ``None`` if nothing was accepted.
    """
    site, floor = args

    _, permitted_mask = extract_permitted_area_from_map(site, floor)

    on_this_floor = (WAYPOINTS_DF['site'] == site) & (WAYPOINTS_DF['floor'] == floor)
    waypoints = WAYPOINTS_DF.loc[on_this_floor, ['x', 'y']].values
    # A floor without known waypoints imposes no waypoint-distance constraint
    # (a nearest-neighbour search on an empty array would raise).
    has_waypoints = waypoints.shape[0] > 0

    height, width = get_map_info(site, floor)

    rgen = np.random.default_rng(SEED)

    # At most n_samples points can be accepted, so allocate once and fill in
    # place rather than re-concatenating the array on every accepted point.
    accepted = np.empty((n_samples, 2))
    n_accepted = 0

    for _ in range(n_samples):
        # Draw x before y, one scalar at a time, to keep the random stream
        # (and hence the generated grid) stable.
        x = rgen.uniform(low=0.0, high=width)
        y = rgen.uniform(low=0.0, high=height)
        p_x, p_y = coodinate_to_pixel(x, y, height, width, permitted_mask.shape)
        if permitted_mask[p_y, p_x] != 1:
            continue

        xy = np.array([x, y])

        if has_waypoints:
            nearest_waypoint = find_nearest_waypoints(xy, waypoints)
            waypoint_dist = np.sqrt(np.sum((xy - nearest_waypoint) ** 2))
            # "not (d > t)" rather than "d <= t" so a NaN distance rejects.
            if not (waypoint_dist > min_waypoint_dist):
                continue

        if n_accepted > 0:
            nearest_grid = find_nearest_waypoints(xy, accepted[:n_accepted])
            grid_dist = np.sqrt(np.sum((xy - nearest_grid) ** 2))
            if not (grid_dist > min_grid_dist):
                continue

        accepted[n_accepted] = xy
        n_accepted += 1

    if n_accepted == 0:
        return None

    # Copy so the returned frame does not keep the full buffer alive.
    extra_grid_points = accepted[:n_accepted].copy()
    out_df = pd.DataFrame({
        'x': extra_grid_points[:, 0],
        'y': extra_grid_points[:, 1],
    })
    out_df['site'] = site
    out_df['floor'] = floor
    return out_df

In [None]:
# Collect the distinct (site, floor) pairs that appear in the baseline
# submission; extra grid points are generated for exactly those floors.
sub = pd.read_csv('../input/wifi-features-with-lightgbm-kfold/submission_baseline.csv')
# 'site_path_timestamp' is underscore-separated; the first token is the site.
# Vectorized split instead of building one pd.Series per row.
tmp = sub['site_path_timestamp'].str.split('_', expand=True)
sub['site'] = tmp[0]
site_floor = sub[['site', 'floor']].drop_duplicates().values

# One task per floor, spread over all cores. Results arrive in completion
# order and must be consumed while the pool is still open.
processes = multiprocessing.cpu_count()
with multiprocessing.Pool(processes=processes) as pool:
    results = pool.imap_unordered(generate_grid_point, site_floor)
    # The result iterator has no length, so give tqdm the total explicitly.
    # Floors that yielded no points come back as None and are dropped.
    dfs = [df for df in tqdm(results, total=len(site_floor)) if df is not None]

# Sorting makes the output independent of the order in which workers finished.
df = pd.concat(dfs).sort_values(['site', 'floor'])
df.to_csv('extra_grid_points.csv', index=False)