# Problematic splitting

In [4]:
import os
import sys
import shutil
import zipfile
import numpy as np
import pandas as pd
import pickle
# import pdal
import json
import laspy
import subprocess
from copy import deepcopy
from datetime import datetime
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
from functools import partial
from omegaconf import OmegaConf
from scipy.spatial import cKDTree
from sklearn.decomposition import PCA

### utils

In [36]:
def transform_with_pca(pointcloud, verbose=False):
    """Fit a 2-component PCA on *pointcloud* and return the projected points.

    Args:
        pointcloud: array-like of samples handed directly to
            ``PCA.fit_transform``.
        verbose: when true, print the fitted principal axes and the
            projected points.

    Returns:
        The array returned by ``PCA(n_components=2).fit_transform``.
    """
    model = PCA(n_components=2)
    projected = model.fit_transform(pointcloud)

    if not verbose:
        return projected

    # Debug output: the principal axes, then the coordinates in those axes.
    print("PCA components (axes):\n", model.components_)
    print("PCA-transformed points:\n", projected)
    return projected

def split_instances(pointcloud, maskA, maskB):
    """Divide the points shared by two instance masks between the instances.

    The shared points are projected with ``transform_with_pca`` and cut into
    two groups by the sign of their second PCA coordinate.  The "positive"
    group is given to the instance whose 2D centroid is closer to that
    group's centroid; the remaining shared points go to the other instance.

    Args:
        pointcloud: object exposing ``x`` and ``y`` coordinate sequences with
            one entry per point (the notebook passes a ``laspy.read`` result).
        maskA: per-point mask of the first instance.  Any boolean or 0/1
            array-like is accepted; it is coerced to ``bool`` before use.
        maskB: per-point mask of the second instance, same conventions.

    Returns:
        Tuple ``(maskA, maskB)`` of new boolean arrays that no longer
        overlap.  The input masks are not modified.
    """
    # Coerce first: indexing with a 0/1 *integer* array would be fancy
    # indexing (picking rows 0 and 1 repeatedly) instead of masking.
    maskA = np.asarray(maskA, dtype=bool)
    maskB = np.asarray(maskB, dtype=bool)
    intersection_mask = maskA & maskB
    n_shared = np.count_nonzero(intersection_mask)

    # Nothing to split: hand back copies so callers never alias the inputs.
    if n_shared == 0:
        return maskA.copy(), maskB.copy()

    xy = np.column_stack((
        np.array(pointcloud.x).reshape(-1),
        np.array(pointcloud.y).reshape(-1),
    ))
    shared_xy = xy[intersection_mask]

    if n_shared < 2:
        # A 2-component PCA cannot be fitted on a single sample; treat the
        # lone point as one group and give it to the nearest instance.
        on_pos_side = np.ones(n_shared, dtype=bool)
    else:
        # Cut along the first principal axis: sign of the second coordinate.
        on_pos_side = transform_with_pca(shared_xy)[:, 1] > 0
        if not on_pos_side.any():
            # Degenerate cut (no point strictly on the positive side): avoid
            # a NaN centroid by treating all shared points as one group.
            on_pos_side = ~on_pos_side

    # Scatter the per-shared-point decision back onto the full tile.
    pos_full = np.zeros(intersection_mask.shape, dtype=bool)
    pos_full[intersection_mask] = on_pos_side
    neg_full = intersection_mask & ~pos_full

    # Instance centroids are computed on the full masks (shared points
    # included), as before.
    centroid_A = xy[maskA].mean(axis=0)
    centroid_B = xy[maskB].mean(axis=0)
    centroid_pos = shared_xy[on_pos_side].mean(axis=0)

    dist_pos_A = np.linalg.norm(centroid_A - centroid_pos)
    dist_pos_B = np.linalg.norm(centroid_B - centroid_pos)

    # Strip the overlap, then give each instance its share of it.
    only_A = maskA & ~intersection_mask
    only_B = maskB & ~intersection_mask

    if dist_pos_A < dist_pos_B:
        return only_A | pos_full, only_B | neg_full
    return only_A | neg_full, only_B | pos_full

# Run the split on the sample currently held in the notebook globals `tile`,
# `mask` and `other_mask`, then print the sum of each resulting mask.
# NOTE(review): those globals are assigned by the loading cell further down in
# this file; this cell (In [36]) was executed after it (In [16]).
new_mask, new_other_mask = split_instances(tile, mask, other_mask)
print("New mask: ", np.sum(new_mask))
print("New other mask : ", np.sum(new_other_mask))

New mask:  3528
New other mask :  2051


### Processing problematic samples

In [16]:
# Load one failing sample: two pickled instance masks and the matching tile.
# The paths are absolute and machine-specific.
src_mask = r"D:\PDM_repo\Github\PDM\results\samples_split_fail\mask3.pickle"
src_other_mask = r"D:\PDM_repo\Github\PDM\results\samples_split_fail\other_mask3.pickle"
src_tile = r"D:\PDM_repo\Github\PDM\results\samples_split_fail\tile_3.laz"

# NOTE(review): pickle.load executes arbitrary code from the file; only use it
# on pickles produced by this project.
with open(src_mask, 'rb') as file:
    mask = pickle.load(file)
with open(src_other_mask, 'rb') as file:
    other_mask = pickle.load(file)

tile = laspy.read(src_tile)

# Elementwise AND of the two masks (the points claimed by both, assuming
# boolean or 0/1 masks -- TODO confirm the pickled dtype).
intersection_mask = mask & other_mask

# Summary: sum of each mask (the number of selected points if the masks are
# boolean or 0/1) and the length of the tile.
print("Lenght of mask: ", np.sum(mask))
print("Lenght of other mask: ", np.sum(other_mask))
print("Lenght of intersection: ", np.sum(intersection_mask))
print("Lenght of tile: ", len(tile))


Lenght of mask:  3534
Lenght of other mask:  2056
Lenght of intersection:  11
Lenght of tile:  35160


In [20]:
# Split the loaded sample's two masks with split_instances; the results are
# stored in new_mask / new_other_mask.
new_mask, new_other_mask = split_instances(tile, mask, other_mask)

In [21]:
# Print the sum of each mask returned by the split (the selected-point count
# if the masks are boolean or 0/1).
print("New mask: ", np.sum(new_mask))
print("New other mask : ", np.sum(new_other_mask))

New mask:  35160
New other mask :  2056
