In [None]:
%load_ext autoreload

import numpy as np
from sklearn.mixture import GaussianMixture
import open3d as o3d
from chamferdist import ChamferDistance
import torch
import matplotlib.pyplot as plt
import math
import os
import pickle
from tqdm.notebook import tqdm

from src.chamfer import mahalanobis_distance_gmm

In [None]:
## Generate synthetic data
N, D = 1000, 3  # number of points drawn per Gaussian, and dimensionality
SEED = 0  # fixed seed so the synthetic cloud is identical on every run

if D == 2:
    # Gaussian centers and diagonal covariances in 2D
    means = np.array([[0.5, 0.0], [0, 0], [-0.5, -0.5], [-0.8, 0.3]])
    covs = np.array(
        [
            np.diag([0.01, 0.01]),
            np.diag([0.025, 0.01]),
            np.diag([0.01, 0.025]),
            np.diag([0.01, 0.01]),
        ]
    )
elif D == 3:
    # Gaussian centers and diagonal covariances in 3D
    means = np.array(
        [[0.5, 0.0, 0.0], [0.0, 0.0, 0.0], [-0.5, -0.5, -0.5], [-0.8, 0.3, 0.4]]
    )
    covs = np.array(
        [
            np.diag([0.01, 0.01, 0.03]),
            np.diag([0.08, 0.01, 0.01]),
            np.diag([0.01, 0.05, 0.01]),
            np.diag([0.03, 0.07, 0.01]),
        ]
    )
else:
    # Fail here with a clear message instead of a NameError on `means` below.
    raise ValueError(f"D must be 2 or 3, got {D}")
n_gaussians = means.shape[0]

# Draw N samples from every component and stack them into a single
# (n_gaussians * N, D) array. A dedicated Generator is used so the global
# NumPy random state is left untouched.
rng = np.random.default_rng(SEED)
points = np.concatenate(
    [rng.multivariate_normal(mu, sigma, N) for mu, sigma in zip(means, covs)]
)

In [None]:
# load point clouds
cloud_dir = "output/elbow/test/"

# target cloud
cloud_path = "output/elbow/test/24102.pcd"
tgt_points = np.array(o3d.io.read_point_cloud(cloud_path).points)

# source clouds: every .pcd file in cloud_dir. os.listdir returns entries in
# arbitrary order, so sort to get the same cloud order on every machine.
# NOTE: the target file lives in this same directory, so it is also loaded
# as one of the source clouds.
files = os.listdir(cloud_dir)
clouds = sorted(x for x in files if x.endswith(".pcd"))

# limit = 500
# Count the .pcd files rather than all directory entries: indexing `clouds`
# with len(files) raised IndexError as soon as the directory contained any
# non-.pcd file.
limit = len(clouds)

src_points = [
    np.array(o3d.io.read_point_cloud(os.path.join(cloud_dir, name)).points)
    for name in clouds[:limit]
]

# assumes every cloud has the same number of points so the list stacks into
# one (n_clouds, n_points, 3) array — TODO confirm
src_points = np.array(src_points)
print("Number of source clouds: ", len(src_points))

In [None]:
%matplotlib inline

# Fit a single Gaussian mixture model to the target cloud.
# random_state pins the EM initialisation so the fitted components, and every
# score derived from them further down, are the same on each run.
# gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
gmm = GaussianMixture(n_components=128, covariance_type="full", random_state=0)
gmm.fit(tgt_points)
print(gmm.covariances_.shape)


In [None]:
# fit gaussian models for entire testset
def fit_gaussians(points, n_gaussians, covariance_type="full", random_state=None):
    """Fit one independent GaussianMixture to each point cloud in ``points``.

    Args:
        points: Sequence of point clouds; each element is passed unchanged to
            ``GaussianMixture.fit``.
        n_gaussians: Number of mixture components fitted per cloud.
        covariance_type: Forwarded to ``GaussianMixture``. Defaults to
            ``"full"``, which is what this function always used.
        random_state: Forwarded to ``GaussianMixture``. The default ``None``
            keeps the previous unseeded behaviour; pass an int for
            reproducible fits.

    Returns:
        Tuple ``(means, covs, weights)`` of NumPy arrays built by stacking the
        ``means_``, ``covariances_`` and ``weights_`` attributes of the
        per-cloud models; the leading axis indexes the clouds.
    """
    means, covs, weights = [], [], []
    for cloud in tqdm(points):
        gmm = GaussianMixture(
            n_components=n_gaussians,
            covariance_type=covariance_type,
            random_state=random_state,
        )
        gmm.fit(cloud)
        means.append(gmm.means_)
        covs.append(gmm.covariances_)
        weights.append(gmm.weights_)

    return np.array(means), np.array(covs), np.array(weights)

In [None]:
n_components = 128  # mixture size; also encoded in the output file name
out_dir = "gaussians"

# Numeric id of each cloud, taken from the file name in front of the first
# dot. Only the clouds that were actually loaded into src_points are used, so
# ids[i] always describes means[i] / covs[i] / weights[i].
ids = [int(x.split(".")[0]) for x in clouds[: len(src_points)]]

# NOTE: this rebinds `means` and `covs`, which earlier held the parameters of
# the synthetic data.
means, covs, weights = fit_gaussians(src_points, n_components)
print(means.shape, covs.shape)

# Create the output directory first; open() does not create missing parents.
os.makedirs(out_dir, exist_ok=True)
with open(f"{out_dir}/gaussians_{n_components}.pkl", "wb") as f:
    pickle.dump([means, covs, ids, weights], f)

In [None]:
# Score the first `limit` source clouds under the target GMM.
# score_samples returns a per-point log-likelihood, so the value used here as
# the "sklearn Mahalanobis" distance is the sum over a cloud's points of the
# absolute log-likelihood.
# NOTE(review): abs() and negation differ wherever the log-likelihood is
# positive — confirm abs() is what is intended.
limit = 100
# limit = len(src_points)
mahal_dists_pointwise = np.array(
    [np.abs(gmm.score_samples(cloud)) for cloud in src_points[:limit]]
)
mahal_dists_sk = mahal_dists_pointwise.sum(axis=1)
print(mahal_dists_sk.shape)

In [None]:
# compute chamfer distance for comparison
chamferDist = ChamferDistance()

# Use the GPU when one is available and fall back to the CPU otherwise,
# instead of crashing inside .cuda() on a CPU-only machine.
# (assumes the installed chamferdist build accepts CPU tensors — TODO confirm)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

src_tensor = torch.from_numpy(np.float32(src_points)).to(device)
tgt_tensor = torch.from_numpy(np.float32(tgt_points)).to(device)
# Pair the single target cloud with every source cloud. expand() yields a
# broadcast view of shape (len(src_points), n_points, 3) without allocating a
# copy of the target per source cloud, which repeat() did.
tgt_tensor = tgt_tensor.unsqueeze(0).expand(len(src_points), -1, -1)
# print(src_tensor.shape, tgt_tensor.shape)

# Only the first `limit` pairs are evaluated; .contiguous() materialises just
# that slice so the distance kernel receives an ordinary dense tensor.
chamfer_dists = chamferDist(
    src_tensor[:limit],
    tgt_tensor[:limit].contiguous(),
    bidirectional=False,
    reduction=None,
)
chamfer_dists = chamfer_dists.detach().cpu().numpy()
print(chamfer_dists.shape)

# # return point-wise distances for single source cloud
# knn = chamferDist(
#     src_tensor, tgt_tensor, return_nn=True, bidirectional=False, reduction=None
# )
# chamfer_dist = knn[0].dists[0].flatten().detach().numpy()

In [None]:
# plot the three distance curves on the same axis
def plot_dists(mahal_dist_sk, maahal_dist_cust, chamfer_dist, title):
    """Plot per-cloud distances from the three methods on one figure.

    The chamfer curve is rescaled so that its maximum equals the maximum of
    ``mahal_dist_sk``; the two Mahalanobis curves are plotted as given.

    Args:
        mahal_dist_sk: 1-D sequence of sklearn-based distances, one per cloud.
            Its length defines the x axis.
        maahal_dist_cust: 1-D sequence of distances from the custom
            implementation (parameter name kept as is for compatibility).
        chamfer_dist: 1-D sequence of chamfer distances.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    mahal_dist_sk = np.asarray(mahal_dist_sk)
    chamfer_dist = np.asarray(chamfer_dist)
    x = np.arange(len(mahal_dist_sk))

    # scale chamfer distance to be comparable with mahalanobis
    mahal_dist_max = np.max(mahal_dist_sk)
    chamfer_dist_max = np.max(chamfer_dist)
    if chamfer_dist_max != 0:
        chamfer_scaled = chamfer_dist / chamfer_dist_max * mahal_dist_max
    else:
        # A zero maximum would turn the whole curve into NaN through 0 / 0,
        # so plot the values unscaled instead.
        chamfer_scaled = chamfer_dist

    fig, ax = plt.subplots(figsize=(30, 6))
    ax.plot(x, mahal_dist_sk, label="mahalanobis_sklearn")
    ax.plot(x, maahal_dist_cust, label="mahalanobis_custom")
    ax.plot(x, chamfer_scaled, label="chamfer")

    ax.set_xlabel("point cloud index")
    ax.set_ylabel("distance")
    ax.set_title(title)
    ax.legend()
    plt.show()

In [None]:
%autoreload 2

# comparison of sklearn mahal distance and custom implementation
# Place the GMM parameters on whatever device src_tensor already lives on, so
# this cell does not require CUDA by itself and can never end up with tensors
# on two different devices.
mean = torch.tensor(gmm.means_).to(src_tensor.device)
cov = torch.tensor(gmm.covariances_).to(src_tensor.device)
# One copy of the target model's parameters per source cloud, to match the
# batch layout of src_tensor.
mean = mean.repeat(len(src_points), 1, 1)
cov = cov.repeat(len(src_points), 1, 1, 1)
print(mean.shape, cov.shape, src_tensor.shape)

mahal_dists_custom = mahalanobis_distance_gmm(
    src_tensor[:limit], mean[:limit], cov[:limit]
)
mahal_dists_custom = mahal_dists_custom.detach().cpu().numpy()
# NOTE(review): presumably halved to mirror the 1/2 factor of the Gaussian
# log-density exponent so the curve is comparable with the sklearn score —
# confirm against mahalanobis_distance_gmm.
mahal_dists_custom = mahal_dists_custom / 2
print(mahal_dists_custom.shape)

In [None]:
# Draw the sklearn-based, custom and (rescaled) chamfer curves together.
plot_dists(
    mahal_dists_sk,
    mahal_dists_custom,
    chamfer_dists,
    title="Mahalanobis vs Chamfer Distance",
)

In [None]:
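# # visualize (disabled: `visualization` is not imported in this notebook;
# # the sqrt/transpose of the covariances presumably expects covariance_type="diag" — TODO confirm)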
# if D == 2:
#     visualization.visualize_2D_gmm(
#         points, gmm.weights_, gmm.means_.T, np.sqrt(gmm.covariances_).T
#     )
# elif D == 3:
#     visualization.visualize_3d_gmm(
#         points, gmm.weights_, gmm.means_.T, np.sqrt(gmm.covariances_).T
#     )