In [1]:
%matplotlib widget

In [2]:
import numpy as np
import torch

In [3]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Load CLIP embeddings

In [4]:
# Load the saved probe results for the scene.
# NOTE: torch.load unpickles the file, so only point this at checkpoints you trust.
scene_data = torch.load('./logs/probe/emb_blender_paper_lego_clip_vit.pth')

# The stored embedding is a NumPy array; convert it straight to a float32 tensor.
embedding = torch.from_numpy(scene_data['embedding']).float()

In [5]:
# Quick look at the first two embedding dimensions plotted against each other.
plt.scatter(embedding[:, 0], embedding[:, 1])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7f1f8c671850>

In [6]:
def pairwise_cosine_similarity(features, eps=1e-8):
    """Return the cosine similarity between every pair of rows.

    Args:
        features: 2-D tensor of shape [B, D], one feature vector per row.
        eps: Lower bound applied to each row norm before dividing. It keeps
            all-zero rows from producing NaN; such rows get similarity 0
            with every row (including themselves).

    Returns:
        A [B, B] tensor whose entry (i, j) is the cosine similarity between
        row i and row j, clamped to the valid range [-1, 1].
    """
    assert features.ndim == 2  # [B, D]
    # Guard the division: a zero-norm row would otherwise yield 0 / 0 = NaN
    # and poison its whole row and column of the result.
    norm = torch.norm(features, dim=1).clamp_min(eps)
    features_normalized = features / norm.unsqueeze(1)
    similarity = features_normalized.mm(features_normalized.transpose(0, 1))
    # Floating-point rounding can leave values a hair outside [-1, 1]
    # (e.g. 1.0000001 on the diagonal), which makes torch.acos return NaN.
    return similarity.clamp(-1.0, 1.0)

In [7]:
# Cosine similarity between every pair of rows of `embedding`
# (presumably one row per rendered view -- confirm against the probe script).
similarity = pairwise_cosine_similarity(embedding)
# Bare expression so the notebook displays the matrix.
similarity

tensor([[1.0000, 0.9112, 0.8482,  ..., 0.8342, 0.8256, 0.8758],
        [0.9112, 1.0000, 0.9407,  ..., 0.9150, 0.9343, 0.9582],
        [0.8482, 0.9407, 1.0000,  ..., 0.9444, 0.9623, 0.9370],
        ...,
        [0.8342, 0.9150, 0.9444,  ..., 1.0000, 0.9338, 0.9125],
        [0.8256, 0.9343, 0.9623,  ..., 0.9338, 1.0000, 0.9413],
        [0.8758, 0.9582, 0.9370,  ..., 0.9125, 0.9413, 1.0000]])

In [8]:
# Open a fresh figure first: with the interactive `%matplotlib widget` backend
# the previous figure stays current, so without this the heatmap would be
# drawn on top of the earlier scatter plot instead of in its own canvas.
plt.figure()
plt.imshow(similarity.numpy())
plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x7f1f8c5d1340>

# Pose similarity (ground truth)

In [9]:
# Peek at the first stored pose (the saved output shows a 4x4 float32 matrix).
scene_data['poses'][0]

array([[-9.9990219e-01,  4.1922452e-03, -1.3345719e-02, -5.3798322e-02],
       [-1.3988681e-02, -2.9965907e-01,  9.5394367e-01,  3.8454704e+00],
       [-4.6566129e-10,  9.5403719e-01,  2.9968831e-01,  1.2080823e+00],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  1.0000000e+00]],
      dtype=float32)

In [10]:
# Convert the stored NumPy pose array to a float32 tensor for the torch ops below.
poses = torch.from_numpy(scene_data['poses']).float()

In [11]:
# 3D scatter of the last column of the first three rows of each pose matrix
# (the x, y, z entries that the cells below treat as the camera position).
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(*poses[:, :3, -1].T)
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
# Last-column entries of every pose, dropping the final row of each matrix.
xyz = poses[:, :-1, -1]
# Broadcast to all pairwise differences: diffs[i, j] = xyz[j] - xyz[i].
diffs = xyz[None, :, :] - xyz[:, None, :]
# Euclidean length of each difference vector.
pose_distances = diffs.pow(2).sum(dim=-1).sqrt()

In [13]:
# Angle of each camera position in the x-y plane, atan2(y, x), in radians.
# Used below only to colour the scatter plots.
thetas = torch.atan2(xyz[:, 1], xyz[:, 0])

In [14]:
# Cosine of the angle between every pair of camera position vectors.
# NOTE(review): the name is misspelled ("similiarty"), but later cells refer to
# this exact spelling, so rename it everywhere at once or not at all.
pose_similiarty = pairwise_cosine_similarity(xyz)

In [15]:
# Heatmap of the pairwise camera distances computed above.
# Disabled alternative kept for reference: cosine similarity over the full last pose column.
# pose_similarity = pairwise_cosine_similarity(poses[:, :, 3].flatten(1))
plt.figure()
plt.imshow(pose_distances)
plt.colorbar()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.colorbar.Colorbar at 0x7f1f8c102d60>

In [21]:
# One colour per flattened (i, j) pair: tiling thetas N times lines up with the
# row-major flatten below, so each pair is coloured by the theta of camera j.
colors = thetas.repeat(len(thetas))

plt.figure()
plt.scatter(pose_distances.reshape(-1), similarity.reshape(-1), c=colors)
plt.xlabel('Euclidean distance between cameras')
plt.ylabel('Cosine similiarity of CLIP ViT embeddings')
plt.colorbar()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.colorbar.Colorbar at 0x7f1f6c164e50>

In [20]:
# One colour per flattened (i, j) pair: thetas repeated N times matches the
# row-major flatten below, so each pair is coloured by the theta of camera j.
colors = torch.cat([thetas for _ in range(len(thetas))])

# Clamp before acos: rounding can leave cosines slightly outside [-1, 1]
# (typically 1.0000001 on the diagonal), where acos returns NaN and the
# affected points are silently left out of the scatter plot.
pose_angles = torch.acos(pose_similiarty.clamp(-1.0, 1.0))

plt.figure()
plt.scatter(pose_angles.flatten(), similarity.flatten(), c=colors)
plt.ylabel('Cosine similiarity of CLIP ViT embeddings')
plt.xlabel('Angle between cameras')
plt.colorbar()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.colorbar.Colorbar at 0x7f1f6c779e20>

In [28]:
# One colour per flattened (i, j) pair: thetas repeated N times matches the
# row-major flatten below, so each pair is coloured by the theta of camera j.
colors = torch.cat([thetas for _ in range(len(thetas))])

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Plot a random subset of the N*N pairs to keep the interactive 3D view responsive.
# A seeded generator makes the figure reproducible on re-run, and capping the
# sample size avoids a ValueError when there are fewer than 1000 pairs.
n_pairs = len(thetas) ** 2
idx = np.random.default_rng(0).choice(n_pairs, size=min(1000, n_pairs), replace=False)

# Clamp before acos: rounding can leave cosines slightly outside [-1, 1],
# where acos returns NaN and the affected points are not drawn.
pose_angles = torch.acos(pose_similiarty.clamp(-1.0, 1.0)).flatten()

ax.scatter(pose_angles[idx], pose_distances.flatten()[idx], similarity.flatten()[idx], c=colors[idx])
ax.set_zlabel('Cosine similiarity of CLIP ViT embeddings')
ax.set_xlabel('Angle between cameras')
ax.set_ylabel('Euclidean distance between cameras')
# plt.colorbar()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'Euclidean distance between cameras')

In [60]:
# Distribution of all pairwise camera distances (100 bins).
plt.figure()
_ = plt.hist(pose_distances.numpy().ravel(), bins=100)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [61]:
# Distribution of all pairwise embedding cosine similarities (100 bins).
plt.figure()
_ = plt.hist(similarity.flatten().numpy(), bins=100)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [64]:
# Reverse-cumulative, normalised histogram: the bar starting at s shows the
# fraction of pairs whose similarity is >= s.
# `cumulative=-1` accumulates from the right, which gives the same curve the
# original obtained by negating the data, but keeps the x-axis in real
# (positive) similarity values instead of mirroring it onto negative ones.
# NOTE(review): assumes the negation was only a trick to reverse the
# accumulation direction -- confirm that was the intent.
plt.figure()
_ = plt.hist(similarity.numpy().flatten(), bins=100, cumulative=-1, density=True)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Dimensionality reduction

In [55]:
def dim_reduction(X, color='blue', n_neighbors=10, n_components=2, mds_max_iter=100):
    """Embed X with several manifold-learning methods and plot them side by side.

    Draws a 2x5 grid: the first panel is a 3D scatter of the first three
    columns of X, the remaining panels show the first two output dimensions
    of LLE (standard, LTSA, Hessian, modified), Isomap, MDS, spectral
    embedding and t-SNE. The fit time of each method is printed and shown
    in the panel title.

    Args:
        X: 2-D array-like of shape [n_points, n_features]; needs at least
            three columns for the 3D panel.
        color: Value passed as `c=` to every scatter call (a single colour
            or one value per point).
        n_neighbors: Neighbourhood size for the LLE variants, Isomap and
            spectral embedding.
        n_components: Output dimensionality of every method (only the first
            two components are plotted).
        mds_max_iter: Iteration cap for MDS.

    Returns:
        None. The figure is shown with `plt.show()`.
    """
    from collections import OrderedDict
    from functools import partial
    from time import time

    import matplotlib.pyplot as plt
    from matplotlib.ticker import NullFormatter
    # Imported for its side effect: registers the '3d' projection on older Matplotlib.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    from sklearn import manifold

    n_points = len(X)

    # Create figure; report the actual number of points rather than a fixed 1000.
    fig = plt.figure(figsize=(15, 8))
    fig.suptitle("Manifold Learning with %i points, %i neighbors"
                 % (n_points, n_neighbors), fontsize=14)

    # Add 3d scatter plot of the first three input dimensions
    ax = fig.add_subplot(251, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
    ax.view_init(4, -72)

    # Set-up manifold methods. Hyper-parameters are passed by keyword because
    # current scikit-learn releases declare them keyword-only.
    LLE = partial(manifold.LocallyLinearEmbedding,
                  n_neighbors=n_neighbors, n_components=n_components,
                  eigen_solver='auto')

    methods = OrderedDict()
    methods['LLE'] = LLE(method='standard')
    methods['LTSA'] = LLE(method='ltsa')
    methods['Hessian LLE'] = LLE(method='hessian')
    methods['Modified LLE'] = LLE(method='modified')
    methods['Isomap'] = manifold.Isomap(n_neighbors=n_neighbors,
                                        n_components=n_components)
    methods['MDS'] = manifold.MDS(n_components=n_components,
                                  max_iter=mds_max_iter, n_init=1)
    methods['SE'] = manifold.SpectralEmbedding(n_components=n_components,
                                               n_neighbors=n_neighbors)
    methods['t-SNE'] = manifold.TSNE(n_components=n_components, init='pca',
                                     random_state=0)

    # Plot results
    for i, (label, method) in enumerate(methods.items()):
        t0 = time()
        Y = method.fit_transform(X)
        t1 = time()
        print("%s: %.2g sec" % (label, t1 - t0))
        # Slots 2-5 hold the first four methods; `(i > 3)` skips slot 6 so the
        # second row starts under the second column, leaving slot 1 for the 3D view.
        ax = fig.add_subplot(2, 5, 2 + i + (i > 3))
        ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
        ax.set_title("%s (%.2g sec)" % (label, t1 - t0))
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        ax.axis('tight')

    plt.show()

In [53]:
# Manifold embeddings of the camera positions, coloured by each camera's theta.
dim_reduction(xyz, thetas, n_neighbors=10, n_components=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …



LLE: 0.03 sec
LTSA: 0.039 sec
Hessian LLE: 0.048 sec
Modified LLE: 0.035 sec
Isomap: 0.011 sec
MDS: 0.035 sec
SE: 0.016 sec
t-SNE: 0.35 sec


In [54]:
# Same analysis on the full pose matrices, each flattened into a single row.
dim_reduction(poses.flatten(1), thetas, n_neighbors=10, n_components=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …



LLE: 0.038 sec
LTSA: 0.034 sec
Hessian LLE: 0.053 sec
Modified LLE: 0.042 sec
Isomap: 0.011 sec
MDS: 0.035 sec
SE: 0.01 sec
t-SNE: 0.44 sec


In [56]:
# Same analysis on the CLIP embeddings, with a higher MDS iteration cap than the default of 100.
dim_reduction(embedding, thetas, n_neighbors=10, n_components=2, mds_max_iter=1000)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …



LLE: 0.033 sec
LTSA: 0.045 sec
Hessian LLE: 0.057 sec
Modified LLE: 0.053 sec
Isomap: 0.012 sec
MDS: 0.13 sec
SE: 0.017 sec
t-SNE: 0.41 sec


In [36]:
# Sanity check of the embedding tensor's shape (rows x feature dimensions).
embedding.shape

torch.Size([138, 512])