In [None]:
import numpy as np
import os
import plotly.graph_objects as go

# Distance measures
from scipy.spatial.distance import pdist

In [None]:
# Maps each metric identifier passed to scipy's `pdist` to the human-readable
# label used for the corresponding trace in the plot legend.
distance_metrics = {
    'euclidean': 'Euclidean',
    'minkowski': 'Minkowski, or p-norm (here p=3)',
    'cityblock': 'Manhattan',
    'correlation': 'Correlation',
    'cosine': 'Cosine',
    # Chebyshev distance is the largest per-coordinate difference, i.e. the
    # infinity-norm; the 1-norm is the Manhattan (cityblock) distance above.
    'chebyshev': 'Chebyshev, or infinity-norm',
    'braycurtis': 'Bray-Curtis',
    'mahalanobis': 'Mahalanobis',
    'jensenshannon': 'Jensen-Shannon',
}

In [None]:
%%time
# Sample points uniformly at random inside the unit hypercube [0, 1)^k for a
# range of dimensionalities k, then compute all pairwise distances between the
# points under every metric listed in `distance_metrics`.

dimensions = np.arange(50,500,50)
# Twice the largest dimensionality; presumably chosen so that there are always
# more observations than dimensions (needed for the covariance estimate behind
# the Mahalanobis metric) -- TODO confirm intent.
n_points = max(dimensions) * 2
n_dimensions = len(dimensions)

# Seeded, local random generator: re-running the notebook reproduces the same
# sample (and therefore the same figure) without touching NumPy's global state.
rng = np.random.default_rng(0)

# Generate input data: one (n_points, dim) array per dimensionality
X = {}
for dim in dimensions:
    X[dim] = rng.random((n_points, dim))

# Extra keyword arguments forwarded to `pdist` for the metrics that need them.
metric_kwargs = {
    'minkowski': {'p': 3},
}

# Compute all kinds of distances.
# Result layout: distances_dict[dim][metric] -> condensed distance vector.
distances_dict = {}
for dim in dimensions:
    print(dim)
    dict_ = {}
    for metric in distance_metrics:
        print(metric)
        dict_[metric] = pdist(X[dim], metric, **metric_kwargs.get(metric, {}))
    distances_dict[dim] = dict_

In [None]:
# For every metric, compute the relative contrast between the farthest and the
# nearest pair of points, (d_max - d_min) / d_min, at each dimensionality.
# Result layout: ratio_dict[metric] -> array with one value per entry of
# `dimensions`, in the same order.
ratio_dict = {}
for metric in distance_metrics:
    ratio = np.zeros(len(dimensions))
    for d, dim in enumerate(dimensions):
        y = distances_dict[dim][metric]
        # Compute the minimum once; it is both subtracted and divided by.
        # NOTE: a zero minimum distance (e.g. duplicated points) makes this
        # ratio inf/nan rather than raising.
        y_min = np.min(y)
        ratio[d] = (np.max(y) - y_min) / y_min
    ratio_dict[metric] = ratio

In [None]:
# Plot the relative distance contrast against dimensionality, one trace per
# metric, then display the figure and export it as an image file.
fig = go.Figure()

for metric, label in distance_metrics.items():
    fig.add_trace(
        go.Scatter(
            x=dimensions,
            y=ratio_dict[metric],
            mode='markers+lines',
            name=label,
        )
    )

fig.update_layout(
    width=700,
    height=400,
    xaxis_title="Dimensionality of the space, k",
    # Raw string: the LaTeX backslashes (`\max`, `\min`, `\ `) are not valid
    # Python escape sequences and would otherwise trigger a warning. The
    # resulting text is unchanged.
    yaxis_title=r'$(dist_{\max}^k-dist_{\min}^k) \ / \ dist_{\min}^k$',
)

fig.show()

# Make sure the output directory exists before exporting; otherwise the export
# fails on a fresh checkout.
figures_dir = os.path.join('write_files', 'figures')
os.makedirs(figures_dir, exist_ok=True)
fig.write_image(os.path.join(figures_dir, 'Distances_high_dim.jpg'))