In [None]:
import numpy as np
import os 
import sys 
import matplotlib
from sklearn.manifold import MDS as classic_MDS
import time 
import scipy
import matplotlib.pyplot as mplt 
import pylab as PLT
import plotly
import plotly.tools as tls
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
from itertools import product
from sklearn.neighbors import KNeighborsClassifier
from pprint import pprint
from sklearn.metrics.pairwise import euclidean_distances
plotly.offline.init_notebook_mode()
data_dir = './'
sys.path.append('./')
import bs_dev
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import sklearn.datasets

In [None]:
# Build the nearest-neighbor distance graph over all input samples
def get_nearest_neighbors_edges(x, k=6, distance_metric = 'euclidean'):
    """Build the directed nearest-neighbor edge list of a sample matrix.

    Parameters
    ----------
    x : 2-D array, shape (n_dim, n_samples)
        One sample per column; it is transposed before being handed to
        ``scipy.spatial.distance.pdist``.
    k : int
        Neighborhood size. Each sample contributes the ``k + 1`` smallest
        entries of its distance row, which normally includes the sample
        itself at distance 0.
    distance_metric : str
        Forwarded to ``pdist`` as ``metric``.

    Returns
    -------
    list
        ``[source_id, neighbor_id, distance]`` triples, grouped by source
        sample and ordered by increasing distance within each group.
    """
    _, n_samples = x.shape
    condensed = scipy.spatial.distance.pdist(x.T, metric=distance_metric)
    dist_matrix = scipy.spatial.distance.squareform(condensed)

    knn_edges = []
    for src in np.arange(n_samples):
        row = dist_matrix[src, :]
        # Keep the k + 1 closest columns of this row (self included).
        nearest = np.argsort(row)[:k + 1]
        knn_edges.extend([src, dst, row[dst]] for dst in nearest)
    return knn_edges


def floyd_warshall(x, k=6, distance_metric='euclidean'):
    """All-pairs shortest-path distances over the nearest-neighbor graph of ``x``.

    Parameters
    ----------
    x : 2-D array, shape (n_dim, n_samples)
        One sample per column.
    k : int
        Neighborhood size forwarded to ``get_nearest_neighbors_edges``.
    distance_metric : str
        Metric forwarded to ``get_nearest_neighbors_edges``.

    Returns
    -------
    numpy.ndarray, shape (n_samples, n_samples), dtype float32
        Symmetric matrix of shortest-path lengths. Pairs that are not
        connected through the graph keep the sentinel value
        ``np.finfo(np.float16).max`` (65504).

    Notes
    -----
    The shortest paths are computed on the *directed* neighbor graph and the
    result is symmetrized afterwards by taking ``min(D[i, j], D[j, i])``.
    """
    # Forward the caller's neighborhood size and metric to the graph builder.
    edges = get_nearest_neighbors_edges(x, k=k, distance_metric=distance_metric)
    _, n_samples = x.shape

    # Finite stand-in for "no edge": float16's max held in a float32 array,
    # so adding two sentinels in the relaxation step cannot overflow.
    unreachable = np.finfo(np.float16).max
    D = np.full((n_samples, n_samples), unreachable, dtype=np.float32)
    for src, dst, distance in edges:
        D[src, dst] = distance

    # Floyd-Warshall relaxation, vectorized over all (i, j) for each pivot.
    for pivot in range(n_samples):
        D = np.minimum(D, D[:, pivot, np.newaxis] + D[np.newaxis, pivot, :])

    # Symmetrize: keep the shorter of the two directed path lengths.
    return np.minimum(D, D.T)

def pairwise_euclidean(X):
    """Return the Euclidean distance matrix of ``X`` against itself.

    Thin wrapper around ``sklearn.metrics.pairwise.euclidean_distances``,
    called with ``X`` as both arguments.
    """
    return euclidean_distances(X, X)

In [None]:
# Results of every MDS run, keyed by method label.
mds_res = {}

# Fixed seed so that Restart & Run All regenerates the same swiss roll
# (and therefore the same target distance matrix) every time.
SWISS_ROLL_SEED = 0
swiss_xyz, swiss_t = sklearn.datasets.make_swiss_roll(n_samples=2000,
                                                      noise=.0,
                                                      random_state=SWISS_ROLL_SEED)

# floyd_warshall unpacks its input as (n_dim, n_samples), hence the transpose.
D_target = floyd_warshall(swiss_xyz.T, k=6,
                          distance_metric='euclidean')

In [None]:
def matplotlib_to_plotly(cmap, pl_entries):
    """Sample a colormap callable into a plotly-style colorscale list.

    Parameters
    ----------
    cmap : callable
        Called with a float position ``k * h`` in [0, 1]; must return a
        sequence whose first three entries are the R, G, B channels as
        floats in [0, 1] (they are scaled by 255 here).
    pl_entries : int
        Number of evenly spaced samples to take.

    Returns
    -------
    list
        ``[position, 'rgb(r, g, b)']`` pairs, one per sample, with integer
        channel values.

    Raises
    ------
    ZeroDivisionError
        If ``pl_entries`` is 1 (the step ``1 / (pl_entries - 1)`` is undefined).
    """
    h = 1.0/(pl_entries-1)
    pl_colorscale = []

    for k in range(pl_entries):
        # Truncate each scaled channel through uint8, then convert to a plain
        # int so the formatted string does not depend on NumPy's scalar repr.
        r, g, b = (int(np.uint8(c)) for c in np.array(cmap(k*h)[:3])*255)
        pl_colorscale.append([k*h, 'rgb({}, {}, {})'.format(r, g, b)])

    return pl_colorscale

def plot_initial_swissroll(X, X_t, title='Swissroll 3D'):
    """Draw a 3-D marker scatter of ``X`` with plotly's offline ``iplot``.

    Parameters
    ----------
    X : 2-D array
        Columns 0, 1 and 2 are used as the x, y and z coordinates.
    X_t : sequence
        Per-point values used as marker colors (Viridis colorscale).
    title : str
        Figure title; also appended to ``'manif_'`` to form the ``filename``
        passed to ``iplot``.
    """
    marker_style = dict(color=X_t,
                        colorscale='Viridis',
                        size=4,
                        showscale=False,
                        line=dict(color='black', width=1))
    cloud = go.Scatter3d(x=X[:, 0], y=X[:, 1], z=X[:, 2],
                         mode='markers',
                         marker=marker_style)
    figure = {'data': [cloud], 'layout': go.Layout(title = title)}
    plotly.offline.iplot(figure, filename='manif_'+title, image='svg')
    
def plot_embedded_swissroll(X, X_t, title='Swissroll 2D'):
    """Draw a 2-D marker scatter of ``X`` with plotly's offline ``iplot``.

    Parameters
    ----------
    X : 2-D array
        Columns 0 and 1 are used as the x and y coordinates.
    X_t : sequence
        Per-point values used as marker colors (Viridis colorscale).
    title : str
        Figure title; also appended to ``'manif_'`` to form the ``filename``
        passed to ``iplot``.
    """
    marker_style = dict(color=X_t,
                        colorscale='Viridis',
                        showscale=False,
                        line=dict(color='black', width=1))
    points = go.Scatter(x=X[:, 0], y=X[:, 1],
                        mode='markers',
                        marker=marker_style)
    figure = {'data': [points], 'layout': go.Layout(title = title)}
    plotly.offline.iplot(figure, filename='manif_'+title, image='svg')

In [None]:
# Show the generated 3-D swiss roll, with markers colored by swiss_t.
plot_initial_swissroll(swiss_xyz, swiss_t, title='Swissroll 3D')

In [None]:
# plot_embedded_swissroll(Z, swiss_t, title='Pattern Search MDS Swissroll 2D')
# plot_embedded_swissroll(X_transformed, swiss_t, title='Classic MDS Swissroll 2D')

In [None]:
# bs_dev.MDS in 'full_search' mode on the precomputed target distances.
# The timed span covers both construction and fit_transform.
t_start = time.time()
pat_search_MDS_creator = bs_dev.MDS(
    n_components=2,
    starting_radius=5.,
    max_iter=100,
    mode='full_search',
    prob_thresh=0.2,
    initial_prob=.6,
    a_bs=0.05,
    verbose=0,
    dissimilarity='precomputed',
)
# fit_transform hands back the embedding together with a time logger.
x_low_rank, time_logger = pat_search_MDS_creator.fit_transform(D_target)
elapsed = time.time() - t_start

mds_res['FS CSMDS'] = {'time': elapsed, 'embedding': x_low_rank}
print(elapsed)
plot_embedded_swissroll(
    x_low_rank, swiss_t,
    title='FS CSMDS ({} seconds)'.format(round(elapsed, 2)))

In [None]:
# bs_dev.MDS in 'randomized' mode on the precomputed target distances.
# The timed span covers both construction and fit_transform.
t_start = time.time()
pat_search_MDS_creator = bs_dev.MDS(
    n_components=2,
    starting_radius=5.,
    max_iter=100,
    mode='randomized',
    prob_thresh=0.2,
    initial_prob=.7,
    a_bs=0.05,
    verbose=0,
    dissimilarity='precomputed',
)
# fit_transform hands back the embedding together with a time logger.
x_low_rank, time_logger = pat_search_MDS_creator.fit_transform(D_target)
elapsed = time.time() - t_start

mds_res['RN CSMDS'] = {'time': elapsed, 'embedding': x_low_rank}
print(elapsed)
plot_embedded_swissroll(
    x_low_rank, swiss_t,
    title='RN CSMDS ({} seconds)'.format(round(elapsed, 2)))

In [None]:
# bs_dev.MDS in 'bootstrapped' mode on the precomputed target distances.
# The timed span covers both construction and fit_transform.
t_start = time.time()
pat_search_MDS_creator = bs_dev.MDS(
    n_components=2,
    starting_radius=5.,
    max_iter=100,
    mode='bootstrapped',
    prob_thresh=0.,
    initial_prob=.5,
    a_bs=0.05,
    verbose=0,
    dissimilarity='precomputed',
)
# fit_transform hands back the embedding together with a time logger.
x_low_rank, time_logger = pat_search_MDS_creator.fit_transform(D_target)
elapsed = time.time() - t_start

mds_res['BS CSMDS'] = {'time': elapsed, 'embedding': x_low_rank}
print(elapsed)
plot_embedded_swissroll(
    x_low_rank, swiss_t,
    title='BS CSMDS ({} seconds)'.format(round(elapsed, 2)))

In [None]:
# scikit-learn MDS baseline on the same precomputed target distances.
# The timed span covers both construction and fit_transform.
t_start = time.time()
embedding = classic_MDS(n_components=2,
                        n_init=1,
                        n_jobs=1,
                        dissimilarity='precomputed')
x_low_rank = embedding.fit_transform(D_target)
elapsed = time.time() - t_start

mds_res['SMACOF MDS'] = {'time': elapsed, 'embedding': x_low_rank}
print(elapsed)
plot_embedded_swissroll(
    x_low_rank, swiss_t,
    title='SMACOF MDS ({} seconds)'.format(round(elapsed, 2)))

In [None]:
# Re-draw every stored embedding, one figure per method label.
for method_name, run in mds_res.items():
    seconds = round(run['time'], 2)
    plot_embedded_swissroll(
        run['embedding'], swiss_t,
        title='{} ({} seconds)'.format(method_name, seconds))