In [None]:
import numpy as np
import matplotlib.pyplot as plt

import sklearn.manifold as skmanifold

In [None]:
# Read the three physical catalogs (one per resolution) from disk, then show
# the field layout of the 512 catalog.
catalog512, catalog1024, catalog2048 = (
    np.load(catalogPath)
    for catalogPath in ('../data/physCatalog512.npy',
                        '../data/physCatalog1024.npy',
                        '../data/physCatalog2048.npy')
)

print(catalog512.dtype)

In [None]:
# Convert to arrays with mass, volume, radial distance, and magnitude of velocity
structureFormat = ([('ID',         np.int64),
                    ('volume',     np.float64),
                    ('mass',       np.float64),
                    ('rPosition',  np.float64),
                    ('vMag',       np.float64),
                    ('resolution', np.int64),
                    ('time',       np.int64)])


def processCatalog(catalog):
    """Reduce a raw catalog to the scalar quantities listed in ``structureFormat``.

    Parameters
    ----------
    catalog : numpy structured array
        Must expose the fields read below: 'ID', 'volume', 'mass',
        'positionX/Y/Z', 'velocityX/Y/Z', 'resolution' and 'time'.

    Returns
    -------
    numpy.ndarray
        Structured array with dtype ``structureFormat`` and the same shape as
        ``catalog``; entry i describes the same structure as ``catalog[i]``.
    """
    # Fill the structured array field by field. Stacking the columns with
    # np.hstack and casting to the structured dtype does NOT work: hstack
    # flattens the seven columns into one array of length 7*N, and the cast
    # then copies each scalar into every field of its record.
    processed = np.empty(catalog.shape, dtype=structureFormat)

    # Fields carried over unchanged.
    for field in ('ID', 'volume', 'mass', 'resolution', 'time'):
        processed[field] = catalog[field]

    # Euclidean norm of the position vector (distance from the origin).
    processed['rPosition'] = np.sqrt(catalog['positionX']**2
                                     + catalog['positionY']**2
                                     + catalog['positionZ']**2)
    # Euclidean norm of the velocity vector.
    processed['vMag'] = np.sqrt(catalog['velocityX']**2
                                + catalog['velocityY']**2
                                + catalog['velocityZ']**2)
    return processed


processed512 = processCatalog(catalog512)
processed1024 = processCatalog(catalog1024)
processed2048 = processCatalog(catalog2048)

In [None]:
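# NOTE: testAndPlot is disabled (this whole cell is commented out). Before re-enabling it,
# replace `manifold` with `skmanifold` (the alias used by this notebook's import) and define
# `color`, which is referenced by the scatter call but never assigned.
# def testAndPlot(catalog):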

#     from collections import OrderedDict
#     from functools import partial
#     from time import time

#     import matplotlib.pyplot as plt
#     from mpl_toolkits.mplot3d import Axes3D
#     from matplotlib.ticker import NullFormatter

#     # Next line to silence pyflakes. This import is needed.
#     Axes3D

#     n_points = 1000
#     # X, color = datasets.make_s_curve(n_points, random_state=0)
#     n_neighbors = 10
#     n_components = 2

#     # Create figure
#     fig = plt.figure(figsize=(15, 8))
#     fig.suptitle("Manifold Learning with %i points, %i neighbors"
#                  % (1000, n_neighbors), fontsize=14)

#     # Add 3d scatter plot
#     ax = fig.add_subplot(251, projection='3d')
#     ax.scatter(catalog['mass'], catalog['volume'], catalog['rPosition'], catalog['vMag'], cmap=plt.cm.Spectral)
#     ax.view_init(4, -72)

#     # Set-up manifold methods
#     LLE = partial(manifold.LocallyLinearEmbedding,
#                   n_neighbors, n_components, eigen_solver='auto')

#     methods = OrderedDict()
#     methods['LLE'] = LLE(method='standard')
#     methods['LTSA'] = LLE(method='ltsa')
#     methods['Hessian LLE'] = LLE(method='hessian')
#     methods['Modified LLE'] = LLE(method='modified')
#     methods['Isomap'] = manifold.Isomap(n_neighbors, n_components)
#     methods['MDS'] = manifold.MDS(n_components, max_iter=100, n_init=1)
#     methods['SE'] = manifold.SpectralEmbedding(n_components=n_components,
#                                                n_neighbors=n_neighbors)
#     methods['t-SNE'] = manifold.TSNE(n_components=n_components, init='pca',
#                                      random_state=0)

#     # Plot results
#     for i, (label, method) in enumerate(methods.items()):
#         t0 = time()
#         Y = method.fit_transform(np.rot90(np.vstack((catalog['mass'], catalog['volume'], catalog['rPosition'], catalog['vMag']))))
#         t1 = time()
#         print("%s: %.2g sec" % (label, t1 - t0))
#         ax = fig.add_subplot(2, 5, 2 + i + (i > 3))
#         ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
#         ax.set_title("%s (%.2g sec)" % (label, t1 - t0))
#         ax.xaxis.set_major_formatter(NullFormatter())
#         ax.yaxis.set_major_formatter(NullFormatter())
#         ax.axis('tight')

#     plt.show()

In [None]:
# testAndPlot(processed512) 

In [None]:
# Run t-SNE
# perplexity should be between 5 and 50, analogous to number of nearest neighbors
perp = 5

# Feature matrix: one row per structure, one column per feature, in the order
# (mass, volume, rPosition, vMag). column_stack keeps the rows in the same
# order as processed512, so row i of `result` corresponds to processed512[i].
catalog = np.column_stack((processed512['mass'],
                           processed512['volume'],
                           processed512['rPosition'],
                           processed512['vMag']))

# random_state is fixed so that re-running the cell reproduces the same embedding.
# NOTE(review): n_components=4 embeds the 4 input features into 4 dimensions, and
# only the first two are plotted below -- confirm this is intended (2 is the usual
# choice for visualisation).
result = skmanifold.TSNE(n_components=4, perplexity=perp, n_jobs=-1,
                         method='exact', random_state=0).fit_transform(catalog)

# Plot the embedding itself (first two t-SNE components), not the input features.
plt.scatter(result[:, 0], result[:, 1])
plt.xlabel('t-SNE component 1')
plt.ylabel('t-SNE component 2')
plt.title('t-SNE embedding of the 512 catalog (perplexity = %d)' % perp)
plt.show()