In [1]:
%matplotlib notebook
%reload_ext autoreload
%autoreload 2

"""
This notebook produces several manifold-learning plots (t-SNE, Isomap, LLE,
spectral embedding and MDS) to illustrate that these methods are not very
useful for visualizing the topology of a shape, even though they are
supposed to learn the underlying "manifold".
"""

import os
import sys
import matplotlib.pyplot as plt
import numpy as np

sys.path.append('../src')
from paretoviz.utils import fmt
from paretoviz.utils import vectorops as vops
from paretoviz import decorator as dcor
from paretoviz import plotting as pplt
from paretoviz.decorator import Arrow3D

# Seeded random generator that later cells pass to the stochastic embeddings.
rndstate = np.random.RandomState(seed = 123)


def _unwrap_singletons(rows):
    """Return rows with every length-1 entry replaced by its only element."""
    unwrapped = []
    for row in rows:
        unwrapped.append(row[0] if len(row) == 1 else row)
    return unwrapped


# Raw (original objective value) files of the two clusters.
c1raw = "../data/carcrash/carcrash-c1/carcrash-c1-3d.out"
c2raw = "../data/carcrash/carcrash-c2/carcrash-c2-3d.out"

# Folder and file name of each raw file.
(c1path, c1rfname) = os.path.split(c1raw)
(c2path, c2rfname) = os.path.split(c2raw)

# Everything before the first '.' of the file name is the prefix shared by
# the derived "-norm*.out" files.
c1prefix = c1rfname.partition('.')[0]
c2prefix = c2rfname.partition('.')[0]

# Normalized data points.
c1npts = fmt.load(os.path.join(c1path, c1prefix + "-norm.out"))
c2npts = fmt.load(os.path.join(c2path, c2prefix + "-norm.out"))

# Values stored in the "-norm-mu.out" files (labelled trade-offs by the
# original author); single-element entries are flattened to a bare value.
c1mu = _unwrap_singletons(fmt.load(os.path.join(c1path, c1prefix + "-norm-mu.out")))
c2mu = _unwrap_singletons(fmt.load(os.path.join(c2path, c2prefix + "-norm-mu.out")))

# Marker sizes derived from those values.
c1s = dcor.rescale_by_tradeoff(c1mu)
c2s = dcor.rescale_by_tradeoff(c2mu)

# Marker colors: first derived from the normalized points ("by centroid") ...
c1c = dcor.recolor_by_centroid(c1npts, factor = 4.0)
c2c = dcor.recolor_by_centroid(c2npts, factor = 1.5)

# ... then updated from the sizes ("by tradeoff"). The second return value
# is a list of point indices, used later as the knee-point indices.
(c1c, c1kidx) = dcor.recolor_by_tradeoff(c1s, c1c)
(c2c, c2kidx) = dcor.recolor_by_tradeoff(c2s, c2c)

# The scatter plot itself shows the original (raw) objective values.
c1rpts = fmt.load(c1raw)
c2rpts = fmt.load(c2raw)

# Combined point set, cluster B (c2) first and cluster A (c1) second. Because
# B comes first, an index into c2rpts is also a valid index into the combined
# rpts / s / c lists built below.
rpts = c2rpts + c1rpts
U = np.array(rpts)

# Shrink every marker for better visualization ...
c1s = [size * 0.33 for size in c1s]
c2s = [size * 0.33 for size in c2s]

# ... and enlarge the markers at the knee indices again.
for sizes, knee_indices in ((c1s, c1kidx), (c2s, c2kidx)):
    for i in knee_indices:
        sizes[i] *= 3.33

# Sizes and colors in the same order as rpts.
s = c2s + c1s
c = c2c + c1c

# Indices of the cluster-B points lying within 0.15 (vops.distlp) of the
# cluster-B mean.
muc2 = vops.mean(c2rpts)
muidx = [i for i, point in enumerate(c2rpts) if vops.distlp(point, muc2) < 0.15]

# Paint those centroid points red and make them 20 times bigger.
for i in muidx:
    s[i] *= 20.0
    c[i] = 'red'

In [2]:
# Export the data points and the knee points of each cluster to CSV files (to be sent to Dr. Deb)
import numpy as np

# Root folder that receives the per-cluster CSV exports. It is defined once
# so that writing somewhere else means changing a single line.
export_root = "/Users/khaled/Documents/dm-data-points"


def export_cluster(points, knee_indices, folder):
    """
    Print and save one cluster's points, knee indices and knee points.

    Three files are written into <export_root>/<folder>:
    data.csv (all points), knee-indices.csv (the indices) and
    knees.csv (the rows of the points selected by the indices).

    Parameters:
        points: sequence of data points, one row per point.
        knee_indices: sequence of integer row indices into points.
        folder: name of the sub-folder of export_root to write to.

    Returns:
        (pts, kidx, knees): the points as an array, the indices as an
        integer array, and the selected rows.
    """
    out_dir = os.path.join(export_root, folder)
    # Create the target folder if needed instead of failing inside savetxt.
    os.makedirs(out_dir, exist_ok = True)

    pts = np.array(points)
    print(pts)
    np.savetxt(os.path.join(out_dir, "data.csv"), pts, delimiter = ',', fmt = '%1.4e')

    # Force an integer dtype: an empty index list would otherwise become a
    # float array, which cannot be used for indexing below.
    kidx = np.array(knee_indices, dtype = int)
    print(kidx)
    np.savetxt(os.path.join(out_dir, "knee-indices.csv"), kidx, delimiter = ',', fmt = '%d')

    knees = pts[kidx, :]
    print(knees)
    np.savetxt(os.path.join(out_dir, "knees.csv"), knees, delimiter = ',', fmt = '%1.4e')

    return pts, kidx, knees


# cluster A
c1rpts_, c1kidx_, c1knees = export_cluster(c1rpts, c1kidx, "carcrash-clusterA")

# cluster B
c2rpts_, c2kidx_, c2knees = export_cluster(c2rpts, c2kidx, "carcrash-clusterB")

[[1.6617e+03 8.2997e+00 6.7601e-02]
 [1.6617e+03 8.2999e+00 6.7581e-02]
 [1.6617e+03 8.3000e+00 6.7574e-02]
 ...
 [1.6766e+03 7.4426e+00 6.0394e-02]
 [1.6719e+03 7.0524e+00 7.1801e-02]
 [1.6719e+03 7.0524e+00 7.1801e-02]]
[  94 1172 2015 3401]
[[1.6617e+03 8.3051e+00 6.7174e-02]
 [1.6663e+03 6.9460e+00 8.1871e-02]
 [1.6663e+03 6.9460e+00 8.1871e-02]
 [1.6664e+03 6.9462e+00 8.1668e-02]]
[[1.6755e+03 6.1448e+00 2.3153e-01]
 [1.6726e+03 6.6903e+00 1.8462e-01]
 [1.6712e+03 6.8586e+00 1.6180e-01]
 ...
 [1.6755e+03 6.1446e+00 2.3148e-01]
 [1.6832e+03 6.5849e+00 1.6763e-01]
 [1.6755e+03 6.1447e+00 2.3147e-01]]
[ 60 706]
[[1.6702e+03 6.9448e+00 1.4422e-01]
 [1.6755e+03 6.1446e+00 2.3124e-01]]


In [3]:
# 3-D scatter of the raw objective values of both clusters, using the marker
# sizes (s) and colors (c) prepared in the first cell.
(fig, ax) = pplt.scatter(rpts, s = s, c = c, camera = [30, 30], alpha = [0.2, 0.8], label = r"$f_{:d}$")

# Axis ranges for the IEEE CIM figure.
ax.set_xlim(1660, 1695)
ax.set_ylim(6, 10.5)
ax.set_zlim(0.025, 0.225)

# Keep only every second tick on each axis.
for get_ticks, set_ticks in ((ax.get_xticks, ax.set_xticks),
                             (ax.get_yticks, ax.set_yticks),
                             (ax.get_zticks, ax.set_zticks)):
    set_ticks(get_ticks()[::2])

# Switch off the automatic rotation of the axis labels.
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis.set_rotate_label(False)

# Font sizes for the IEEE CIM figure; axis labels are 1.25 times the tick size.
tickfs = 12
textfs = 12
labelfs = round(tickfs * 1.25, 2)

label_font = dict(fontname = 'Arial', fontsize = labelfs, style = 'italic')
ax.set_xlabel(r"$f_1$", **label_font)
ax.set_ylabel(r"$f_2$", **label_font)
ax.set_zlabel(r"$f_3$", **label_font)
ax.tick_params(labelsize = tickfs)

# Cluster names at hand-picked data coordinates.
text_font = dict(fontname = 'Arial', fontsize = textfs, style = 'italic')
ax.text(1665, 8.50, 0.050, "Cluster A", **text_font)
ax.text(1700, 6.50, 0.180, "Cluster B", **text_font)

# Centroid label, placed 10 units to the left (along f1) of the first point
# flagged in muidx. mutxt is reused by the embedding plots in later cells.
mutxt = "Centroid \n  of cluster B"
centroid_pt = rpts[muidx[0]]
ax.text(centroid_pt[0] - 10, centroid_pt[1], centroid_pt[2], mutxt, **text_font)

# Black arrow built from the given x, y and z coordinate pairs.
a = Arrow3D([1665, 1670], [6.5, 6.35], [0.20, 0.18],
            mutation_scale = 15, lw = 1, arrowstyle = "-|>", color = "black")
ax.add_artist(a)

# Save the figure as PDF and display it.
ofname = "../data/carcrash/carcrash-3d-scatter-combined-centroid.pdf"
plt.savefig(ofname, transparent = False, dpi = 300)

plt.show()

<IPython.core.display.Javascript object>

In [None]:
import numpy as np
from sklearn.manifold import TSNE

"""
Do t-SNE on the carcrash data.
"""

# Embed the combined points (U) into 2-D with t-SNE and cache the result as
# nested lists for the plotting cell.
#
# The seed is passed as an integer rather than as the shared `rndstate`
# object. A RandomState instance is advanced each time it is used, so with
# the shared object the embedding changed whenever this cell was re-run or
# run out of order, while the next cell annotates the plot at hard-coded
# coordinates. scikit-learn turns the integer into a fresh RandomState(123),
# i.e. the state `rndstate` has right after it is created.
tsne = TSNE(n_components = 2, verbose = True, random_state = 123)
F = tsne.fit_transform(U).tolist()
fmt.save(F, "../data/carcrash/carcrash-3d-tsne-combined.out")
print("Done.")

In [None]:
# Font sizes for the IEEE CIM figure; axis labels are 1.25 times the tick size.
tickfs, textfs = 12, 14
labelfs = round(tickfs * 1.25, 2)

# Plot the cached t-SNE embedding with the marker sizes (s) and colors (c)
# prepared in the first cell.
f = fmt.load("../data/carcrash/carcrash-3d-tsne-combined.out")
(fig, ax) = pplt.scatter(f, s = s, c = c, alpha = [0.2, 0.8], label = r'$u_{:d}$')

# Axis labels for the IEEE CIM figure, kept horizontal.
ax.set_xlabel(r"$u_1$", fontname = 'Arial', fontsize = labelfs, style = 'italic', rotation = 0)
ax.set_ylabel(r"$u_2$", fontname = 'Arial', fontsize = labelfs, style = 'italic', rotation = 0)
ax.tick_params(labelsize = tickfs)

# Centroid label with an arrow. The annotation text is passed positionally:
# its keyword was renamed from `s` to `text` in Matplotlib, so `s = mutxt`
# raises a TypeError on current releases, while the positional form works on
# old and new versions alike.
# NOTE(review): xy / xytext are hard-coded data coordinates, presumably
# picked by eye for the embedding obtained with seed 123 -- re-check them if
# the embedding changes.
ax.annotate(mutxt, xy = (-14, -64), xytext = (-60, -100), \
            arrowprops = dict(facecolor = "black", arrowstyle = "-|>", linewidth = 2.0, mutation_scale = 20.0), \
            fontname = 'Arial', fontsize = textfs, style = 'italic')

# Save the figure as PDF and display it.
outfile = "../data/carcrash/carcrash-3d-norm-tsne-combined.pdf"
plt.savefig(outfile, transparent = False, dpi = 300)

plt.show()

In [None]:
from sklearn.manifold import Isomap

"""
Do isomap on the carcrash data.
"""

# Fit a 2-component Isomap on the combined points (U) and cache the
# embedding as nested lists for the plotting cell.
isomap = Isomap(n_components = 2)
isomap_embedding = isomap.fit_transform(U)
F = isomap_embedding.tolist()
fmt.save(F, "../data/carcrash/carcrash-3d-isomap-combined.out")
print("Done.")

In [None]:
# Reload the cached Isomap embedding and scatter it with the marker sizes (s)
# and colors (c) prepared in the first cell.
f = fmt.load("../data/carcrash/carcrash-3d-isomap-combined.out")
(fig, ax) = pplt.scatter(f, s = s, c = c, alpha = [0.2, 0.8], label = r'$u_{:d}$')

# Put the centroid label at the embedded position of the first point in muidx.
centroid_uv = f[muidx[0]]
ax.text(centroid_uv[0], centroid_uv[1], mutxt)

# Save the figure as PDF and display it.
outfile = "../data/carcrash/carcrash-3d-isomap-combined.pdf"
plt.savefig(outfile, transparent = False, dpi = 300)

plt.show()

In [None]:
from sklearn.manifold import LocallyLinearEmbedding

"""
Do Locally Linear Embedding (LLE) on the carcrash data.
"""

# Fit a 2-component LLE (dense eigen-solver) on the combined points (U) and
# cache the embedding as nested lists for the plotting cell.
lle_options = dict(n_components = 2, eigen_solver = 'dense', random_state = rndstate)
lle = LocallyLinearEmbedding(**lle_options)
lle_embedding = lle.fit_transform(U)
F = lle_embedding.tolist()
fmt.save(F, "../data/carcrash/carcrash-3d-lle-combined.out")
print("Done.")

In [None]:
# Reload the cached LLE embedding and scatter it with the marker sizes (s)
# and colors (c) prepared in the first cell.
f = fmt.load("../data/carcrash/carcrash-3d-lle-combined.out")
(fig, ax) = pplt.scatter(f, s = s, c = c, alpha = [0.2, 0.8], label = r'$u_{:d}$')

# Put the centroid label at the embedded position of the first point in muidx.
centroid_uv = f[muidx[0]]
ax.text(centroid_uv[0], centroid_uv[1], mutxt)

# Save the figure as PDF and display it.
outfile = "../data/carcrash/carcrash-3d-lle-combined.pdf"
plt.savefig(outfile, transparent = False, dpi = 300)

plt.show()

In [None]:
from sklearn.manifold import SpectralEmbedding

"""
Do Spectral Embedding (SE) on the carcrash data.
"""

# Embed the combined points (U) into 2-D with Spectral Embedding and cache
# the result as nested lists for the plotting cell.
#
# The seed is passed as an integer rather than as the shared `rndstate`
# object: a RandomState instance is advanced by every estimator that draws
# from it, so with the shared object the outcome of this cell depended on
# which cells had been run before it, and how often. 123 is the seed that
# `rndstate` is created with.
se = SpectralEmbedding(n_components = 2, random_state = 123)
F = se.fit_transform(U).tolist()
fmt.save(F, "../data/carcrash/carcrash-3d-se-combined.out")
print("Done.")

In [None]:
# Reload the cached Spectral Embedding result and scatter it with the marker
# sizes (s) and colors (c) prepared in the first cell.
f = fmt.load("../data/carcrash/carcrash-3d-se-combined.out")
(fig, ax) = pplt.scatter(f, s = s, c = c, alpha = [0.2, 0.8], label = r'$u_{:d}$')

# Put the centroid label at the embedded position of the first point in muidx.
centroid_uv = f[muidx[0]]
ax.text(centroid_uv[0], centroid_uv[1], mutxt)

# Save the figure as PDF and display it.
outfile = "../data/carcrash/carcrash-3d-se-combined.pdf"
plt.savefig(outfile, transparent = False, dpi = 300)

plt.show()

In [None]:
from sklearn.manifold import MDS

"""
Do Multidimensional Scaling (MDS) on the carcrash data.
"""

# Embed the combined points (U) into 2-D with MDS and cache the result as
# nested lists for the plotting cell.
#
# The seed is passed as an integer rather than as the shared `rndstate`
# object: a RandomState instance is advanced by every estimator that draws
# from it, so with the shared object the outcome of this cell depended on
# which cells had been run before it, and how often. 123 is the seed that
# `rndstate` is created with.
mds = MDS(n_components = 2, random_state = 123, verbose = 2)
F = mds.fit_transform(U).tolist()
fmt.save(F, "../data/carcrash/carcrash-3d-mds-combined.out")
print("Done.")

In [None]:
# Reload the cached MDS embedding and scatter it with the marker sizes (s)
# and colors (c) prepared in the first cell.
f = fmt.load("../data/carcrash/carcrash-3d-mds-combined.out")
(fig, ax) = pplt.scatter(f, s = s, c = c, alpha = [0.2, 0.8], label = r'$u_{:d}$')

# Put the centroid label at the embedded position of the first point in muidx.
centroid_uv = f[muidx[0]]
ax.text(centroid_uv[0], centroid_uv[1], mutxt)

# Save the figure as PDF and display it.
outfile = "../data/carcrash/carcrash-3d-mds-combined.pdf"
plt.savefig(outfile, transparent = False, dpi = 300)

plt.show()