In [1]:
%matplotlib widget
import ipywidgets as widgets
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.decomposition import PCA

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import pandas as pd
import umap

sns.set(style='white', context='notebook', rc={'figure.figsize':(8,6)})


# import some data to play with
iris = datasets.load_iris()
x = iris.data[:, :2]  # we only take the first two features.
y = iris.target

# np.c_ is the numpy concatenate function
# which is used to concat iris['data'] and iris['target'] arrays 
# for pandas column argument: concat iris['feature_names'] list
# and string list (in this case one string); you can make this anything you'd like..  
# the original dataset would probably call this ['Species']
pd_iris = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
                     columns= iris['feature_names'] + ['target'])

sns.pairplot(pd_iris, hue='target', palette='Dark2')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<seaborn.axisgrid.PairGrid at 0x1d1e56f1c10>

In [2]:
x_min, x_max = x[:, 0].min() - .5, x[:, 0].max() + .5
y_min, y_max = x[:, 1].min() - .5, x[:, 1].max() + .5

plt.figure(2, figsize=(8, 6))
plt.clf()

## Plot the training points
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.Set1, edgecolor='k')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.title('Iris data')

plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

([], [])

In [3]:
# To getter a better understanding of interaction of the dimensions
# plot the first three PCA dimensions
fig_3 = plt.figure(3, figsize=(8, 6))
ax = Axes3D(fig_3, elev=-150, azim=110)
X_reduced = PCA(n_components=3).fit_transform(iris.data)
ax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=y,
              cmap=plt.cm.Set1, edgecolor='k', s=40)
ax.set_title("First three PCA directions")
ax.set_xlabel("1st eigenvector")
ax.w_xaxis.set_ticklabels([])
ax.set_ylabel("2nd eigenvector")
ax.w_yaxis.set_ticklabels([])
ax.set_zlabel("3rd eigenvector")
ax.w_zaxis.set_ticklabels([])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[Text(-0.8, 0, ''),
 Text(-0.6000000000000001, 0, ''),
 Text(-0.4, 0, ''),
 Text(-0.19999999999999996, 0, ''),
 Text(0.0, 0, ''),
 Text(0.19999999999999996, 0, ''),
 Text(0.40000000000000013, 0, ''),
 Text(0.6000000000000001, 0, ''),
 Text(0.8, 0, ''),
 Text(1.0, 0, '')]

In [4]:
reducer = umap.UMAP()

iris_data = pd_iris[
    [
        "sepal length (cm)",
        "sepal width (cm)",
        "petal length (cm)",
        "petal width (cm)",
    ]
].values
scaled_iris_data = StandardScaler().fit_transform(iris_data)

embedding = reducer.fit_transform(scaled_iris_data)
embedding.shape

plt.figure(4, figsize=(8, 6))
plt.scatter(
    embedding[:, 0],
    embedding[:, 1],
    #c=[sns.color_palette()[X] for X in pd_iris.target.map({"0":0, "1":1, "2":2})]
    c=pd_iris['target'],
    cmap=plt.cm.Set1
    )
plt.gca().set_aspect('equal', 'datalim')
plt.title('UMAP projection of the Iris dataset', fontsize=24)
#plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [5]:
umap_3d = umap.UMAP(learning_rate=1.0, local_connectivity=1.0, metric='euclidean', min_dist=0.1, n_components=3, n_neighbors=15)

fig_5 = plt.figure(5, figsize=(8, 6))
ax2 = Axes3D(fig_5, elev=-150, azim=110)
proj_3d = umap_3d.fit_transform(scaled_iris_data)
ax2.scatter(proj_3d[:, 0], proj_3d[:, 1], proj_3d[:, 2], c=pd_iris['target'],
              cmap=plt.cm.Set1, edgecolor='k', s=40)
ax2.set_title("First three UMAP directions")
ax2.set_xlabel("UMAP 1")
ax2.w_xaxis.set_ticklabels([])
ax2.set_ylabel("UMAP 2")
ax2.w_yaxis.set_ticklabels([])
ax2.set_zlabel("UMAP 3")
ax2.w_zaxis.set_ticklabels([])

plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …