In [None]:
import pandas as pd, plotly.express as px, umap

from pathlib import Path
from sklearn.datasets import load_breast_cancer

In [1]:
data, meta_data = load_breast_cancer(return_X_y = True, as_frame = True)

data.columns




Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension'],
      dtype='object')

In [2]:
meta_data




0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: target, Length: 569, dtype: int64

In [None]:
meta_data = pd.DataFrame(meta_data)
#Let's add another bit of metadata just for the heck of it.
meta_data['big_radius'] = data['mean radius'] > data['mean radius'].mean()

In [3]:
reducer_2d = umap.UMAP(n_components = 2)
results_2d = pd.DataFrame(data = reducer_2d.fit_transform(data), columns = ['2d1','2d2'])

# Let's do it all at once, because we can
results_3d = pd.DataFrame(data = umap.UMAP(n_components = 3).fit_transform(data), columns = ['3d1','3d2','3d3'])

# WWCA means "What We Care About"
wwca = meta_data.join(results_2d).join(results_3d)
wwca




     target  big_radius        2d1       2d2        3d1       3d2       3d3
0         0        True  10.525769  2.004924  13.557387  2.851513  7.784062
1         0        True  10.171386  2.059343  13.375252  2.896229  7.982438
2         0        True  11.071662  3.334772  13.381915  4.062381  6.990562
3         0       False  -0.546467  6.016317  -1.591870  4.945582  5.073776
4         0        True  11.023631  3.711662  13.108088  4.444450  6.907868
..      ...         ...        ...       ...        ...       ...       ...
564       0        True  10.080576  1.730067  13.302131  2.439194  8.346813
565       0        True  10.871721  3.207210  13.365547  3.954024  7.257575
566       0        True  11.090828  9.521077   9.674644  9.619042  3.649511
567       0        True  10.647588  2.813657  13.464987  3.653803  7.487882
568       1       False  -4.408195 -4.986617  -3.340781 -1.224677 -2.908774

[569 rows x 7 columns]

# Initial plotting with plotly

In [None]:
fig = px.scatter(wwca, x='2d1',y='2d1')
fig.show()

# Clean up the label to something people like

In [4]:
new_data['text label'] = labels.map(lambda x: ['Benign','Malignant'][x])
new_data




            d1        d2      label text label
0    10.241105 -0.926771     Benign     Benign
1    10.563326 -1.051887     Benign     Benign
2    11.028148  0.487072     Benign     Benign
3    -0.890983  7.078265     Benign     Benign
4    11.367149  0.589767     Benign     Benign
..         ...       ...        ...        ...
564  10.580154 -1.588664     Benign     Benign
565  11.057127  0.111496     Benign     Benign
566  10.132827  8.208202     Benign     Benign
567  10.739155 -0.428516     Benign     Benign
568  -2.306198 -4.582163  Malignant  Malignant

[569 rows x 4 columns]

In [5]:
new_data




            d1        d2      label text label
0    10.241105 -0.926771     Benign     Benign
1    10.563326 -1.051887     Benign     Benign
2    11.028148  0.487072     Benign     Benign
3    -0.890983  7.078265     Benign     Benign
4    11.367149  0.589767     Benign     Benign
..         ...       ...        ...        ...
564  10.580154 -1.588664     Benign     Benign
565  11.057127  0.111496     Benign     Benign
566  10.132827  8.208202     Benign     Benign
567  10.739155 -0.428516     Benign     Benign
568  -2.306198 -4.582163  Malignant  Malignant

[569 rows x 4 columns]