# UMAP DEMO

Here is a small demo of the UMAP package in Python. 

It's modeled after: 

https://umap-learn.readthedocs.io/en/latest/basic_usage.html#digits-data

In [None]:
import os
import pickle
from io import BytesIO
from PIL import Image
import base64

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import umap

import bokeh
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import Spectral10

# Render Bokeh plots inline in the notebook's output cells.
output_notebook()

# Working directory that holds the "cifar-10-batches-py" folder read below.
# Set the UMAP_DEMO_DIR environment variable to run this notebook on another
# machine; when it is unset, the original author's local path is used.
os.chdir(os.environ.get("UMAP_DEMO_DIR", "D:/Documents/academics/csu/talks&posters/UMAP/"))

### Load Data

We're using part of the CIFAR-10 image dataset: the first training batch (`data_batch_1`).

In [None]:
# Path is relative to the working directory chosen in the setup cell.
file = os.path.join("cifar-10-batches-py", "data_batch_1")
with open(file, 'rb') as fo:
    # NOTE: unpickling can execute arbitrary code -- only load batch files
    # obtained from a trusted source.
    # encoding='bytes' leaves the dict keys as bytes (hence b'labels' / b'data').
    d = pickle.loads(fo.read(), encoding='bytes')

In [None]:
# Pull the per-image labels and the pixel array out of the batch dict.
# The keys are bytes because the pickle was loaded with encoding='bytes'.
labs, data = d[b'labels'], d[b'data']

In [None]:
# Sanity check: number of labels first, then the shape of the pixel array.
print(len(labs), data.shape, sep="\n")

### Learn the Manifold

In [None]:
reducer = umap.UMAP(
        n_neighbors=5,
        min_dist=0.0,
        n_components=2,
        metric="euclidean"
)
%time embedding = reducer.fit_transform(data)
embedding.shape

### View Results

Static Image:

In [None]:
# Explicit figure/axes so the plot can be titled and labelled and stands alone.
fig, ax = plt.subplots()
s = ax.scatter(embedding[:, 0], embedding[:, 1], c=labs, cmap="tab10", s=5)
# One legend entry per distinct label value (num=None); it sits outside the
# axes so it does not cover any points.
ax.legend(*s.legend_elements(num=None), loc="upper left",
          bbox_to_anchor=(1.02, 1.0), title="Label")
ax.set(title="UMAP projection of CIFAR-10 (data_batch_1)",
       xlabel="UMAP 1", ylabel="UMAP 2");


Interactive plot with image tooltips (hover over a point to see the image and its category):

In [None]:
# create image hovers
def embeddable_image(data):
    """Encode one flattened image row as an inline PNG data URI.

    Args:
        data: array that reshapes to (3, 32, 32), i.e. three 32x32 channel
            planes stored one after another. Assumed to hold 8-bit pixel
            values (not checked here -- TODO confirm against the loader).

    Returns:
        str: ``'data:image/png;base64,...'`` containing the image upscaled
        to 64x64 with nearest-neighbour resampling.
    """
    # Channel-first planes -> (row, column, channel) layout for an RGB image.
    pixels = np.transpose(data.reshape(3, 32, 32), (1, 2, 0))
    thumbnail = Image.fromarray(pixels, mode='RGB').resize((64, 64), Image.NEAREST)
    with BytesIO() as png_buffer:
        thumbnail.save(png_buffer, format='png')
        encoded = base64.b64encode(png_buffer.getvalue())
    return 'data:image/png;base64,' + encoded.decode()

In [None]:
# Class names, looked up by the integer label stored in `labs`.
lab_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

# One row per image: 2-D embedding coordinates, class name, and the inline
# PNG data URI that the hover tooltip displays.
img_df = pd.DataFrame(embedding, columns=('x', 'y'))
img_df['label'] = [lab_names[x] for x in labs]
img_df['image'] = list(map(embeddable_image, data))

datasource = ColumnDataSource(img_df)
# Map each class name to one colour of the 10-colour Spectral palette.
color_mapping = CategoricalColorMapper(factors=lab_names,
                                       palette=Spectral10)

# `width` / `height` replace `plot_width` / `plot_height`, which were removed
# in Bokeh 3.0. The title names the dataset actually plotted (CIFAR-10).
plot_figure = figure(
    title='UMAP projection of the CIFAR-10 dataset',
    width=600,
    height=600,
    tools=('pan, wheel_zoom, reset')
)

# Tooltip template: @image and @label are filled from the matching
# columns of `datasource` for the hovered point.
plot_figure.add_tools(HoverTool(tooltips="""
<div>
    <div>
        <img src='@image' style='float: left; margin: 5px 5px 5px 5px'/>
    </div>
    <div>
        <span style='font-size: 16px; color: #224499'>Category:</span>
        <span style='font-size: 18px'>@label</span>
    </div>
</div>
"""))

# `scatter` (default marker: circle) is used instead of `circle(size=...)`,
# which is deprecated in recent Bokeh releases.
plot_figure.scatter(
    'x',
    'y',
    source=datasource,
    color=dict(field='label', transform=color_mapping),
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4
)
show(plot_figure)