# Fashion MNIST RAPIDS UMAP

In [None]:
import os
import umap
import requests
import pandas as pd
import numpy as np
import datashader as ds
import datashader.utils as utils
import datashader.transfer_functions as tf
import matplotlib.pyplot as plt

# GPU

In [None]:
# Download the Fashion-MNIST CSV from OpenML once; later runs reuse the local copy.
if not os.path.isfile('fashion-mnist.csv'):
    csv_data = requests.get(
        'https://www.openml.org/data/get_csv/18238735/phpnBqZGZ'
    )
    # Fail loudly on an HTTP error. Without this check an error page would be
    # saved as 'fashion-mnist.csv' and, because the file then exists, never
    # be re-downloaded.
    csv_data.raise_for_status()
    # Write the raw response bytes so the file is not re-encoded with the
    # platform's default text encoding or newline translation.
    with open('fashion-mnist.csv', 'wb') as f:
        f.write(csv_data.content)

In [None]:
# Shell escape: print the beginning of the downloaded CSV to check its header and layout.
! head fashion-mnist.csv

In [None]:
import cudf

# Parse the CSV with cuDF's reader (presumably into a GPU-resident DataFrame);
# `source_df` is the input for the cells that follow in this section.
source_df = cudf.read_csv('fashion-mnist.csv')

In [None]:
# Display (rows, columns) of the loaded table as a quick sanity check.
source_df.shape

In [None]:
# Take the first 784 values of row 0, copy them to a host array and view them
# as a 28x28 grid for display.
# NOTE(review): `to_array()` has been removed from recent cuDF releases
# (`to_numpy()` / `values_host` are the replacements) - confirm against the
# installed version.
first_row = source_df.iloc[0, :784]
first_image = first_row.to_array().reshape((28, 28))
plt.imshow(first_image, cmap='Greys')

In [None]:
import cuml

# Features: every column except the last one (presumably the label column -
# verify against the CSV header). Labels: the "class" column.
feature_columns = source_df.columns[:-1]
data = source_df.loc[:, feature_columns]
target = source_df["class"]

# Hyper-parameters for the cuML UMAP estimator, projecting to 2 components.
umap_params = dict(
    n_neighbors=15,
    n_components=2,
    n_epochs=500,
    min_dist=0.1,
)
reducer = cuml.UMAP(**umap_params)

In [None]:
%%time

# Fit the cuML reducer on the feature table and keep the resulting embedding;
# the %%time cell magic reports how long this cell takes.
embedding = reducer.fit_transform(data)

In [None]:
# Convert the embedding to a pandas DataFrame for plotting.
df = embedding.to_pandas()
# Name the two embedding columns so the plotting code can refer to 'x' and 'y'.
df.columns = ["x", "y"]
# Summary statistics of the embedding coordinates (displayed as the cell output).
df.describe()

In [None]:
# Attach the labels as a categorical string column so Datashader can count
# points per class.
# NOTE(review): `to_array()` has been removed from recent cuDF releases
# (`to_numpy()` / `values_host` are the replacements) - confirm against the
# installed version.
class_labels = list(map(str, target.to_array()))
df['class'] = pd.Series(class_labels, dtype="category")

# Aggregate the points onto an 800x800 canvas, one count per class per pixel.
cvs = ds.Canvas(plot_width=800, plot_height=800)
agg = cvs.points(df, 'x', 'y', ds.count_cat('class'))

# Shade the aggregate and put it on a black background; the image is the cell output.
shaded = tf.shade(agg)
tf.set_background(shaded, "black")

# CPU

In [None]:
# Reload the same CSV with pandas for the CPU run.
source_df = pd.read_csv('fashion-mnist.csv')

# First 784 columns as a float32 NumPy matrix; labels from the 'class' column.
pixel_frame = source_df.iloc[:, :784]
data = pixel_frame.values.astype(np.float32)
target = source_df['class'].values

# Hyper-parameters for the umap-learn estimator, with a fixed random seed.
umap_params = dict(
    n_neighbors=15,
    n_components=2,
    n_epochs=500,
    min_dist=0.1,
    random_state=42,
)
reducer = umap.UMAP(**umap_params)

In [None]:
%%time

embedding = reducer.fit_transform(data)

df = pd.DataFrame(embedding, columns=('x', 'y'))
df['class'] = pd.Series([str(x) for x in target], dtype="category")

cvs = ds.Canvas(plot_width=400, plot_height=400)
agg = cvs.points(df, 'x', 'y', ds.count_cat('class'))
img = tf.shade(agg, color_key=color_key, how='eq_hist')

utils.export_image(img, filename='fashion-mnist2', background='black')

image = plt.imread('fashion-mnist2.png')
fig, ax = plt.subplots(figsize=(12, 12))
plt.imshow(image)
plt.setp(ax, xticks=[], yticks=[])
plt.title("Fashion MNIST data embedded\n"
          "into two dimensions by UMAP\n"
          "visualised with Datashader",
          fontsize=12)

plt.show()

In [None]:
# Rebuild the plotting frame from the CPU embedding: coordinates plus the
# labels as a categorical string column.
df = pd.DataFrame(embedding, columns=('x', 'y'))
class_labels = list(map(str, target))
df['class'] = pd.Series(class_labels, dtype="category")

# Aggregate the points onto an 800x800 canvas, one count per class per pixel.
cvs = ds.Canvas(plot_width=800, plot_height=800)
agg = cvs.points(df, 'x', 'y', ds.count_cat('class'))

# Shade the aggregate and put it on a black background; the image is the cell output.
shaded = tf.shade(agg)
tf.set_background(shaded, "black")