# Interactive t-SNE ROOTS dimensionality reduction

In [None]:
import pandas as pd
import colorcet as cc
import holoviews as hv
import panel as pn

# Ignore every warning from this point on; the filter is installed before the
# datashader-related imports below are executed.
import warnings
warnings.filterwarnings(action="ignore")
import datashader as ds
from holoviews.operation.datashader import rasterize
# Initialise the Panel extension, then HoloViews with the Bokeh backend.
pn.extension()
hv.extension('bokeh')


*Visualization of a t-Distributed Stochastic Neighbor Embedding (t-SNE) of the ROOTS language corpus.*

The trained t-SNE data is from Christopher Akiki (dataset `cakiki/roots-tsne-data` on Hugging Face). A version of this notebook that runs the color mixing on the server side can be found [here](./t_sne_roots_datashaded.ipynb).

## Load the data

In [None]:
# Read the pre-computed t-SNE embedding from the local parquet file.
tsne_embedding = pd.read_parquet(
    './data/train-00000-of-00001-9a434d9cf7fd233e.parquet'
)

# Build the plotting frame restricted to the coordinate and label columns.
plot_columns = ['x', 'y', 'language']
df = pd.DataFrame(tsne_embedding, columns=plot_columns)

## Datashade and add hover layer

Note that the per-language aggregate is computed with the `rasterize` operation, leaving the color mixing to the client, and an invisible rasterized hover layer is overlaid on top to provide the hover tool.

In [None]:
# Scatter element: x/y are the key dimensions, language is the value dimension.
points = hv.Points(df, kdims=['x', 'y'], vdims=['language'])

# Visible layer: one count aggregate per language, coloured with the first
# 47 entries of the Glasbey "light" palette and histogram-equalised.
op = rasterize(
    points,
    aggregator=ds.by('language', ds.count()),
).opts(
    cmap=cc.glasbey_light[:47],
    cnorm='eq_hist',
    frame_width=600,
    aspect=1,
    show_legend=False,
)

# Fully transparent layer that only contributes the hover tool.
hover_layer = rasterize(
    points,
    selector=ds.first("x"),
    x_sampling=2,
    y_sampling=2,
).opts(
    alpha=0,
    tools=["hover"],
)

### Render

In [None]:
# Overlay the transparent hover layer on the coloured aggregate and display it.
op * hover_layer

### Build a small dashboard with panel

In [None]:
# Markdown header shown above the plot. The parenthesis opened before
# "dataset" is now closed so the rendered sentence is well-formed.
text = """
### Visualization of t-Distributed Stochastic Neighbor Embedding of ROOTS language Corpus. <br> Trained data from Christopher Akiki (dataset `cakiki/roots-tsne-data` on Hugging Face)
"""

template = pn.template.FastListTemplate(
    title="t-SNE clustering of ROOTS language corpus"
)

# Main area: the description on top, then the overlay placed between two
# horizontal spacers.
template.main.append(
    pn.Column(
        pn.pane.Markdown(text),
        pn.Row(
            pn.HSpacer(),
            op * hover_layer,
            pn.HSpacer(),
        ),
    )
)
# Mark the template as servable; the trailing semicolon suppresses the cell output.
template.servable();