In [1]:
from bokeh.plotting import figure, output_file, show, save
from bokeh.io import output_notebook, export_png
from bokeh.palettes import colorblind
from bokeh.models import (
    CategoricalColorMapper,
    ColumnDataSource,
    LassoSelectTool,
    WheelZoomTool,
    ZoomInTool,
    BoxZoomTool,
    ResetTool,
)
from bokeh.layouts import gridplot
from bokeh.resources import CDN
from bokeh.embed import file_html

import numpy as np
import pandas as pd

In [2]:
# Read the cleaned per-split feature tables. Later cells pull the "label"
# and "filename" columns out of each frame.
features_train, features_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "features/features_train_clean.csv",
        "features/features_test_clean.csv",
    )
)

In [3]:
# Per-row class label of each split, taken from the "label" column.
labels_train, labels_test = features_train["label"], features_test["label"]

In [4]:
# Per-row "filename" column of each split; shown in the plot's hover tooltip.
filenames_train, filenames_test = (
    frame["filename"] for frame in (features_train, features_test)
)

In [5]:
# Label values to plot; the plotting loop pairs each one with a palette colour.
labels = list(range(2))  # -> [0, 1]

In [6]:
# Load the saved embeddings for each split. The plotting cell uses column 0
# as x and column 1 as y.
# NOTE(review): rows are assumed to line up one-to-one with the rows of the
# corresponding features CSV -- confirm against whatever produced the .npy files.
train_embedding, test_embedding = (
    np.load(npy_path) for npy_path in ("train_embedding.npy", "test_embedding.npy")
)

In [7]:
# Index labels of the training rows whose label equals 0.
# NOTE(review): the plotting cell's loop reassigns idx_tr for every label, so
# this value looks like leftover scratch -- confirm before removing the cell.
idx_tr = labels_train.loc[labels_train.eq(0)].index.tolist()

In [10]:
# Interactive 2-D scatter plot of the train/test embeddings, coloured by label.
# Train points are drawn as circles and test points as triangles; both share
# the label's colour, and every (label, split) pair gets its own legend entry.

# Hover tooltip rows: "@filename" reads the "filename" column of the glyph's
# ColumnDataSource; "$index", "$x" and "$y" are Bokeh's built-in hover fields.
TOOLTIPS = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("filename", "@filename"),
]

# Four-colour palette; zip() in the loop below stops at the shorter sequence,
# so only the first len(labels) colours are actually used.
mycols = colorblind["Colorblind"][4]

# `width`/`height` replace `plot_width`/`plot_height`, which were removed in
# Bokeh 3.0.
p = figure(
    width=600,
    height=600,
    tooltips=TOOLTIPS,
    tools="save",
    toolbar_location="left",
)

# Title styling. No title text is set in this cell, so these settings only
# become visible once a title string is assigned to p.title.text.
p.title.align = "center"
p.title.text_color = "black"
p.title.text_font_size = "25px"

size = 6  # marker size shared by all glyphs


def _positions_of(split_labels, label):
    """Return the positional (0-based) row numbers where ``split_labels == label``.

    Positions, not index labels, are returned because the result is used with
    ``Series.iloc`` and NumPy row indexing, both of which are positional. The
    two coincide only when the Series carries a default RangeIndex.

    Parameters
    ----------
    split_labels : pandas.Series
        Label of every row in one split.
    label
        Label value to select.

    Returns
    -------
    list of int
        Row positions whose label equals ``label`` (empty if none match).
    """
    return np.flatnonzero((split_labels == label).to_numpy()).tolist()


for col, label in zip(mycols, labels):
    # train
    idx_tr = _positions_of(labels_train, label)
    filenames_train_label = filenames_train.iloc[idx_tr]
    train_embedding_label = train_embedding[idx_tr, :]

    data_tr = {
        "x": train_embedding_label[:, 0],
        "y": train_embedding_label[:, 1],
        "filename": filenames_train_label,
    }
    source_tr = ColumnDataSource(data=data_tr)

    # scatter(marker=...) replaces the marker-specific circle()/triangle()
    # glyph methods, which are deprecated in recent Bokeh releases.
    p.scatter(
        x="x",
        y="y",
        marker="circle",
        size=size,
        source=source_tr,
        color=col,
        alpha=0.8,
        legend_label=f"{label} train",
    )

    # test
    idx_ts = _positions_of(labels_test, label)
    filenames_test_label = filenames_test.iloc[idx_ts]
    test_embedding_label = test_embedding[idx_ts, :]

    data_ts = {
        "x": test_embedding_label[:, 0],
        "y": test_embedding_label[:, 1],
        "filename": filenames_test_label,
    }
    source_ts = ColumnDataSource(data=data_ts)

    p.scatter(
        x="x",
        y="y",
        marker="triangle",
        size=size,
        source=source_ts,
        color=col,
        alpha=0.8,
        legend_label=f"{label} test",
    )

# Extra interactive tools on top of the "save" tool requested in figure().
p.add_tools(
    LassoSelectTool(),
    WheelZoomTool(),
    ZoomInTool(),
    ResetTool(),
    BoxZoomTool(),
)

# Axis tick-label and legend styling; clicking a legend entry hides its glyphs.
p.legend.label_text_font_size = "20pt"
p.yaxis.major_label_text_font_size = "15pt"
p.xaxis.major_label_text_font_size = "15pt"
p.legend.location = "top_left"
p.legend.click_policy = "hide"

# NOTE(review): output_notebook() is imported but not called in the visible
# cells, so show() presumably renders to a standalone HTML page rather than
# inline -- confirm that is intended.
show(p)