In [4]:
import pandas as pd
import umap

# Shared UMAP reducer: projects the embeddings down to 2D for easy visualization.
reducer = umap.UMAP(
    n_neighbors=50,
    n_components=2,
    metric='euclidean',
    min_dist=0.5,
    spread=0.5,
    learning_rate=1.0,
    n_epochs=200,
    init='spectral',
    # UMAP is stochastic: without a fixed seed every run yields a different
    # layout, so the plot cannot be reproduced after a kernel restart.
    # Per the UMAP docs, fixing the seed trades multi-threaded speed for
    # determinism.
    random_state=42,
)

## Embeddings from Specs

In [5]:
# --------- Load Spec Data and Apply UMAP ----------

# Read the spec-embedding table from disk.
df = pd.read_csv('embeddings/spec_embeddings.csv')

# 'filename' identifies each row; it is set aside and not fed to UMAP.
filename_spec = df['filename']

# Every column except 'filename' is treated as part of the embedding.
embeddings_spec = df.drop(columns='filename')

# Fit the reducer on the embeddings and keep the transformed coordinates.
umap_embeddings_spec = reducer.fit_transform(embeddings_spec)


# --------- Prepare labels ---------

import re
def extract_substring(s):
    """Return the part of ``s`` that precedes the first ``_p``, ``_m``, ``_sw`` or ``_s``.

    When none of those markers occurs in ``s``, the string is returned unchanged.
    """
    marker = re.search(r'_(p|m|sw|s)', s)
    return s if marker is None else s[:marker.start()]

def attach_image(s):
    """Build the raw-GitHub URL of the screenshot that belongs to a spec file.

    The extension of ``s`` (everything after the last ``.``) is replaced by
    ``png``; a name without any extension simply gets ``.png`` appended.

    Parameters
    ----------
    s : str
        Spec filename, e.g. ``"chart.json"``.

    Returns
    -------
    str
        URL below the ``screenshots/`` folder of the
        ``huyen-nguyen/spec-image-embeddings`` repository.
    """
    # Split on the last dot instead of chopping a fixed 4 characters: the
    # fixed slice was only correct for 4-letter extensions such as "json"
    # and turned e.g. "x.csv" into "xpng".
    stem, dot, _ext = s.rpartition('.')
    if not dot:
        stem = s  # no extension present: keep the whole name
    return "https://raw.githubusercontent.com/huyen-nguyen/spec-image-embeddings/main/screenshots/" + stem + ".png"

# Derive a short label from every filename and store it on the source frame.
df['label'] = df['filename'].apply(extract_substring)

# ----------- Apply labels -------------

# Wrap the UMAP output in a DataFrame with named coordinate columns, then
# attach each row's label and screenshot URL.
umap_embeddings_spec = pd.DataFrame(
    umap_embeddings_spec, columns=['UMAP_1', 'UMAP_2']
).assign(
    Label=df['label'],
    url=df['filename'].apply(attach_image),
)

# ----------- Config -------------

# Keyword arguments forwarded to the scatter plot below.
# API Reference: https://github.com/flekschas/jupyter-scatter
# and also https://github.com/flekschas/regl-scatterplot/#properties
config = dict(
    size=7,
    axes_labels=True,
    height=800,
    background="dark",
    legend=True,
    # aspectRatio=1,  (left disabled)
    opacity=0.8,
    axes_grid=True,
)

# ----------- Plotting the results using jupyter scatter -----------
import jscatter

# Tooltip settings: enable the tooltip, use the "url" column as an image
# preview on a white background, and list the "color" property.
_tooltip_kwargs = {
    "tooltip": True,
    "tooltip_preview": "url",
    "tooltip_preview_type": "image",
    "tooltip_preview_image_background_color": "white",
    "tooltip_properties": ["color"],
}

# Points are placed by their UMAP coordinates and colored by 'Label';
# the call to .show() stays last so the widget is the cell's output.
jscatter.Scatter(
    data=umap_embeddings_spec,
    x='UMAP_1',
    y='UMAP_2',
    color_by='Label',
    **config,
    **_tooltip_kwargs,
).show()

HBox(children=(VBox(children=(Button(button_style='primary', icon='arrows', layout=Layout(width='36px'), style…