In [None]:
# Install the packages listed in requirements.txt (-q keeps pip's output quiet).
%pip install -r requirements.txt -q

In [None]:
import csv
from pprint import pprint

import numpy as np
import plotly.express as px
from sentence_transformers import SentenceTransformer
from sklearn.manifold import TSNE

# Load data from CSV file

In [None]:
# Expected input: data.csv with a header row followed by [Label, Value] rows.
# Label is the name of the data point and Value is the value (text) of the data point.

# Parse with the csv module rather than np.genfromtxt: genfromtxt splits on every
# comma (even inside quoted fields) and cuts each line at '#', which corrupts or
# rejects free-text values. newline='' lets the csv module handle line breaks
# inside quoted fields; the encoding is explicit so non-ASCII text loads the same
# way on every platform.
with open('data.csv', newline='', encoding='utf-8') as csv_file:
    # Ignore blank / whitespace-only lines
    rows = [
        row for row in csv.reader(csv_file)
        if any(field.strip() for field in row)
    ]

# Remove header from data and keep the remaining rows as a 2-D array of strings
data = np.array(rows[1:], dtype=str)

# Fail early with a clear message instead of an IndexError in a later cell
if data.ndim != 2 or data.shape[1] < 2:
    raise ValueError(
        "data.csv must contain a header row followed by at least one "
        "[Label, Value] row"
    )

pprint(data)

# Load pre-trained model

In [None]:
# Instantiate the pre-trained sentence-embedding model by name
model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')


# Embeddings

In [None]:
# Second CSV column holds the values to embed
sentences = data[:, 1]

# Encode every value into a normalized embedding, returned as a single tensor
embeddings = model.encode(sentences, convert_to_tensor=True, normalize_embeddings=True)
pprint(embeddings)


# Perform TSNE to reduce to 3 components

In [None]:
# Requires `embeddings` from the Embeddings cell.
# scikit-learn operates on NumPy arrays, while `embeddings` is produced as a torch
# tensor that may live on a GPU/MPS device; such a tensor cannot be converted to
# NumPy implicitly, so move it to host memory explicitly first.
if hasattr(embeddings, 'detach'):
    embeddings_np = embeddings.detach().cpu().numpy()
else:
    embeddings_np = np.asarray(embeddings)

n_samples = embeddings_np.shape[0]

# t-SNE needs 0 < perplexity < n_samples, which is impossible with fewer than
# two samples; report that clearly instead of an opaque parameter error.
if n_samples < 2:
    raise ValueError(
        f"t-SNE needs at least 2 samples, got {n_samples}"
    )

# Set perplexity to a value less than the number of samples (capped at 30)
perplexity_value = min(30, n_samples - 1)

tsne_model = TSNE(
  n_components=3,
  random_state=42,  # fixed seed so the layout is reproducible between runs
  perplexity=perplexity_value,)
tsne_embeddings = tsne_model.fit_transform(embeddings_np)

pprint(tsne_embeddings)

# Plot the 3D graph

In [None]:
# Columns of the loaded CSV: labels drive the point text and color,
# the values are shown as the hover title.
point_labels = data[:, 0]
point_values = data[:, 1]

# 3D scatter of the t-SNE coordinates (columns 0, 1, 2 -> x, y, z)
fig = px.scatter_3d(
    tsne_embeddings,
    x=0, y=1, z=2,
    color=point_labels,
    text=point_labels,
    hover_name=point_values,
    title='t-SNE 3D Visualization',
)

# Same marker size for every point
fig.update_traces(marker={'size': 10})

# t-SNE axes carry no interpretable units, so hide tick labels and axis titles
scene_axes = {
    axis_name: {'showticklabels': False, 'title': ''}
    for axis_name in ('xaxis', 'yaxis', 'zaxis')
}

# Optional tweaks: showlegend=False hides the legend; width / height fix the plot size
fig.update_layout(
    scene=scene_axes,
    autosize=True,
    margin={'l': 50, 'r': 50, 'b': 50, 't': 50, 'pad': 4},
)

fig.show()


# Similarity between embeddings

In [None]:
# Pairwise comparison: score every embedding against every embedding
# (including itself) using the model's similarity function.
compare_embeddings = model.similarity(
    embeddings,
    embeddings,
)
pprint(compare_embeddings)