In [1]:
from sentence_transformers import SentenceTransformer
import torch
from pathlib import Path
import sys
import importlib
from IPython.display import display, HTML
from ipywidgets import FloatProgress, VBox, Label

# --- Repo setup ---
# The parent of the current working directory is treated as the repository
# root and placed on sys.path so that the `scripts` package can be imported.
repo_root = Path.cwd().parent
repo_root_str = str(repo_root)
if repo_root_str not in sys.path:
    # Guarded so that re-running this cell does not keep stacking duplicate
    # entries at the front of sys.path.
    sys.path.insert(0, repo_root_str)

import scripts.allPrompts as allPrompts

# Reload so edits made to the module since the first import are picked up
# without restarting the kernel.
importlib.reload(allPrompts)

# --- Load prompts ---
df = allPrompts.load_prompts("../assets/conversations.json")
# Keep only rows that actually have a title, as a plain Python list.
titles = df["title"].dropna().tolist()

# --- Device ---
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# --- Load model ---
# Instantiate the sentence-embedding model on the selected device.
model = SentenceTransformer("all-MiniLM-L6-v2", device=device)

# --- Custom progress bar setup ---
# Inject a CSS rule that forces the `.cell-output-ipywidget-background`
# class to a transparent background for the widget output area.
_widget_css = """
<style>
.cell-output-ipywidget-background {
    background-color: transparent !important;  /* or transparent */
}
</style>
"""
display(HTML(_widget_css))

# --- Encode titles in batches with a live progress widget ---
batch_size = 64  # texts per forward pass; tune this for your GPU memory
total_titles = len(titles)

# Label + bar pair that is updated after every encoded batch.
progress_label = Label(value=f"Encoding titles: 0 / {total_titles}")
progress_label.style.text_color = "white"
progress_bar = FloatProgress(value=0, min=0, max=total_titles)
progress_bar.layout.width = "100%"
progress_bar.style.bar_color = "#4282ba"
display(VBox([progress_label, progress_bar]))

# Per-batch tensors are collected here and concatenated once at the end.
batch_embeddings = []
for start in range(0, total_titles, batch_size):
    batch = titles[start : start + batch_size]
    batch_emb = model.encode(
        batch,
        # encode() has its own internal batch size (default 32); forward ours
        # so the value tuned above is what is actually used on the device.
        batch_size=batch_size,
        convert_to_tensor=True,
        # The widget above already reports progress; avoid a nested bar.
        show_progress_bar=False,
    )
    batch_embeddings.append(batch_emb)

    # The last batch may be short, so clamp the counter to the total.
    done = min(start + batch_size, total_titles)
    progress_bar.value = done
    progress_label.value = f"Encoding titles: {done} / {total_titles}"

if batch_embeddings:
    embeddings = torch.cat(batch_embeddings)
else:
    # torch.cat() rejects an empty list; with no titles, produce an empty
    # (0, dim) tensor on the model's device instead of raising.
    embeddings = torch.empty(
        (0, model.get_sentence_embedding_dimension()), device=model.device
    )
print("Done!")

Using device: cuda


VBox(children=(Label(value='Encoding titles: 0 / 59076', style=LabelStyle(text_color='white')), FloatProgress(…

Done!
