In [2]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Sentence-transformer model used to embed the chunk texts below.
# NOTE(review): presumably the same model the persisted store was built with — confirm.
embeddings_func = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

# Open the Chroma collection persisted on disk next to this notebook.
db = Chroma(
    embedding_function=embeddings_func,
    persist_directory="./chroma_db",
)

# Fetch the stored records and keep only the raw chunk texts.
stored_records = db.get()
all_docs = stored_records['documents']
all_texts = list(all_docs)

# Generate embeddings directly for these texts (one vector per chunk).
all_embeddings = embeddings_func.embed_documents(all_texts)


In [2]:
from sklearn.manifold import TSNE
import numpy as np

# Project the chunk embeddings down to 2-D with t-SNE so they can be plotted.
# Reads `all_embeddings` (one vector per chunk) and produces `embeddings_2d`,
# an array with one (x, y) row per chunk.
if all_embeddings is None:
    raise ValueError("all_embeddings is None. Please ensure embeddings are generated and available.")

embeddings_array = np.array(all_embeddings)
if embeddings_array.ndim != 2 or embeddings_array.shape[0] == 0:
    raise ValueError(f"Invalid embeddings_array shape: {embeddings_array.shape}")

n_samples = embeddings_array.shape[0]
if n_samples < 2:
    # A single point has no neighbours, so there is nothing for t-SNE to lay out.
    raise ValueError(f"t-SNE needs at least 2 embeddings, got {n_samples}.")

# scikit-learn requires perplexity < n_samples. The library default (30) would
# make small collections fail inside fit_transform, so clamp it to the data size.
perplexity = float(min(30, n_samples - 1))

# Fixed random_state keeps the layout reproducible between runs.
tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
embeddings_2d = tsne.fit_transform(embeddings_array)

In [6]:
import plotly.express as px
import pandas as pd

# Assemble the plotting table: one row per chunk, holding its 2-D coordinates
# and the first line of its text, which is used as the hover label.
x_coords = embeddings_2d[:, 0]
y_coords = embeddings_2d[:, 1]
hover_labels = [doc.partition('\n')[0] for doc in all_docs]

df = pd.DataFrame({"x": x_coords, "y": y_coords, "text": hover_labels})

# Interactive scatter plot; hovering a point shows the chunk's first line.
fig = px.scatter(
    df,
    x="x",
    y="y",
    hover_data=["text"],
    title="t-SNE embedding vizualizacija",
)
fig.show()
