In [None]:
# Step 2.1 Load the embeddings model
from langchain_community.embeddings import LlamaCppEmbeddings

# The GGUF path is relative to the notebook's working directory. The resulting
# object is what the vector store in step 2.2 receives as its embedding function.
embedding_model = LlamaCppEmbeddings(
    model_path="models/all-MiniLM-L6-v2-Q6_K.gguf",
)

In [None]:
# Step 2.2 Load the vector database that was persisted in part 1

from pathlib import Path

import chromadb
# Import Chroma from langchain_community, the same package the embeddings class
# in step 2.1 comes from; the `langchain.vectorstores` path is a deprecated alias.
from langchain_community.vectorstores import Chroma

persist_folder = 'chroma_db_c1000o200_docs_textbooks' # <-- replace this with the name of your persist folder from part 1

# PersistentClient creates a brand-new, empty database when the path does not
# exist, so a typo in persist_folder would only surface later as an empty
# result or an obscure error in the plotting cells. Fail early instead.
if not Path(persist_folder).is_dir():
    raise FileNotFoundError(
        f"Persist folder '{persist_folder}' not found; "
        "point persist_folder at the directory created in part 1."
    )

client = chromadb.PersistentClient(path=persist_folder)

# No collection_name is passed, so the wrapper's default collection is used.
# NOTE(review): assumes part 1 also relied on the default collection name — confirm.
vectordb = Chroma(
    client=client,
    embedding_function=embedding_model
)

In [None]:
# Step 2.3 retrieve all embeddings and metadata from the vector database
data = vectordb.get(include=['embeddings','metadatas'])
c_ems = data['embeddings']
c_meta = data['metadatas']

# Stop here with a clear message if nothing came back; otherwise the failure
# only appears later inside the dimensionality reduction. len() is used rather
# than truthiness because the embeddings may be a list or a NumPy array
# depending on the chromadb version.
if c_ems is None or len(c_ems) == 0:
    raise ValueError(
        "The vector database returned no embeddings; "
        "check the persist folder and collection name."
    )

# for visualizations, assign color to each source.
# A metadata entry can be None or lack a 'source' key; those points are
# grouped under 'unknown' instead of raising TypeError/KeyError.
colors = [ (item or {}).get('source', 'unknown') for item in c_meta ]


In [None]:
# Step 2.4 visualize what embeddings look like in 2D

# 2d visualization of embeddings
import umap
import plotly.express as px
import numpy as np

#reduce to 2d
# UMAP is stochastic: without a fixed random_state the layout changes on every
# run, so the figure could not be reproduced after a kernel restart.
reducer = umap.UMAP(n_components=2, random_state=42)
# np.asarray gives UMAP one 2-D array whether c_ems is a list of vectors or
# already an array.
embeddings_2d = reducer.fit_transform(np.asarray(c_ems))

#create 2d scatter plot: one point per stored embedding, colored by source
fig = px.scatter(
    x=embeddings_2d[:, 0],
    y=embeddings_2d[:, 1],
    color=colors,
    opacity=0.5,
    title='2D Visualization of Embeddings',
    labels={'x': 'X', 'y': 'Y'},
    template='plotly_dark'
)
# shrink the markers so dense regions stay readable
fig.update_traces(marker=dict(size=4),
                  selector=dict(mode='markers'))
# the legend is hidden; source names are still available on hover
fig.update_layout(showlegend=False)
fig.show()

In [None]:
# Step 2.5 visualize what embeddings look like in 3D

# 3d visualization of embeddings
import umap
import plotly.express as px
import numpy as np

#reduce to 3d
# UMAP is stochastic: without a fixed random_state the layout changes on every
# run, so the figure could not be reproduced after a kernel restart.
reducer = umap.UMAP(n_components=3, random_state=42)
# np.asarray gives UMAP one 2-D array whether c_ems is a list of vectors or
# already an array.
embeddings_3d = reducer.fit_transform(np.asarray(c_ems))

#create 3d scatter plot: one point per stored embedding, colored by source
fig = px.scatter_3d(
    x=embeddings_3d[:, 0],
    y=embeddings_3d[:, 1],
    z=embeddings_3d[:, 2],
    color=colors,
    opacity=0.5,
    title='3D Visualization of Embeddings',
    labels={'x': 'X', 'y': 'Y', 'z': 'Z'},
    template='plotly_dark',
    width=1400, height=800,
)
# shrink the markers so dense regions stay readable
fig.update_traces(marker=dict(size=2),
                  selector=dict(mode='markers'))
# the legend is hidden; source names are still available on hover
fig.update_layout(showlegend=False)
fig.show()

In [None]:
# the end!