https://www.bentoml.com/blog/a-guide-to-open-source-embedding-models

In [1]:
import pickle
import requests
import networkx

In [2]:
with open("../data/graph.pkl", "rb") as f:
    G = pickle.load(f)


In [3]:
import requests

# API address of Ollama on the host machine
# OLLAMA_BASE_URL = "http://host.docker.internal:11434"  # If using Docker on Linux, check the alternative below
OLLAMA_BASE_URL = "http://localhost:11434"

def get_embedding(text):
    """Gets embeddings for a given text using Ollama"""
    url = f"{OLLAMA_BASE_URL}/api/embeddings"
    payload = {
        "model": "bge-m3",  # Make sure you have this model downloaded in Ollama
        "prompt": text
    }
    response = requests.post(url, json=payload)
    
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

# Test with a sample text
embedding = get_embedding("***")
print(embedding)


{'embedding': [-1.6167062520980835, -0.36916884779930115, -1.324873685836792, 0.10496003925800323, -0.5085816383361816, -0.6559687852859497, 0.8341795802116394, 1.473646879196167, -0.29673513770103455, 0.41089606285095215, 0.7573739886283875, 1.1329209804534912, -0.13601182401180267, -0.013740446418523788, 1.3187133073806763, -1.6694451570510864, 1.040123462677002, -0.08517423272132874, 0.8772013187408447, -0.7995467185974121, -0.8771259188652039, -0.23145601153373718, -0.06231669336557388, 1.1984612941741943, 0.3176096975803375, 0.15809568762779236, 0.04244368150830269, -0.7236093878746033, -1.6375433206558228, -0.4657614231109619, 0.27378857135772705, -0.9877020120620728, -0.033766284584999084, -0.9883822798728943, -1.0301696062088013, -1.1154265403747559, -0.6944770812988281, 1.0368667840957642, -2.1396827697753906, 0.17176881432533264, -0.07348373532295227, -0.3024083077907562, 0.6346084475517273, -0.5592426061630249, 0.5155187249183655, -1.3062057495117188, -0.059213846921920776, 

In [4]:
# Iterate over graph nodes and update embeddings
for node, data in G.nodes(data=True):
    if data.get("type") == "chunk" and "text" in data:
        data["embedding"] = get_embedding(data["text"])

In [6]:
with open("../data/graph_w_embeddings.pkl", "wb") as f:
    pickle.dump(G, f)