In [3]:
# --- Path bootstrap so "from utils.config import ..." works no matter where Jupyter's CWD is ---
import sys
from pathlib import Path

def add_codebase_to_path(marker_dir_name="code_base"):
    """Locate the code-base directory and put it at the front of ``sys.path``.

    The search starts at the current working directory and walks up through
    its parents. A directory ``p`` yields a match when either

    * ``p`` itself is named ``marker_dir_name`` and contains
      ``utils/config.py``, or
    * ``p`` has a child named ``marker_dir_name`` that contains
      ``utils/config.py`` (e.g. when the CWD is the repo root or a sibling
      of the code base).

    The nearest match wins. Calling this repeatedly (for instance when the
    notebook cell is re-run) is idempotent: the directory ends up exactly
    once in ``sys.path``, at index 0.

    Args:
        marker_dir_name: Name of the directory that holds ``utils/config.py``.

    Returns:
        pathlib.Path: The resolved code-base directory that was put on
        ``sys.path``.

    Raises:
        RuntimeError: If no matching directory is found between the CWD and
            the filesystem root.
    """
    cwd = Path.cwd().resolve()
    # walk up until we find the marker directory that contains utils/config.py
    for p in (cwd, *cwd.parents):
        if p.name == marker_dir_name and (p / "utils" / "config.py").exists():
            found = p  # we are inside (or at) the code base itself
        elif (p / marker_dir_name / "utils" / "config.py").exists():
            found = p / marker_dir_name  # the code base is a child of p
        else:
            continue

        entry = str(found)
        # Drop any earlier copies before inserting so re-running the cell does
        # not accumulate duplicates; assign in place to keep the same list object.
        sys.path[:] = [e for e in sys.path if e != entry]
        sys.path.insert(0, entry)
        return found

    raise RuntimeError(f"Could not locate {marker_dir_name}/ with utils/config.py")

# Run the bootstrap: the call puts the located code-base directory on sys.path
# (so `utils.*` imports can resolve) and returns its path, which is kept in
# CODEBASE_DIR and echoed so the reader can see which checkout is in use.
CODEBASE_DIR = add_codebase_to_path()
print("Using code base at:", CODEBASE_DIR)


Using code base at: /storage/ice1/1/0/vchopra37/projects/edge_glass/code_base


In [4]:
import torch
from utils.similarity import l2_normalize, cosine_sim, cosine_self
from utils.io import save_embeddings, load_embeddings, ensure_dir

In [5]:
# make a tiny fake embedding: one 16-dim row per id.
# A locally seeded generator makes the values identical on every run without
# touching torch's global RNG state.
ids = [f"id_{i}" for i in range(5)]
X = torch.randn(len(ids), 16, generator=torch.Generator().manual_seed(0))

# cosine tests
S = cosine_self(X)
print("Self-cos shape:", S.shape)
print("Diag mean (≈1):", torch.diag(S).mean().item())

# Turn the eyeball checks above into hard failures so Restart & Run All
# catches a regression in cosine_self instead of silently printing bad numbers.
S_diag = torch.diag(S)
assert S.shape == (len(ids), len(ids)), f"unexpected self-similarity shape: {tuple(S.shape)}"
assert torch.allclose(S_diag, torch.ones_like(S_diag), atol=1e-5), "self-cosine diagonal should be ≈ 1"


Self-cos shape: torch.Size([5, 5])
Diag mean (≈1): 1.0


In [6]:

# Round-trip smoke test: write the fake embeddings to disk, read them back,
# and report whether the id count, tensor shape, and stored modality survived.
ensure_dir("../experiments/embeddings")
path = "../experiments/embeddings/fake_text.pt"
save_embeddings(
    ids,
    X,
    path,
    meta={"modality": "text", "D": X.shape[1]},
)
ids2, X2, meta2 = load_embeddings(path)

# Each entry is one independent check; they are printed in this order.
reload_checks = (
    len(ids2) == len(ids),          # same number of ids
    X2.shape == X.shape,            # same tensor shape
    meta2["modality"] == "text",    # metadata preserved
)
print("Reload OK:", *reload_checks)


Reload OK: True True True
