In [None]:
import sys
import subprocess

# Only hosted Colab runtimes need the dependencies installed on the fly; any
# other environment is left untouched.
if "google.colab" in sys.modules:
    print("Detected Google Colab runtime. Installing dependencies...")
    packages = [
        "streamlit",
        "pandas",
        "numpy",
        "scikit-learn",
        "requests",
    ]
    # Invoke pip through the running interpreter so the packages land in the
    # kernel's own environment; check_call raises if the install fails.
    pip_command = [sys.executable, "-m", "pip", "install"]
    subprocess.check_call(pip_command + packages)


# Embeddings API Example

Goal: generate local TF-IDF embeddings and compute similarities via `EmbeddingsClient`.

Why it matters: offers a reproducible, dependency-light stand-in for remote embedding services.

How to run and adapt: run the cells in order, top to bottom; to test other embedding strategies, adjust the `max_features` argument passed to `EmbeddingsClient` or swap in a different vectorizer.

In [None]:
import sys
from pathlib import Path

# Make the project's `api` package importable: probe the working directory and
# its two nearest ancestors for api/python/client_embeddings.py and register
# the first directory that contains it. Note that `repo_root` is simply the
# current working directory, which is only the first candidate checked.
repo_root = Path.cwd()
for candidate in [repo_root, repo_root.parent, repo_root.parent.parent]:
    if (candidate / "api" / "python" / "client_embeddings.py").exists():
        # Keep the cell idempotent: re-running it must not stack duplicate
        # entries on sys.path.
        if str(candidate) not in sys.path:
            sys.path.append(str(candidate))
        break

from api.python.client_embeddings import EmbeddingsClient

# Vocabulary cap handed to the client; tune this to experiment with the
# embedding size.
MAX_FEATURES = 32

# Small synthetic corpus used throughout the notebook.
texts = [
    "Synthetic research abstract about reproducibility.",
    "Notes on experimental design and treatment arms.",
    "Overview of responsible AI documentation practices.",
]

client = EmbeddingsClient(max_features=MAX_FEATURES)

# Embed the corpus and show the shape of the result as the cell output.
embeddings = client.embed(texts)
embeddings.shape


## Pairwise similarity

In [None]:
# Ask the client for similarities over the same `texts` list that was embedded
# earlier, then display the result as the cell output.
# NOTE(review): presumably a pairwise (len(texts) x len(texts)) matrix, per the
# section heading; confirm against EmbeddingsClient.similarity.
similarity = client.similarity(texts)
similarity
