# Llama.cpp Embeddings Test

This notebook tests the llama.cpp server embeddings endpoint running in Docker.

In [None]:
import json
import os

import requests

## Configuration

Set the base URL of the llama.cpp server:

In [None]:
# Base URL of the llama.cpp server. Defaults to the host-machine address;
# set the LLAMA_SERVER_URL environment variable to override it without
# editing the notebook, e.g. LLAMA_SERVER_URL=http://llama:8080 when running
# from inside the Docker network.
# A trailing slash is stripped because request paths are appended as "/...".
LLAMA_SERVER_URL = os.environ.get("LLAMA_SERVER_URL", "http://localhost:8080").rstrip("/")

## Test 1: Simple Embedding Request

In [None]:
def get_embedding(text: str, timeout: float = 30.0):
    """Request an embedding for ``text`` from the llama.cpp server.

    Posts ``{"input": text}`` to ``{LLAMA_SERVER_URL}/embedding`` and returns
    the decoded JSON body unchanged.

    Args:
        text: The text to embed.
        timeout: Seconds to wait for the server before giving up, so an
            unreachable or stalled server cannot hang the notebook.

    Returns:
        The parsed JSON response. The cells below read the vector from its
        ``'embedding'`` key.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.post(
        f"{LLAMA_SERVER_URL}/embedding",
        headers={"Content-Type": "application/json"},
        json={"input": text},
        timeout=timeout,
    )
    # Fail loudly on an error status instead of handing an error payload
    # to callers that expect an 'embedding' key.
    response.raise_for_status()
    return response.json()

# Smoke test: embed one short string and inspect what the server returned
result = get_embedding("Hello embeddings")
print("Response keys:", result.keys())

# Look the vector up once and reuse it for both summaries below
sample_embedding = result.get('embedding', [])
print("Embedding dimension:", len(sample_embedding))
print("First 10 values:", sample_embedding[:10])

## Test 2: Multiple Texts

In [None]:
# Sample texts; `texts` and `embeddings` are reused by the similarity test below
texts = [
    "Hello embeddings",
    "Natural language processing",
    "Machine learning models",
    "Vector databases",
]

# embeddings[k] is the vector returned for texts[k]
embeddings = []
for text in texts:
    result = get_embedding(text)
    # Fall back to an empty list when the response carries no 'embedding' key
    vector = result.get('embedding', [])
    embeddings.append(vector)
    print(f"Text: '{text}' -> Embedding dim: {len(vector)}")

## Test 3: Cosine Similarity

Calculate the pairwise cosine similarity between the embeddings collected in Test 2:

In [None]:
import numpy as np

def cosine_similarity(vec1, vec2):
    """Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector (any sequence or array of numbers).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms. Returns 0.0 when either vector has zero norm
        (including empty vectors), where the ratio would otherwise be 0/0.
    """
    vec1 = np.asarray(vec1)
    vec2 = np.asarray(vec2)
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    # A zero-length or all-zero vector has no direction; avoid dividing by
    # zero, which would yield NaN and a RuntimeWarning.
    if denominator == 0:
        return 0.0
    return np.dot(vec1, vec2) / denominator

# Pairwise similarity table: one row and one column per entry in `texts`
print("Similarity matrix:")

# Header: a blank 30-character label column, then each text cut to 15 characters
header_cells = "".join(f"{label[:15]:15s} " for label in texts)
print(f"{'':30s}" + header_cells)

# Body: the row label followed by that text's similarity to every text
for row_idx, row_label in enumerate(texts):
    row_cells = "".join(
        f"{cosine_similarity(embeddings[row_idx], embeddings[col_idx]):15.4f} "
        for col_idx in range(len(texts))
    )
    print(f"{row_label:30s}" + row_cells)

## Test 4: Server Health Check

In [None]:
# Check server health
# A timeout keeps this cell from hanging indefinitely when the server is
# unreachable or stalled.
health_response = requests.get(f"{LLAMA_SERVER_URL}/health", timeout=5)
print("Health status:", health_response.status_code)
print("Response:", health_response.json())

## Test 5: Using curl (shell command)

In [None]:
%%bash
# POST the sample text with curl and have jq print the length of the
# returned .embedding value.
curl --silent \
    --request POST \
    --header "Content-Type: application/json" \
    --data '{"input": "Hello embeddings"}' \
    --url http://localhost:8080/embedding \
  | jq '.embedding | length'