# BGE endpoints on Colab

This notebook exposes HTTP endpoints for embeddings and reranking:
- `GET /health`
- `POST /embed` (BAAI/bge-m3)
- `POST /rerank` (BAAI/bge-reranker-v2-m3)

Request formats:
- `POST /embed` `{"texts": ["text1", "text2"]}`
- `POST /rerank` `{"query": "q", "documents": ["d1", "d2"]}`

A public URL is created via ngrok. Set `NGROK_AUTH_TOKEN` in the environment before running the notebook; without it, no tunnel is opened.

Optional protection: set `BGE_API_KEY` in the environment. Every request (including `GET /health`) must then send the same value in the `X-API-Key` header.


In [None]:
# Install the serving stack quietly; every package is pinned to an exact version.
!pip -q install "fastapi==0.110.0" "uvicorn[standard]==0.27.1" "sentence-transformers==2.7.0" "transformers==4.38.0" "pyngrok==7.1.6"

In [None]:
import os
from pyngrok import ngrok

# Read the ngrok token from the environment; an unset variable becomes "".
NGROK_AUTH_TOKEN = os.getenv("NGROK_AUTH_TOKEN", "")

# Register the token with pyngrok only when one was actually supplied.
if NGROK_AUTH_TOKEN != "":
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

In [None]:
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Hugging Face model identifiers for the embedder and the reranker.
EMBED_MODEL_NAME = "BAAI/bge-m3"
RERANK_MODEL_NAME = "BAAI/bge-reranker-v2-m3"

# Default to CPU and switch to the GPU only when torch reports one.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# The embedder is placed on the target device at construction time.
embed_model = SentenceTransformer(EMBED_MODEL_NAME, device=device)

# The reranker is a sequence-classification model with its own tokenizer.
rerank_tokenizer = AutoTokenizer.from_pretrained(RERANK_MODEL_NAME)
rerank_model = AutoModelForSequenceClassification.from_pretrained(RERANK_MODEL_NAME)

# Switch to eval mode, then move the weights to the chosen device.
rerank_model.eval().to(device)

In [None]:
import os
import torch
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
from typing import List, Optional

# FastAPI application object; the /health, /embed and /rerank handlers register on it.
app = FastAPI(title="bge-endpoints")

# Shared secret for the optional X-API-Key check; empty string disables the check.
API_KEY = os.environ.get("BGE_API_KEY", "")

def _auth(x_api_key: Optional[str]) -> None:
    """Reject the request unless the supplied API key matches ``API_KEY``.

    Args:
        x_api_key: Value of the ``X-API-Key`` header, or ``None`` when the
            header was not sent.

    Raises:
        HTTPException: 401 when ``API_KEY`` is non-empty and the supplied
            key is missing or different.
    """
    if not API_KEY:
        # No key configured: the endpoints are open.
        return

    # Local import keeps this block self-contained.
    import hmac

    # Compare in constant time so response timing does not leak how much of a
    # guessed key was correct. Bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    supplied = (x_api_key or "").encode("utf-8")
    expected = API_KEY.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="invalid api key")

# Request body for POST /embed.
class EmbedRequest(BaseModel):
    # Strings to embed; the handler passes the whole list to the embedding model in one call.
    texts: List[str]

# Request body for POST /rerank.
class RerankRequest(BaseModel):
    # Query that every document is paired with for scoring.
    query: str
    # Candidate documents; the handler returns one score per entry, in this order.
    documents: List[str]

@app.get("/health")
def health(x_api_key: Optional[str] = Header(default=None, alias="X-API-Key")):
    # Liveness probe: reports the configured model names and the active device.
    # The API-key check runs here too, so a configured key is required.
    _auth(x_api_key)

    report = {}
    report['status'] = 'ok'
    report['embed_model'] = EMBED_MODEL_NAME
    report['rerank_model'] = RERANK_MODEL_NAME
    report['device'] = device
    return report

@app.post("/embed")
def embed(req: EmbedRequest, x_api_key: Optional[str] = Header(default=None, alias="X-API-Key")):
    # Embed every text in the request and return the vectors as nested lists.
    _auth(x_api_key)

    # normalize_embeddings=True asks the model for normalized vectors.
    encoded = embed_model.encode(req.texts, normalize_embeddings=True)

    # Convert to plain Python lists so the response is JSON-serializable.
    return {'embeddings': encoded.tolist()}

@app.post("/rerank")
def rerank(req: RerankRequest, x_api_key: Optional[str] = Header(default=None, alias="X-API-Key")):
    # Score each document against the query with the reranker model.
    # Returns {'scores': [...]} with one float per document, in request order.
    # Raises HTTPException(401) via _auth when the API key check fails.
    _auth(x_api_key)

    # An empty batch cannot be tokenized (the tokenizer fails on zero pairs),
    # so answer directly instead of surfacing a 500.
    if not req.documents:
        return {'scores': []}

    pairs = [(req.query, doc) for doc in req.documents]
    inputs = rerank_tokenizer(
        pairs,
        padding=True,
        truncation=True,  # pairs longer than max_length are cut, not rejected
        return_tensors="pt",
        max_length=512,
    )
    # The tokenizer output must live on the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        # Flatten the logits to 1-D; presumably one logit per pair -- TODO confirm.
        scores = rerank_model(**inputs).logits.view(-1)

    # Tensors created under no_grad carry no graph, so no detach() is needed.
    return {'scores': scores.cpu().tolist()}


In [None]:
import threading
import uvicorn

def run():
    # Serve the FastAPI app with uvicorn on all interfaces, port 8000.
    uvicorn.run(app, port=8000, host="0.0.0.0")

# Start the server on a background daemon thread.
thread = threading.Thread(target=run)
thread.daemon = True
thread.start()

In [None]:
# Open an ngrok tunnel to local port 8000 only when a token was configured;
# otherwise tell the user how to enable it.
if NGROK_AUTH_TOKEN:
    public_url = ngrok.connect(8000).public_url
    print("Public base URL:", public_url)
    embed_endpoint = public_url + "/embed"
    rerank_endpoint = public_url + "/rerank"
    print("Embed endpoint:", embed_endpoint)
    print("Rerank endpoint:", rerank_endpoint)
else:
    print("Set NGROK_AUTH_TOKEN to enable ngrok public URL.")

## Quick test (curl)

Set `PUBLIC_URL` to the public base URL printed by the previous cell. If `BGE_API_KEY` is set, add `-H "X-API-Key: $BGE_API_KEY"` to each command.

```
curl -X GET "$PUBLIC_URL/health"

curl -X POST "$PUBLIC_URL/embed" -H "Content-Type: application/json" \
  -d '{"texts": ["hello world", "bitrix docs"]}'

curl -X POST "$PUBLIC_URL/rerank" -H "Content-Type: application/json" \
  -d '{"query": "bitrix user", "documents": ["CUser class", "CRM lead"]}'
```
