# BGE endpoints on Colab

This notebook exposes HTTP endpoints for embeddings and reranking:
- `GET /health`
- `POST /embed` (BAAI/bge-m3)
- `POST /rerank` (BAAI/bge-reranker-v2-m3)

Request formats:
- `POST /embed` `{"texts": ["text1", "text2"]}`
- `POST /rerank` `{"query": "q", "documents": ["d1", "d2"]}`

The public URL is created via ngrok. Set `NGROK_AUTH_TOKEN` as a Colab secret; if the secret cannot be read, the environment variable of the same name is used instead.

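Optional protection: set `BGE_API_KEY` (Colab secret or environment variable) and pass it in the `X-API-Key` header. When a key is set, every endpoint, including `/health`, requires it.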


In [None]:
# --- 1. Install Dependencies ---
# Installs/upgrades the packages used by the cells below: FastAPI and uvicorn
# for the HTTP server, sentence-transformers and transformers for the models,
# and pyngrok for the public tunnel. `-q` keeps pip's output short.
!pip -q install fastapi "uvicorn[standard]" sentence-transformers transformers pyngrok --upgrade

In [None]:
# --- 2. Setup Ngrok Auth ---
import os
from pyngrok import ngrok
from google.colab import userdata

# Look up the ngrok token: Colab secrets first, process environment as the
# fallback when the secret cannot be read.
try:
    NGROK_AUTH_TOKEN = userdata.get("NGROK_AUTH_TOKEN")
except Exception:
    NGROK_AUTH_TOKEN = os.getenv("NGROK_AUTH_TOKEN", "")

# Without a token the notebook still runs, but no tunnel is opened later.
if not NGROK_AUTH_TOKEN:
    print("[Warning] NGROK_AUTH_TOKEN not found. Public URL will not be created.")
else:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    print("Ngrok token set successfully.")

In [None]:
# --- 3. Load ML Models ---
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Model identifiers passed to the loaders below and reported by /health.
EMBED_MODEL_NAME = "BAAI/bge-m3"
RERANK_MODEL_NAME = "BAAI/bge-reranker-v2-m3"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Embedding model used by /embed.
embed_model = SentenceTransformer(EMBED_MODEL_NAME, device=device)

# Tokenizer + sequence-classification model used by /rerank.
rerank_tokenizer = AutoTokenizer.from_pretrained(RERANK_MODEL_NAME)
rerank_model = AutoModelForSequenceClassification.from_pretrained(RERANK_MODEL_NAME)
rerank_model.eval()  # inference mode: disables dropout and similar layers
rerank_model.to(device)

In [None]:
# --- 4. Define FastAPI App ---
import os
import torch
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
from typing import List, Optional
from google.colab import userdata

# Application object; the route decorators below register handlers on it.
app = FastAPI(title="bge-endpoints")

# Optional API key: Colab secrets first, process environment as the fallback.
# An empty value leaves the endpoints unprotected.
try:
    API_KEY = userdata.get("BGE_API_KEY")
except Exception:
    API_KEY = os.getenv("BGE_API_KEY", "")

def _auth(x_api_key: Optional[str]) -> None:
    """Reject the request unless it carries the configured API key.

    When ``API_KEY`` is empty or unset, authentication is disabled and every
    request is accepted.

    Args:
        x_api_key: Value of the ``X-API-Key`` request header, or ``None`` when
            the header was not sent.

    Raises:
        HTTPException: 401 when a key is configured and the supplied value
            does not match it.
    """
    if not API_KEY:
        return

    import hmac  # local import: keeps this function independent of cell imports

    # Compare as bytes in constant time so response timing does not reveal how
    # much of the key matched; bytes also allow non-ASCII keys, which
    # compare_digest rejects when given as str.
    supplied = (x_api_key or "").encode("utf-8")
    expected = API_KEY.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="invalid api key")

class EmbedRequest(BaseModel):
    """Request body for `POST /embed`."""

    # Texts to embed; the response carries one vector per entry.
    texts: List[str]

class RerankRequest(BaseModel):
    """Request body for `POST /rerank`."""

    # Query that every document is paired with.
    query: str
    # Candidate documents; the response carries one score per entry.
    documents: List[str]

@app.get("/health")
def health(x_api_key: Optional[str] = Header(default=None, alias="X-API-Key")):
    """Report that the service is up, plus the loaded model names and device.

    Subject to the same API-key check as the other endpoints.
    """
    _auth(x_api_key)
    report = dict(
        status='ok',
        embed_model=EMBED_MODEL_NAME,
        rerank_model=RERANK_MODEL_NAME,
        device=device,
    )
    return report

@app.post("/embed")
def embed(req: EmbedRequest, x_api_key: Optional[str] = Header(default=None, alias="X-API-Key")):
    """Encode ``req.texts`` with the embedding model.

    Returns:
        ``{'embeddings': [...]}`` holding the normalized vectors as plain
        Python lists, in the order the encoder returns them.
    """
    _auth(x_api_key)
    encoded = embed_model.encode(req.texts, normalize_embeddings=True)
    return {'embeddings': encoded.tolist()}

@app.post("/rerank")
def rerank(req: RerankRequest, x_api_key: Optional[str] = Header(default=None, alias="X-API-Key")):
    """Score every document in ``req.documents`` against ``req.query``.

    Args:
        req: Request body with the query and the candidate documents.
        x_api_key: Value of the ``X-API-Key`` header, if sent.

    Returns:
        ``{'scores': [...]}`` with the model's flattened raw logits, in input
        order. An empty ``documents`` list yields an empty ``scores`` list.

    Raises:
        HTTPException: 401 from the API-key check.
    """
    _auth(x_api_key)

    # The tokenizer cannot build a padded batch from zero pairs; answer
    # directly instead of failing with a server error.
    if not req.documents:
        return {'scores': []}

    pairs = [(req.query, doc) for doc in req.documents]
    inputs = rerank_tokenizer(
        pairs,
        padding=True,
        truncation=True,  # pairs longer than max_length are cut, not rejected
        return_tensors="pt",
        max_length=512,
    )
    # Move every input tensor to the device the model lives on.
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():  # inference only: skip gradient bookkeeping
        scores = rerank_model(**inputs).logits.view(-1)
    return {'scores': scores.detach().cpu().tolist()}

In [None]:
# --- 5. Start Server Background Thread ---
import threading
import uvicorn

def run():
    """Serve `app` with uvicorn on all interfaces, port 8000."""
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Run the server in a background daemon thread so this cell returns and the
# following cells can be executed while the server is up.
thread = threading.Thread(target=run)
thread.daemon = True
thread.start()

In [None]:
# --- 6. Open ngrok Tunnel ---
# Expose local port 8000 through ngrok and print the resulting endpoints.
if NGROK_AUTH_TOKEN:
    public_url = ngrok.connect(8000).public_url
    print("Public base URL:", public_url)
    print("Embed endpoint:", f"{public_url}/embed")
    print("Rerank endpoint:", f"{public_url}/rerank")
else:
    print("Set NGROK_AUTH_TOKEN to enable ngrok public URL.")

## Quick test (curl)

Set `PUBLIC_URL` to the base URL printed by the ngrok cell. If `BGE_API_KEY` is set, add `-H "X-API-Key: $BGE_API_KEY"` to each command.

```
curl -X GET "$PUBLIC_URL/health"

curl -X POST "$PUBLIC_URL/embed" -H "Content-Type: application/json" \
  -d '{"texts": ["hello world", "bitrix docs"]}'

curl -X POST "$PUBLIC_URL/rerank" -H "Content-Type: application/json" \
  -d '{"query": "bitrix user", "documents": ["CUser class", "CRM lead"]}'
```


In [None]:
# --- 7. Info & Configuration Summary ---
# Prints the public base URL, the endpoint list, the authentication status and
# copy-paste usage examples. The API key is never printed in full, so the
# secret does not end up stored in the notebook's saved output.
print("=" * 50)
print("API INFO & CONFIGURATION")
print("=" * 50)

if 'public_url' in globals() and public_url:
    print(f"\nBase URL:\n   {public_url}")

    print("\nEndpoints:")
    print(f"   * [GET]  Health: {public_url}/health")
    print(f"   * [POST] Embed:  {public_url}/embed")
    print(f"   * [POST] Rerank: {public_url}/rerank")

    print("\nAuthentication:")
    if 'API_KEY' in globals() and API_KEY:
        # Show only a short prefix: enough to recognise which key is active.
        print(f"   Header: X-API-Key: {API_KEY[:3]}***")
    else:
        print("   [!] No API Key configured! The endpoint is public.")

    # The examples always use a placeholder; substitute the real key by hand.
    example_key = 'YOUR_KEY'

    print("\nUsage Example (Python requests):")
    print("-" * 30)
    print("import requests\n")
    print(f"url = '{public_url}/embed'")
    print(f"headers = {{'X-API-Key': '{example_key}'}}")
    print("payload = {'texts': ['Hello world']}")
    print("response = requests.post(url, json=payload, headers=headers)")
    print("print(response.json())")
    print("-" * 30)

    print("\nUsage Example (cURL):")
    print("-" * 30)
    print(f"curl -X POST {public_url}/embed \\")
    print(f"  -H 'X-API-Key: {example_key}' \\")
    print("  -H 'Content-Type: application/json' \\")
    print("  -d '{\"texts\": [\"Hello world\"]}'")
    print("-" * 30)
else:
    print("[!] Public URL is not generated yet. Please run the ngrok cell above.")

In [None]:
# --- 8. Test API Endpoints ---
# Smoke-tests /health, /embed and /rerank through the public ngrok URL and
# prints the outcome of each call. Failures are reported, not raised.
import requests
import json

# Upper bound (seconds) for each HTTP call so a dead tunnel or a stuck server
# cannot hang the cell indefinitely.
REQUEST_TIMEOUT_S = 60

# Resolve the base URL. The lookup also runs when `public_url` exists but is
# empty/None (left behind by an earlier run of this cell made before the
# tunnel was opened), so re-running the cell picks up a tunnel opened since.
if not globals().get('public_url'):
    tunnels = ngrok.get_tunnels()
    if tunnels:
        public_url = tunnels[0].public_url
    else:
        print("[!] Please run ngrok cell first.")
        public_url = None

if public_url:
    print(f"Testing API at: {public_url}")

    # Setup headers
    headers = {"Content-Type": "application/json"}
    if 'API_KEY' in globals() and API_KEY:
        headers["X-API-Key"] = API_KEY
        # Print only a prefix of the key to keep the secret out of the output.
        print(f"[Key] Using API Key: {API_KEY[:3]}***")

    # 1. Health Check
    print("\n--- 1. Health Check ---")
    try:
        resp = requests.get(
            f"{public_url}/health", headers=headers, timeout=REQUEST_TIMEOUT_S
        )
        print(f"Status: {resp.status_code}")
        print("Response:", resp.json())
    except Exception as e:
        print(f"Failed: {e}")

    # 2. Embed Test
    print("\n--- 2. Embed Test ---")
    try:
        payload = {"texts": ["hello world", "testing api"]}
        resp = requests.post(
            f"{public_url}/embed", json=payload, headers=headers, timeout=REQUEST_TIMEOUT_S
        )
        if resp.status_code == 200:
            data = resp.json()
            vec_len = len(data['embeddings'][0])
            print(f"[OK] Success! Generated {len(data['embeddings'])} vectors. Dimension: {vec_len}")
        else:
            print("[ERR] Error:", resp.text)
    except Exception as e:
        print(f"Failed: {e}")

    # 3. Rerank Test
    print("\n--- 3. Rerank Test ---")
    try:
        payload = {"query": "what is colab?", "documents": ["Colab is a jupyter notebook service", "Apples are red"]}
        resp = requests.post(
            f"{public_url}/rerank", json=payload, headers=headers, timeout=REQUEST_TIMEOUT_S
        )
        if resp.status_code == 200:
            print("[OK] Success! Scores:", resp.json()['scores'])
        else:
            print("[ERR] Error:", resp.text)
    except Exception as e:
        print(f"Failed: {e}")