# OpenCLIP Embedding Service (Colab)

Run a minimal FastAPI service to serve text embeddings on GPU.


In [1]:
%pip install -q uv pyngrok
!uv pip install -q fastapi uvicorn open-clip-torch torch

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.6/19.6 MB[0m [31m58.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import open_clip

MODEL_ID = 'hf-hub:openai/clip-vit-base-patch32'
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

model, _, preprocess = open_clip.create_model_and_transforms(MODEL_ID, device=DEVICE)
model.eval()
model = model.to(torch.float16)
tokenizer = open_clip.get_tokenizer(MODEL_ID)

def l2_normalize(t: torch.Tensor) -> torch.Tensor:
    return torch.nn.functional.normalize(t, p=2, dim=-1)

class EmbedRequest(BaseModel):
    texts: list[str]
    normalize: bool = True

class EmbedResponse(BaseModel):
    embeddings: list[list[float]]

app = FastAPI()

@app.get("/")
def root():
    return {"status": "ok"}

@app.post('/embed', response_model=EmbedResponse)
def embed(req: EmbedRequest):
    tokens = tokenizer(req.texts).to(DEVICE)
    with torch.inference_mode():
        feats = model.encode_text(tokens)
        if req.normalize:
            feats = l2_normalize(feats)
        feats = feats.detach().float().cpu().numpy()
    return EmbedResponse(embeddings=feats.tolist())



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


open_clip_config.json:   0%|          | 0.00/600 [00:00<?, ?B/s]

open_clip_model.safetensors:   0%|          | 0.00/1.79G [00:00<?, ?B/s]

In [4]:
import getpass
from pyngrok import ngrok, conf
import threading

print("Enter your authtoken, which can be copied from https://dashboard.ngrok.com/get-started/your-authtoken")
conf.get_default().auth_token = getpass.getpass()

# Start uvicorn in background
config = uvicorn.Config(app, host='0.0.0.0', port=8000, log_level='info')
server = uvicorn.Server(config)
thread = threading.Thread(target=server.run, daemon=True)
thread.start()

# Expose via ngrok
tunnel = ngrok.connect(8000, "http")
print("Public URL:", tunnel.public_url)
print("POST", tunnel.public_url + "/embed")


Enter your authtoken, which can be copied from https://dashboard.ngrok.com/get-started/your-authtoken
··········


INFO:     Started server process [895]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): [errno 98] address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


Public URL: https://cbc4ae08de71.ngrok-free.app
POST https://cbc4ae08de71.ngrok-free.app/embed
