### Embedding Model

In [None]:
from vllm import LLM

# Sentences to turn into embedding vectors.
documents = [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "Madrid is a beautiful Spanish city.",
]

# Load the embedding checkpoint with the "embed" task selected.
model = LLM(model="Qwen/Qwen3-Embedding-0.6B", task="embed")

# How many leading vector components to show before eliding the rest.
preview_len = 16

outputs = model.embed(documents)
# Results are paired with the inputs positionally
# (presumably one result per document, in order -- confirm against vLLM docs).
for doc, result in zip(documents, outputs):
    embeds = result.outputs.embedding
    if len(embeds) > preview_len:
        # Render the first few values, drop the closing "]" and append an
        # ellipsis so the printed line stays short.
        head_repr = str(embeds[:preview_len])
        embeds_trimmed = head_repr[:-1] + ", ...]"
    else:
        # Short enough to print in full.
        embeds_trimmed = embeds
    print(f"Doc: {doc!r} \nEmbeddings: {embeds_trimmed} (size={len(embeds)})")

### Reranker

In [None]:
from vllm import LLM

# The question every candidate document is scored against.
query = "What is the capital of France?"
# Candidate documents to rank by relevance to the query.
documents = [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "Madrid is a beautiful Spanish city.",
]

# Load the reranker with the "score" task selected.
# NOTE(review): trust_remote_code is intentionally not enabled -- it lets code
# from the model repository run locally, and this checkpoint appears to be
# natively supported by vLLM. Re-enable only if loading fails without it.
model = LLM(model="BAAI/bge-reranker-v2-m3", task="score")

# score() takes the query and the list of documents directly, so no explicit
# (query, document) pair list has to be built.
outputs = model.score(query, documents)

# Pair each document with its score and order from highest to lowest score.
results = sorted(
    ((doc, out.outputs.score) for doc, out in zip(documents, outputs)),
    key=lambda item: item[1],
    reverse=True,
)

print("Ranked results:")
for doc, score in results:
    print(f"{score:.4f} — {doc}")

### Language Model

In [None]:
from vllm import LLM, SamplingParams

# Text prefixes the model is asked to continue.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# Sampling settings passed to every generation request.
sampling_params = SamplingParams(
    temperature=0.8,
    top_p=0.95,
)

# Load the generative model with a context length cap of 24000.
llm = LLM(
    model="google/gemma-3-4b-it",
    max_model_len=24000,
)

outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    # Only the first completion of each request is displayed.
    prompt, generated_text = output.prompt, output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")