In [None]:
!pip install -q "transformers[torch]" scipy


In [1]:
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModel
from scipy.spatial.distance import cdist

# Run on the GPU when PyTorch reports one; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Running on: {DEVICE}")


Running on: cuda


In [2]:
from google.colab import userdata

MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"

# Read the Colab secret named 'hugg' and actually hand it to the hub calls.
# Previously the value returned by userdata.get() was discarded, so it had no
# effect on the downloads below.
# NOTE(review): presumably 'hugg' holds a Hugging Face access token — confirm.
_hf_token = userdata.get('hugg')

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, token=_hf_token)
model = AutoModel.from_pretrained(MODEL_NAME, token=_hf_token)
model.to(DEVICE)
# Inference only: switch off dropout and other training-time behaviour.
model.eval()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.19G [00:00<?, ?B/s]

Qwen3Model(
  (embed_tokens): Embedding(151669, 1024)
  (layers): ModuleList(
    (0-27): 28 x Qwen3DecoderLayer(
      (self_attn): Qwen3Attention(
        (q_proj): Linear(in_features=1024, out_features=2048, bias=False)
        (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
        (v_proj): Linear(in_features=1024, out_features=1024, bias=False)
        (o_proj): Linear(in_features=2048, out_features=1024, bias=False)
        (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
        (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
      )
      (mlp): Qwen3MLP(
        (gate_proj): Linear(in_features=1024, out_features=3072, bias=False)
        (up_proj): Linear(in_features=1024, out_features=3072, bias=False)
        (down_proj): Linear(in_features=3072, out_features=1024, bias=False)
        (act_fn): SiLUActivation()
      )
      (input_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)
      (post_attention_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)
    )
  )
  (norm): Qwen3RM

In [3]:
def encode_texts(text_list, max_len: int = 128, batch_size: int = 32):
    """Embed texts with the loaded model and return L2-normalized vectors.

    Each text is tokenized (truncated to ``max_len`` tokens), run through the
    module-level ``model`` on ``DEVICE`` and reduced to a single vector by
    attention-masked mean pooling over the token axis. Texts are processed in
    chunks of ``batch_size`` so that a large corpus does not have to fit into
    one forward pass.

    NOTE(review): the Qwen3-Embedding model card pools the last token (with
    left padding) rather than averaging; this helper keeps the notebook's
    masked-mean pooling — confirm which one is intended.

    Args:
        text_list: Iterable of strings; a single string is treated as a
            one-element list.
        max_len: Maximum number of tokens kept per text.
        batch_size: Number of texts per forward pass (must be >= 1). Padding
            positions are masked out of the mean, so the chunking should not
            change the embeddings beyond floating-point noise.

    Returns:
        A NumPy array of shape ``(len(text_list), hidden_size)`` whose rows
        have unit L2 norm. Empty input yields an array of shape
        ``(0, hidden_size)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Normalize the input to a concrete list so it can be sliced into chunks.
    if isinstance(text_list, str):
        text_list = [text_list]
    else:
        text_list = list(text_list)

    # The tokenizer cannot build a batch from nothing; return an empty matrix
    # that still has the right width so downstream distance code keeps working.
    if not text_list:
        return np.empty((0, model.config.hidden_size), dtype=np.float32)

    pooled_chunks = []
    for start in range(0, len(text_list), batch_size):
        # tokenize one chunk
        batch = tokenizer(
            text_list[start:start + batch_size],
            padding=True,
            truncation=True,
            max_length=max_len,
            return_tensors="pt",
        ).to(DEVICE)

        with torch.no_grad():
            outputs = model(**batch, return_dict=True)

        # Pool in float32 so a half-precision model neither loses precision in
        # the sum nor fails on the NumPy conversion; a no-op for float32 models.
        hidden = outputs.last_hidden_state.float()                      # [B, L, D]
        mask = batch["attention_mask"].unsqueeze(-1).to(hidden.dtype)   # [B, L, 1]

        # masked mean pooling: padding positions contribute nothing
        summed = (hidden * mask).sum(dim=1)            # [B, D]
        counts = mask.sum(dim=1).clamp(min=1e-9)       # [B, 1], guards against /0
        pooled_chunks.append((summed / counts).cpu().numpy())

    # stitch the chunks back together and L2-normalize each row
    emb_np = np.concatenate(pooled_chunks, axis=0)
    norms = np.linalg.norm(emb_np, axis=1, keepdims=True) + 1e-12
    return emb_np / norms


In [4]:
# Toy corpus: four short sentences used to sanity-check the embedding helper.
texts = [
    "Qwen3 Embedding is useful for semantic search.",
    "Transformers are powerful models for natural language processing.",
    "Diffusion models can generate high quality images.",
    "We can compare text similarity using vector embeddings.",
]

# Embed the whole corpus in one call; the trailing expression shows its shape.
doc_embeddings = encode_texts(text_list=texts)
doc_embeddings.shape


(4, 1024)

In [12]:
def search_similar(query, documents, doc_embs, top_k: int = 3):
    """Print the ``top_k`` documents closest to ``query`` by cosine similarity.

    The query is embedded with ``encode_texts`` and compared against the
    pre-computed ``doc_embs`` (one row per entry of ``documents``). Results are
    printed best-first with ``score = 1 - cosine distance``; nothing is returned.
    """
    # Keep the 2-D shape (1, dim) that cdist expects for its first argument.
    query_vec = encode_texts([query])[:1]

    # Cosine distance from the query to every document; lower means closer.
    cosine_dist = cdist(query_vec, doc_embs, metric="cosine")[0]
    best_first = np.argsort(cosine_dist)[:top_k]

    print(f"Query: {query}\n")
    rank = 0
    for idx in best_first:
        rank += 1
        similarity = 1 - cosine_dist[idx]
        print(f"#{rank} (score={similarity:.4f}) → {documents[idx]}")

# Demo: rank the toy corpus against a paraphrased question.
search_similar(
    query="How can I compare text meaning using vectors?",
    documents=texts,
    doc_embs=doc_embeddings,
    top_k=3,
)


Query: How can I compare text meaning using vectors?

#1 (score=0.9138) → We can compare text similarity using vector embeddings.
#2 (score=0.7379) → Qwen3 Embedding is useful for semantic search.
#3 (score=0.6829) → Transformers are powerful models for natural language processing.


In [19]:
import pandas as pd

df = pd.read_csv("/content/qwen.csv")

# Drop missing rows before casting: astype(str) would otherwise turn NaN into
# the literal string "nan", which would then be embedded and ranked like a
# real document. Positions in `corpus` therefore follow the non-missing rows.
corpus = df["text"].dropna().astype(str).tolist()

# compute and cache embeddings once
corpus_embs = encode_texts(corpus)

# test a query
search_similar(
    "Is ronaldo better than messi",
    corpus,
    corpus_embs,
    top_k=5,
)


Query: Is ronaldo better than messi

#1 (score=0.8391) → Ronaldo scores goals grater than messi
#2 (score=0.7789) → Ronaldo Plays for portugual
#3 (score=0.7779) → Leonardo messi plays soccer
#4 (score=0.6203) → Aegentina won FIFA worldcup


In [16]:
import pandas as pd

df = pd.read_csv("/content/qwen.csv")

# Skip missing rows before casting: astype(str) would otherwise turn NaN into
# the literal string "nan" and embed it as if it were a document.
docs = df["text"].dropna().astype(str).tolist()

# Embed the documents with the encode_texts() helper defined earlier in this notebook.
doc_embs = encode_texts(docs)
