In [None]:
import torch
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

In [None]:
def format_instruction(instruction, query, doc):
    """Build the three-line prompt body for one (query, document) pair.

    Args:
        instruction: Task description placed after ``<Instruct>:``. When it is
            ``None`` a default retrieval instruction is used instead.
        query: Text placed after ``<Query>:``.
        doc: Text placed after ``<Document>:``.

    Returns:
        A single string with the ``<Instruct>``, ``<Query>`` and ``<Document>``
        sections separated by newlines.
    """
    task = (
        'Given a question, retrieve relevant passages that answer the question'
        if instruction is None
        else instruction
    )
    sections = (
        f"<Instruct>: {task}",
        f"<Query>: {query}",
        f"<Document>: {doc}",
    )
    return "\n".join(sections)

In [None]:
def process_inputs(pairs):
    """Tokenize prompt bodies, wrap them in the chat prefix/suffix and pad.

    Relies on the notebook-level globals ``tokenizer``, ``model``,
    ``max_length``, ``prefix_tokens`` and ``suffix_tokens``.

    Args:
        pairs: List of prompt-body strings (as produced by
            ``format_instruction``).

    Returns:
        The padded batch returned by ``tokenizer.pad`` with every entry moved
        to ``model.device``.
    """
    # Reserve room for the fixed prefix/suffix so the wrapped sequence still
    # fits within max_length after truncation of the body.
    body_budget = max_length - len(prefix_tokens) - len(suffix_tokens)
    encoded = tokenizer(
        pairs,
        padding=False,
        truncation='longest_first',
        return_attention_mask=False,
        max_length=body_budget,
    )

    # Wrap every body in place with the pre-tokenized prefix and suffix.
    ids_per_pair = encoded['input_ids']
    ids_per_pair[:] = [prefix_tokens + body_ids + suffix_tokens for body_ids in ids_per_pair]

    batch = tokenizer.pad(encoded, padding=True, return_tensors="pt", max_length=max_length)
    for name in list(batch.keys()):
        batch[name] = batch[name].to(model.device)
    return batch

In [None]:
@torch.no_grad()
def compute_logits(inputs, **kwargs):
    """Score a batch by the normalized weight of ``token_true_id``.

    Runs the global ``model`` on ``inputs``, takes the logits at the last
    sequence position, keeps only the entries for ``token_false_id`` and
    ``token_true_id``, and normalizes across those two.

    Args:
        inputs: Mapping of keyword arguments forwarded to ``model``.
        **kwargs: Accepted but not used.

    Returns:
        A Python list with one float per batch row: the normalized share of
        the ``token_true_id`` logit against ``token_false_id``.
    """
    last_step_logits = model(**inputs).logits[:, -1, :]
    # Column 0 is the "false" token, column 1 the "true" token.
    false_true_logits = torch.stack(
        [last_step_logits[:, token_false_id], last_step_logits[:, token_true_id]],
        dim=1,
    )
    log_probs = torch.nn.functional.log_softmax(false_true_logits, dim=1)
    true_probs = log_probs[:, 1].exp()
    return true_probs.tolist()

In [None]:
# Checkpoint to load. "Qwen/Qwen3-Reranker-4B" is the alternative that was
# previously tried here; swap the name to use it instead.
model_name = "Qwen/Qwen3-Reranker-0.6B"
# Left padding keeps each sequence's final token at the last position, which is
# where compute_logits reads its logits (logits[:, -1, :]).
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
# Load in float16 with FlashAttention-2, move to the GPU and switch to eval mode.
model = (
    AutoModelForCausalLM.from_pretrained(
        model_name, dtype=torch.float16, attn_implementation="flash_attention_2"
    )
    .cuda()
    .eval()
)

In [None]:
# Load the "train" split of the "pair-score" configuration of the
# sentence-transformers/all-nli dataset.
from datasets import load_dataset
train_dataset = load_dataset("sentence-transformers/all-nli", "pair-score", split="train")

In [None]:
# Convert the loaded dataset to a pandas DataFrame for inspection and sampling.
df = train_dataset.to_pandas()

In [None]:
import pandas as pd
# Show full cell contents instead of truncating long text columns.
pd.set_option('display.max_colwidth', None)

In [None]:
# Preview the first 20 rows, with a coolwarm background gradient applied by the Styler.
df.head(20).style.background_gradient(cmap='coolwarm')

In [None]:
# Number of distinct values in the sentence1 column.
df["sentence1"].nunique()

In [None]:
# Number of distinct values in the sentence2 column.
df["sentence2"].nunique()

In [None]:
# Vocabulary ids of the two answer tokens, looked up in a single call.
# Order matters: "no" maps to the false id, "yes" to the true id.
token_false_id, token_true_id = tokenizer.convert_tokens_to_ids(["no", "yes"])

In [None]:
# Upper bound, in tokens, for one fully wrapped prompt (prefix + body + suffix).
max_length = 8192

# Chat-template text placed before every prompt body: the system message that
# states the task, followed by the opening of the user turn.
prefix = (
    "<|im_start|>system\n"
    'Judge whether the documents are similar . Note that the answer can only be "yes" or "no".<|im_end|>\n'
    "<|im_start|>user\n"
)
# Chat-template text placed after every prompt body: closes the user turn and
# opens the assistant turn with an empty <think> block.
suffix = (
    "<|im_end|>\n"
    "<|im_start|>assistant\n"
    "<think>\n\n</think>\n\n"
)
# Tokenize both wrappers once so process_inputs can concatenate raw id lists.
prefix_tokens, suffix_tokens = (
    tokenizer.encode(wrapper_text, add_special_tokens=False)
    for wrapper_text in (prefix, suffix)
)

In [None]:
# Evaluation subset: the first 20 rows plus 20 rows sampled with a fixed seed.
# NOTE(review): the sample is drawn from the full frame, so it could in principle
# repeat one of the first 20 rows and produce duplicate index labels; pandas
# Styler may reject non-unique indices — confirm if the data or seed changes.
dft = pd.concat([df.head(20), df.sample(20, random_state=42)])

In [None]:
# One prompt body per row of dft, in row order, using a similarity-specific instruction.
pairs = [
    format_instruction("Given two sentences, calculate their similarity", first_sentence, second_sentence)
    for first_sentence, second_sentence in zip(dft["sentence1"], dft["sentence2"])
]

In [None]:
%%time
# Tokenize the input texts
max_length = 8192
inputs = process_inputs(pairs)
scores = compute_logits(inputs)

In [None]:
# Attach the model scores as a new column; `scores` follows the order of
# `pairs`, which was built row by row from dft.
dft["scores"] = scores

In [None]:
# Display the evaluation subset with a coolwarm background gradient applied by the Styler.
dft.style.background_gradient(cmap='coolwarm')