In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.cuda.amp import autocast
from peft import PeftModel, PeftConfig
from tqdm.std import tqdm
import json
import pandas as pd
import os

In [None]:
# Debugging aid: ask CUDA to launch kernels synchronously so a device-side
# error is reported at the call that triggered it. Set before any CUDA work.
os.environ.update({"CUDA_LAUNCH_BLOCKING": "1"})

In [None]:
# Pick the compute device once: CUDA when a GPU is visible, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

In [None]:
# Optional override: uncomment the next line to force CPU execution even when a GPU is available.
# device = torch.device("cpu")

In [None]:
# Model identifiers: the tokenizer is loaded from the Llama-2 base checkpoint,
# the reranker from the RankLLaMA LoRA adapter.
tokenizer_name = "meta-llama/Llama-2-7b-hf"
model_name = "castorini/rankllama-v1-7b-lora-passage"

# Input TSV with the query/passage pairs, and the TSV the ranking is written to.
data_path = "../passage_ranking_input_true_data/passage_ranking_query.tsv"
output_path = "../passage_output_result/rankllama_result.tsv"

# Number of query/passage pairs scored per forward pass.
batch_size = 20

## Loading the dataset

In [None]:
# Read the tab-separated query/passage pairs into a DataFrame.
dataset = pd.read_csv(data_path, delimiter="\t")

In [None]:
# Echo the loaded frame as the cell output for a quick visual sanity check.
# Later cells read its "qid", "pid", "query" and "passage" columns.
dataset

## Loading the model and tokenizer

In [None]:
def get_model(peft_model_name):
    """Load a PEFT adapter, merge it into its base classifier and return the result.

    The adapter's config names the base checkpoint, which is loaded as a
    single-label sequence-classification model in bfloat16. The adapter is
    then merged into the base weights and the wrapper is dropped.

    Args:
        peft_model_name: Hub id or local path of the PEFT adapter.

    Returns:
        The merged model, in eval mode, on the notebook-level ``device``.
    """
    # Place the weights on the device the notebook selected instead of a
    # hard-coded 'cuda:0', so the CPU fallback actually works. An index-less
    # CUDA device maps to 'cuda:0', matching the previous behaviour on GPU.
    if device.type == "cuda":
        target_device = f"cuda:{device.index or 0}"
    else:
        target_device = str(device)

    config = PeftConfig.from_pretrained(peft_model_name)
    base_model = AutoModelForSequenceClassification.from_pretrained(
        config.base_model_name_or_path,
        device_map={'': target_device},
        torch_dtype=torch.bfloat16,
        num_labels=1,  # one logit per (query, passage) pair, used as the score
    )
    model = PeftModel.from_pretrained(base_model, peft_model_name)
    model = model.merge_and_unload()
    model.eval()
    model.to(device)
    return model

In [None]:
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

# Batched scoring pads inputs, so make sure the tokenizer has a pad token
# (and an unk token); register a placeholder only when one is missing.
for token_attr, placeholder in (("pad_token", "[PAD]"), ("unk_token", "[UNK]")):
    if getattr(tokenizer, token_attr) is None:
        tokenizer.add_special_tokens({token_attr: placeholder})

In [None]:
# Build the reranker: get_model loads the base classifier named in the adapter's
# config, merges the adapter into it and returns it in eval mode.
model = get_model(model_name)

In [None]:
# Resize the embedding table to the tokenizer's current length, because the
# tokenizer cell may have added '[PAD]' / '[UNK]' special tokens.
model.resize_token_embeddings(len(tokenizer))

In [None]:
# Keep the model config's padding id in sync with the tokenizer's.
# NOTE(review): presumably the classification head uses this id to find the
# last non-padding token in a padded batch; confirm in the transformers docs.
model.config.pad_token_id = tokenizer.pad_token_id

In [None]:
def get_scores(batch_queries, batch_passages):
    """Score a batch of (query, passage) pairs with the reranker.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        batch_queries: Sequence of query strings.
        batch_passages: Sequence of passage strings, aligned with
            ``batch_queries`` (same length, same order).

    Returns:
        A list of Python floats, one score per pair, in input order.
        An empty batch yields an empty list.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(batch_queries) != len(batch_passages):
        raise ValueError(
            f"got {len(batch_queries)} queries but {len(batch_passages)} passages"
        )
    # Nothing to score; skip the tokenizer and the forward pass.
    if len(batch_queries) == 0:
        return []

    inputs = tokenizer(
        batch_queries,
        batch_passages,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors='pt'
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        # The model is loaded in bfloat16, so autocast to bfloat16 as well.
        # torch.cuda.amp.autocast() defaults to float16, which would silently
        # recast the forward pass, and it only works on CUDA.
        with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
            logits = model(**inputs).logits

    # Drop the trailing label dimension and convert to plain Python floats.
    scores = logits.squeeze(-1).float().cpu().tolist()

    # Release the batch tensors before the next call.
    del inputs, logits
    if device.type == "cuda":
        torch.cuda.empty_cache()

    return scores

## Ranking

In [None]:
# Rows are ranked in consecutive chunks of `chunk_size`.
# NOTE(review): this assumes each query's candidates occupy `chunk_size`
# consecutive rows of the input file; confirm against the data.
chunk_size = 100

# Build the model inputs for every row up front.
all_queries = [f'query: {query}' for query in dataset["query"]]
all_passages = [f'document: {passage}' for passage in dataset["passage"]]
docids = dataset["pid"].tolist()

# Score every row in batches of `batch_size`; the last batch may be shorter.
scores = []
for start in tqdm(range(0, len(dataset), batch_size)):
    stop = start + batch_size
    scores.extend(get_scores(all_queries[start:stop], all_passages[start:stop]))

# Sort each chunk by descending score. Slicing (rather than waiting for the
# running count to equal chunk_size) means a trailing partial chunk is still
# ranked, and chunk boundaries no longer depend on batch_size dividing
# chunk_size.
ranked_docids = []
ranked_scores = []
for start in range(0, len(scores), chunk_size):
    stop = start + chunk_size
    ranked_chunk = sorted(
        zip(scores[start:stop], docids[start:stop]),
        key=lambda pair: pair[0],
        reverse=True,
    )
    ranked_scores.extend(score for score, _ in ranked_chunk)
    ranked_docids.extend(docid for _, docid in ranked_chunk)

# Every input row must have exactly one ranked entry before it is written back.
assert len(ranked_docids) == len(ranked_scores) == len(dataset)


In [None]:
# Attach the ranking to the frame. Both lists are ordered best-first within
# each chunk of rows, so row i holds the i-th ranked pid/score of its chunk,
# not the score of that row's own "pid".
for column_name, column_values in (("ranked_pid", ranked_docids), ("scores", ranked_scores)):
    dataset[column_name] = column_values

In [None]:
# Keep only the columns that belong in the result file, in output order.
output_columns = ["qid", "pid", "ranked_pid", "scores"]
dataset = dataset.loc[:, output_columns]

In [None]:
# Create the destination folder first: to_csv does not create missing
# directories, and failing here would discard the whole scoring run.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Write the ranking as a tab-separated file without the DataFrame index.
dataset.to_csv(output_path, sep="\t", index=False)