In [1]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
    logging,
)



In [2]:
# model_name = "/data/opt-350m"
model_name = "/data/llama-hf/Llama-2-7b-hf"
# model_name = "/data/Mistral-7B-Instruct-v0.2"
# model_name = "mistralai/Mistral-7B-v0.1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)
device_map = {"": 0}
# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.eval()

model.config.use_cache = False
model.config.pretraining_tp = 1

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
if "Llama-2" in model_name or "Mistral" in model_name:
    tokenizer.pad_token = tokenizer.bos_token

bos = tokenizer.bos_token_id
eos = tokenizer.eos_token_id
pad = tokenizer.pad_token_id
print("pre-trained model's BOS EOS and PAD token id:",bos,eos,pad," => It should be 1 2 None")

##### tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
tokenizer.padding_side = "right"



Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /opt/conda/envs/mytorch/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /opt/conda/envs/mytorch/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

pre-trained model's BOS EOS and PAD token id: 1 2 1  => It should be 1 2 None


In [51]:
dataset_name = "super_glue"  # The MMLU dataset name in Hugging Face Datasets
dataset = load_dataset(dataset_name, 'boolq')#["train"]
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['question', 'passage', 'idx', 'label'],
        num_rows: 9427
    })
    validation: Dataset({
        features: ['question', 'passage', 'idx', 'label'],
        num_rows: 3270
    })
    test: Dataset({
        features: ['question', 'passage', 'idx', 'label'],
        num_rows: 3245
    })
})


In [59]:
# for split in dataset:
#     print(split)
labels = dataset["train"].features["label"]
def create_label_str(batch):
    return {"label_str": labels.int2str(batch["label"])}
print(labels)

d1 = dataset["train"].map(create_label_str)   
print(d1)

# idx = 0
# # print(dataset[idx]['sentence1'])
# # print(dataset[idx]['sentence2'])
# # print(dataset[idx]['label'])
# print(dataset['question'][:3])
# print(create_label_str(dataset[:10]))

ClassLabel(names=['False', 'True'], id=None)
Dataset({
    features: ['question', 'passage', 'idx', 'label', 'label_str'],
    num_rows: 9427
})


In [74]:

prompt = ""
for idx in range(6):
    # prompt = "Are the following two sentences consistent? Yes or No?\n\n"
    prompt += "Passage: " + d1[idx]['passage'] + '\n'
    prompt += "Question: " + d1[idx]['question'] + '\n'
    # prompt += "Based on the passage, " + dataset[idx]['question'] + "? Yes or No?" + '\n'
    if idx == 5:
        prompt += "Answer:"
    else:
        prompt += "Answer:" + d1[idx]['label_str'] + '\n\n'


# # print(prompt)
# pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length= 200)
# result = pipe(f"<s>[INST] {prompt} [/INST]")
# print(result[0]['generated_text'])

input_ids = tokenizer(prompt, return_tensors='pt').input_ids
input_ids = input_ids.to(device=0)

with torch.no_grad():
    output = model(input_ids)
    answers = output.logits.squeeze()



In [78]:
print(tokenizer.decode(torch.argmax(answers[-1])))
print(d1[5]['label_str'])
# print(input_ids.size(),answers.size())

False
False


In [8]:
s = tokenizer('False', add_special_tokens=False)
print(tokenizer.convert_ids_to_tokens(s.input_ids))

['▁False']
