In [7]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
    logging,
)



In [11]:
# model_name = "/data/opt-350m"
model_name = "meta-llama/Llama-2-7b-hf"
# model_name = "/data/Mistral-7B-Instruct-v0.2"
# model_name = "mistralai/Mistral-7B-v0.1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)
device_map = {"": 0}
# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.eval()

model.config.use_cache = False
model.config.pretraining_tp = 1

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
if "Llama-2" in model_name or "Mistral" in model_name:
    tokenizer.pad_token = tokenizer.bos_token

bos = tokenizer.bos_token_id
eos = tokenizer.eos_token_id
pad = tokenizer.pad_token_id
print("pre-trained model's BOS EOS and PAD token id:",bos,eos,pad," => It should be 1 2 None")

##### tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
tokenizer.padding_side = "right"



Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/hshapour/.pyenv/versions/3.10.12/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda115.so
CUDA SETUP: CUDA runtime path found: /usr/lib/x86_64-linux-gnu/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 115
CUDA SETUP: Loading binary /home/hshapour/.pyenv/versions/3.10.12/envs/pytorch/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda115.so...


Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.98s/it]


pre-trained model's BOS EOS and PAD token id: 1 2 1  => It should be 1 2 None


In [None]:
from datasets import load_dataset, concatenate_datasets
import json
import torch
from datasets.utils.logging import disable_progress_bar,enable_progress_bar

dataset_name = "super_glue"  # The MMLU dataset name in Hugging Face Datasets
dataset = load_dataset(dataset_name, name='multirc')#["train"]
# print(dataset)

labels = dataset["train"].features["label"]
# def create_label_str(batch):
#     return {"label_str": labels.int2str(batch["label"])}
def create_label_str(example):
    return {"label_str": "True" if example["label"]== 1 else "False"}
# print(labels)


In [99]:
# data_train = dataset["train"].map(create_label_str)   
# data_test = dataset["validation"].map(create_label_str)   
# # print(d1)
# # dd = d1.select(range(5)).shuffle()

d1 = dataset["train"].map(create_label_str)   
d2 = dataset["validation"].map(create_label_str)   

d_all = concatenate_datasets([d1,d2])
train_size = 1000
data_train = d_all.select(range(train_size))
data_test = d_all.select(range(train_size,d_all.num_rows))

print(data_train.num_rows, data_test.num_rows)

1000 31091


In [100]:
# prepare examples
example_prompt = []
for ex in range(6):
    dd = data_train.filter(lambda example: example['idx']['question']==ex)
    prompt = "Paragraph: " + dd[0]['paragraph'] + '\n'
    prompt += "Question: " + dd[0]['question'] + '\n'
    # prompt += "Candidate answers:\n"
    # prompt += "Statement: "
    prompt += "Determine whether the following items are True or False. \n"
    for idx in range(dd.num_rows):
    # for idx in [0]:
        prompt += dd[idx]['answer'] + '\n'
    prompt += "Answer:"
    if ex< 4:
        for idx in range(dd.num_rows-1):
        # for idx in [0]:
            prompt += dd[idx]['label_str'] # + ','
        prompt += dd[idx]['label_str'] #+ '\n\n'
    example_prompt.append(prompt)

print(example_prompt[4])


Filter: 100%|██████████| 1000/1000 [00:00<00:00, 51921.27 examples/s]


Filter: 100%|██████████| 1000/1000 [00:00<00:00, 55989.75 examples/s]
Filter: 100%|██████████| 1000/1000 [00:00<00:00, 61053.35 examples/s]
Filter: 100%|██████████| 1000/1000 [00:00<00:00, 57671.10 examples/s]
Filter: 100%|██████████| 1000/1000 [00:00<00:00, 60610.45 examples/s]
Filter: 100%|██████████| 1000/1000 [00:00<00:00, 60266.45 examples/s]

Paragraph: While this process moved along, diplomacy continued its rounds. Direct pressure on the Taliban had proved unsuccessful. As one NSC staff note put it, "Under the Taliban, Afghanistan is not so much a state sponsor of terrorism as it is a state sponsored by terrorists." In early 2000, the United States began a high-level effort to persuade Pakistan to use its influence over the Taliban. In January 2000, Assistant Secretary of State Karl Inderfurth and the State Department's counterterrorism coordinator, Michael Sheehan, met with General Musharraf in Islamabad, dangling before him the possibility of a presidential visit in March as a reward for Pakistani cooperation. Such a visit was coveted by Musharraf, partly as a sign of his government's legitimacy. He told the two envoys that he would meet with Mullah Omar and press him on  Bin Laden. They left, however, reporting to Washington that Pakistan was unlikely in fact to do anything," given what it sees as the benefits of Taliba




In [118]:
torch.randperm(4)

tensor([3, 0, 1, 2])

In [113]:
# prepare examples
example_prompt = []
ex_inds = torch.randint(0, 80,(6,))
print(ex_inds)
for i, ex in enumerate(ex_inds):
    dd = data_train.filter(lambda example: example['idx']['question']==ex)
    idx = 0
    prompt = "Paragraph: " + dd[idx]['paragraph'] + '\n'
    for idx in [0]:
        prompt += "Question: " + dd[idx]['question'] + '\n'
        prompt += "Response: " + dd[idx]['answer'] + '\n'
        if i< 4:
            prompt += "Answer:" + dd[idx]['label_str'] + '\n'
        # prompt += dd[idx]['label_str'] #+ '\n\n'
    example_prompt.append(prompt)

print(example_prompt[1])


tensor([43, 79, 26, 71, 62, 10])


Filter: 100%|██████████| 1000/1000 [00:00<00:00, 40736.43 examples/s]
Filter: 100%|██████████| 1000/1000 [00:00<00:00, 45193.83 examples/s]
Filter: 100%|██████████| 1000/1000 [00:00<00:00, 45636.89 examples/s]
Filter: 100%|██████████| 1000/1000 [00:00<00:00, 45428.30 examples/s]

Paragraph: (CNN) -- Declaring 2010 "The best year in safety performance in our company's history," Transocean Ltd., owner of the Gulf of Mexico oil rig that exploded, killing 11 workers, has awarded its top executives hefty bonuses and raises, according to a recent filing with the U.S. Securities and Exchange Commission. That includes a $200,000 salary increase for Transocean president and chief executive officer Steven L. Newman, whose base salary will increase from $900,000 to $1.1 million, according to the SEC report. Newman's bonus was $374,062, the report states. Newman also has a $5.4 million long-term compensation package the company awarded him upon his appointment as CEO in March 2010, according to the SEC filing. The latest cash awards are based in part on the company's "performance under safety," the Transocean filing states. "Notwithstanding the tragic loss of life in the Gulf of Mexico, we achieved an exemplary statistical safety record as measured by our total recordable 




In [114]:
query = ''
for i in range(5):
    query += example_prompt[i] + '\n\n'
query += example_prompt[5]

n1 = len(tokenizer(query).input_ids)
print(n1)

pipe = pipeline(task="text-generation", model=model,\
                tokenizer=tokenizer, do_sample=False,\
                max_length= n1+10)
# result = pipe(f"<s>[INST]{query}[/INST]")
result = pipe(query)
print(result[0]['generated_text'])


2586




Paragraph: Alexander II's death caused a great setback for the reform movement. One of his last ideas was to draft plans for an elected parliament, or Duma, which were completed the day before he died but not yet released to the Russian people. In a matter of 48 hours, Alexander II planned to release his plan for the duma to the Russian people. Had he lived, Russia might have followed a path to constitutional monarchy instead of the long road of oppression that defined his successor's reign. The first action Alexander III took after his father's death was to tear up those plans. A Duma would not come into fruition until 1905, when Alexander II's grandson, Nicholas II, commissioned the Duma following extreme pressure on the monarchy as a result of the Russian Revolution of 1905. The assassination triggered major suppression of civil liberties in Russia, and police brutality burst back in full force after experiencing some restraint under the reign of Alexander II, whose death was witnes

In [67]:
input_ids = tokenizer(query, return_tensors='pt').input_ids
input_ids = input_ids.to(device=0)

input_ids.size()


torch.Size([1, 3157])

In [75]:
input_ids = torch.concat([input_ids, torch.argmax(answers[-1]).view(1,1)],dim = 1)
with torch.no_grad():
    output = model(input_ids)
    answers = output.logits.squeeze()

print(tokenizer.decode(torch.argmax(answers[-1])))


Par
