# Purpose of this notebook
- Check whether any code changes are needed to use Qwen2.5 models.

In [1]:
import os
from pathlib import Path
from evaluation.prompted_sampling.evaluate import conditional_perplexity

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import pandas as pd



# Load model
- The 14B model must be loaded in float16 to fit in GPU memory:
    - float16: 30.7 GB
    - float32: 61.4 GB

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [8]:
# Checkpoint identifier passed to `from_pretrained` for both the model and the tokenizer.
model_name = "Qwen/Qwen2.5-14B"

In [3]:
# Load the weights in half precision and move the model onto the selected
# device in a single chained expression.
eval_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
).to(device)

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [9]:
# Load the tokenizer published under the same checkpoint name as the evaluation model.
eval_tokenizer = AutoTokenizer.from_pretrained(model_name)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
print(torch.cuda.get_device_name(0))
print('Memory Usage:')
f = torch.cuda.get_device_properties(0).total_memory
a = torch.cuda.memory_allocated(0)
r = torch.cuda.memory_reserved(0)
print('Free: ', round((f-a)/1024**3,1), 'GB')
print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

NVIDIA RTX A6000
Memory Usage:
Free:  16.9 GB
Allocated: 30.7 GB
Cached:    30.7 GB


# Run conditional perplexity code

In [5]:
# Read the generations file in JSON Lines format (one JSON record per line).
generations_df = pd.read_json(
    '/data/hyeryung/mucoco/new_module/llm_experiments/generate_with_llm/baselm_gens/gpt-3.5-turbo-0125/nontoxic/gpt-3.5-turbo-0125_realtoxicityprompts_0shot_150.jsonl',
    lines=True,
)

In [17]:
# Base path used to name the perplexity result file, plus its parent directory.
# NOTE(review): the generations are read from "..._0shot_150.jsonl" while this
# name uses "_noprompt_" -- confirm the mismatch is intended.
output_file = '/data/hyeryung/mucoco/new_module/llm_experiments/generate_with_llm/baselm_gens/gpt-3.5-turbo-0125/nontoxic/gpt-3.5-turbo-0125_realtoxicityprompts_noprompt_150.jsonl'
output_dir = Path(output_file).parent

In [20]:

# Release unused cached GPU memory left over from earlier cells before evaluating.
torch.cuda.empty_cache()

# Path handed to `write_file` (presumably where conditional_perplexity writes
# its results -- confirm against its implementation). It is built directly from
# `output_file`, which already contains the directory: joining it onto
# `output_dir` again would duplicate the directory for a relative path, and is
# silently ignored by pathlib for an absolute one.
ppl_path = Path(output_file + ".ppl-big")

# Evaluation only, so gradient tracking is disabled.
with torch.no_grad():
    ppl, total_ppl = conditional_perplexity(
        generations_df,
        eval_model,
        eval_tokenizer,
        device=device,
        write_file=ppl_path,
    )


Evaluating PPL:   0%|                                                    | 0/250 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
Evaluating PPL: 100%|██████████████████████████████████████████| 250/250 [02:44<00:00,  1.52it/s]

5.687756477163672 2500 2500 2500





In [21]:
# Display the two values returned by conditional_perplexity.
ppl, total_ppl

(5.687756477163672, 4.99912035925632)

# check tokenization
- There is no special BOS token; an EOS token exists but is not appended when a single sentence is tokenized.
- The padding token is the same as the EOS token (`<|endoftext|>`).

In [16]:
# Show which special tokens (eos, pad, additional) the tokenizer defines.
eval_tokenizer.special_tokens_map

{'eos_token': '<|endoftext|>',
 'pad_token': '<|endoftext|>',
 'additional_special_tokens': ['<|im_start|>',
  '<|im_end|>',
  '<|object_ref_start|>',
  '<|object_ref_end|>',
  '<|box_start|>',
  '<|box_end|>',
  '<|quad_start|>',
  '<|quad_end|>',
  '<|vision_start|>',
  '<|vision_end|>',
  '<|vision_pad|>',
  '<|image_pad|>',
  '<|video_pad|>']}

In [13]:
# Tokenize the first generation of row 0 with special tokens enabled, to see
# whether anything is added at the beginning or the end of the sequence.
first_text = generations_df['generations'][0][0]['text']
eval_tokenizer(first_text, add_special_tokens=True)

{'input_ids': [49680, 13, 1084, 594, 537, 429, 358, 47869, 26830, 26, 304, 2097, 11, 358, 1477, 1105, 5008, 80289, 9898, 13, 1988, 369, 1045, 2874, 11, 279, 4522, 315, 1660, 389, 264, 15223, 594, 1182, 3643, 752, 2666, 82272, 13, 18765, 432, 594, 279, 8679, 315, 15679, 476, 1101, 537, 8266, 304, 2524, 13, 32639, 279, 2874, 11, 15223, 1419, 19837, 374, 537, 458, 5702, 358, 4669, 13], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [14]:
# Encode the same text and decode it again while keeping special tokens in the
# output; no special token shows up in the decoded string.
first_text = generations_df['generations'][0][0]['text']
first_token_ids = eval_tokenizer(first_text)['input_ids']
eval_tokenizer.decode(first_token_ids, skip_special_tokens=False)

"riding. It's not that I dislike horses; in fact, I find them quite majestic animals. But for some reason, the idea of being on a horse's back makes me feel uneasy. Perhaps it's the fear of falling or just not feeling in control. Whatever the reason, horseback riding is not an activity I enjoy."

In [19]:
# Tokenize every generation of row 0 as one padded batch of PyTorch tensors.
row0_texts = [gen['text'] for gen in generations_df['generations'][0]]
outputs = eval_tokenizer(
    row0_texts,
    add_special_tokens=True,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

In [23]:
# Padding occurs -- the shorter sequences are filled with token id 151643,
# which is this tokenizer's pad token. The id is read from the tokenizer rather
# than hard-coded so the check still holds for other checkpoints.
# The prints are separate statements: joining them with a comma builds a tuple
# that the notebook then echoes as `(None, None)`.
print(outputs['input_ids'])
print(eval_tokenizer.decode([eval_tokenizer.pad_token_id]))

tensor([[ 49680,     13,   1084,    594,    537,    429,    358,  47869,  26830,
             26,    304,   2097,     11,    358,   1477,   1105,   5008,  80289,
           9898,     13,   1988,    369,   1045,   2874,     11,    279,   4522,
            315,   1660,    389,    264,  15223,    594,   1182,   3643,    752,
           2666,  82272,     13,  18765,    432,    594,    279,   8679,    315,
          15679,    476,   1101,    537,   8266,    304,   2524,     13,  32639,
            279,   2874,     11,  15223,   1419,  19837,    374,    537,    458,
           5702,    358,   4669,     13, 151643, 151643, 151643, 151643, 151643,
         151643],
        [ 49680,     13,    576,   3381,    315,  11699,    389,    264,   3460,
           9864,    646,    387,  63030,    369,   1045,   1251,     11,    714,
           1052,    525,   3800,    879,   1477,    432,  90726,   1095,     13,
          13139,    432,    594,    279,  53804,    315,  15369,    385,   9989,
          

(None, None)