In [1]:
# Imports and constants

import datasets # type: ignore
import torch
import sys
sys.path.append('..')  # Add the parent directory to the Python path
import utils.llm_utils as llm
import time

n_rows = 150

normal_word_limit = 120
normal_max_ch_soft = normal_word_limit * 6
normal_max_tokens_hard = int(normal_max_ch_soft * 1.2 / 4)

n_chunk_items = 15

model_name = '/home/zel/ml-projects/DPO-examples/omarmnfy-finetune-llama3-dpo/ZelDPO-Phi-3'
base_model_name = 'microsoft/Phi-3-mini-4k-instruct'

In [2]:
# Load the dataset.

dataset = datasets.load_dataset('ZSvedic/phi3-arena-short-dpo', split='test')
dataset = dataset.select(range(0, n_rows)).select_columns(['question-id', 'prompt'])
n_rows

150

In [3]:
def append_model_answers(model_name:str)->datasets.Dataset:
    ''' Append the model answers to the dataset. '''
    # Get the model short name by splitting the model name on '/' and getting the last element.
    model_short_name = model_name.split('/')[-1]
    tokenizer, model = llm.load_tokenizer_and_model(model_name)
    model.eval()
    print(f'Allocated GPU memory: {torch.cuda.memory_allocated() / (1024*1024):,.1f} MB')
    start_time = time.time()
    chunks = []
    for i, chunk in enumerate(llm.fixed_chunker(dataset, n_chunk_items)):
        chunk = llm.chunk_call_llm(chunk, tokenizer, model, normal_max_tokens_hard)
        chunks.append(chunk)
        chunk_len = len(chunk)
        elapsed_time = time.time() - start_time
        time_per_item = elapsed_time / chunk_len
        print(f'Processed {chunk_len} of {n_rows} in {elapsed_time:.2f}s ({time_per_item:.2f}s/item)')
    # Clean GPU memory.
    del tokenizer, model
    torch.cuda.empty_cache()
    return datasets.concatenate_datasets(chunks).rename_column('answer', f'answer-{model_short_name}') 

In [4]:
# Process models and save the dataset to file.

dataset = append_model_answers(model_name)
dataset = append_model_answers(base_model_name)

# benchmark_cli expects the question column to be named 'prompt'.
dataset = dataset.rename_column('prompt', 'question')

with open('phi3-dpo-vs-base.jsonl', 'wb') as f:
    dataset.to_json(f, lines=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.


Allocated GPU memory: 7,288.4 MB


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 8.51s (0.57s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 46.40s (3.09s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 55.81s (3.72s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 68.78s (4.59s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 90.99s (6.07s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 102.17s (6.81s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 115.02s (7.67s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 127.03s (8.47s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 136.02s (9.07s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 145.71s (9.71s/item)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Allocated GPU memory: 7,296.6 MB


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 8.35s (0.56s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 47.98s (3.20s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 57.63s (3.84s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 71.33s (4.76s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 95.73s (6.38s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 107.43s (7.16s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 120.91s (8.06s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 133.36s (8.89s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 142.54s (9.50s/item)


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Processed 15 of 150 in 152.60s (10.17s/item)


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]