In [1]:
from datasets import load_dataset, Dataset
from vllm import LLM, SamplingParams

In [2]:
NUM_GPUS = 4
BATCH_SIZE = 8192

In [4]:
ds = load_dataset('amang1802/wildeweb-sample-salad_5K', split='train')
ds

Dataset({
    features: ['3-category', 'question', 'qid', 'source', '2-category', '1-category', 'metallama', 'metallama_grading', 'metallama_score', 'wildeweb'],
    num_rows: 5000
})

In [5]:
model_id = "meta-llama/Llama-Guard-3-8B"

In [6]:
llm = LLM(model=model_id, max_model_len=512, tensor_parallel_size=NUM_GPUS, gpu_memory_utilization=0.98)

INFO 01-21 02:17:49 config.py:905] Defaulting to use mp for distributed inference
INFO 01-21 02:17:49 llm_engine.py:237] Initializing an LLM engine (v0.6.3.post1) with config: model='meta-llama/Llama-Guard-3-8B', speculative_config=None, tokenizer='meta-llama/Llama-Guard-3-8B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=512, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=4, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=meta-llama/Llama-Guard-3-8B, nu

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


INFO 01-21 02:17:58 model_runner.py:1067] Loading model weights took 3.7710 GB
[1;36m(VllmWorkerProcess pid=65042)[0;0m INFO 01-21 02:17:58 model_runner.py:1067] Loading model weights took 3.7710 GB
[1;36m(VllmWorkerProcess pid=65041)[0;0m INFO 01-21 02:17:58 model_runner.py:1067] Loading model weights took 3.7710 GB
[1;36m(VllmWorkerProcess pid=65043)[0;0m INFO 01-21 02:17:58 model_runner.py:1067] Loading model weights took 3.7710 GB
INFO 01-21 02:18:00 distributed_gpu_executor.py:57] # GPU blocks: 257880, # CPU blocks: 8192
INFO 01-21 02:18:00 distributed_gpu_executor.py:61] Maximum concurrency for 512 tokens per request: 8058.75x
[1;36m(VllmWorkerProcess pid=65041)[0;0m INFO 01-21 02:18:03 model_runner.py:1395] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
[1;36m(VllmWorkerProcess pid=65041)[0;0m INFO 01-21 02:18:03 model

In [8]:
def score(guard_response):
    return 0 if "unsafe" in guard_response.lower() else 1

In [9]:
def classify(questions, answers, column_name):    
    messages = [[{"role": "user", "content": question},
                {"role": "assistant", "content": answer}] for question, answer in zip(questions, answers)]
    outputs = llm.chat(messages, SamplingParams(temperature=0.1, top_p=0.9, max_tokens=256))
    responses = [output.outputs[0].text.strip() for output in outputs]
    scores = [score(response) for response in responses]
    return {f"{column_name}_grading": responses, f"{column_name}_score": scores}

In [10]:
ds_graded = ds.map(lambda prompts, completions: classify(prompts, completions, "wildeweb"), input_columns=['question', 'wildeweb'], batched=True, batch_size=BATCH_SIZE)



Map:   0%|          | 0/5000 [00:00<?, ? examples/s]


[Acessed prompts:   0% 0/5000 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]
[Acessed prompts:   0% 1/5000 [00:01<2:11:44,  1.58s/it, est. speed input: 194.17 toks/s, output: 1.90 toks/s]
[Acessed prompts:   1% 72/5000 [00:01<01:42, 47.85it/s, est. speed input: 14037.83 toks/s, output: 111.00 toks/s]
[Acessed prompts:   5% 269/5000 [00:02<00:39, 120.41it/s, est. speed input: 33476.30 toks/s, output: 458.55 toks/s]
[Acessed prompts:   8% 406/5000 [00:03<00:32, 141.52it/s, est. speed input: 41239.59 toks/s, output: 523.51 toks/s]
[Acessed prompts:  12% 618/5000 [00:04<00:27, 161.50it/s, est. speed input: 47861.27 toks/s, output: 606.99 toks/s]
[Acessed prompts:  16% 786/5000 [00:05<00:24, 169.12it/s, est. speed input: 51977.26 toks/s, output: 621.54 toks/s]
[Acessed prompts:  20% 986/5000 [00:06<00:22, 176.25it/s, est. speed input: 55607.39 toks/s, output: 651.97 toks/s]
[Acessed prompts:  23% 1173/5000 [00:07<00:22, 172.08it/s, est. speed input: 56941.68 

In [11]:
ds_graded

Dataset({
    features: ['3-category', 'question', 'qid', 'source', '2-category', '1-category', 'metallama', 'metallama_grading', 'metallama_score', 'wildeweb', 'wildeweb_grading', 'wildeweb_score'],
    num_rows: 5000
})

In [13]:
unsafe_metallama = ds_graded.filter(lambda row: row['metallama_score'] == 0)
unsafe_metallama

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Dataset({
    features: ['3-category', 'question', 'qid', 'source', '2-category', '1-category', 'metallama', 'metallama_grading', 'metallama_score', 'wildeweb', 'wildeweb_grading', 'wildeweb_score'],
    num_rows: 2130
})

In [14]:
unsafe_wildeweb = ds_graded.filter(lambda row: row['wildeweb_score'] == 0)
unsafe_wildeweb

Filter:   0%|          | 0/5000 [00:00<?, ? examples/s]

Dataset({
    features: ['3-category', 'question', 'qid', 'source', '2-category', '1-category', 'metallama', 'metallama_grading', 'metallama_score', 'wildeweb', 'wildeweb_grading', 'wildeweb_score'],
    num_rows: 1955
})

In [16]:
(1-unsafe_metallama.num_rows/5000), (1-unsafe_wildeweb.num_rows/5000)

(0.5740000000000001, 0.609)

In [12]:
ds_graded.push_to_hub('amang1802/wildeweb-sample-salad_5K')

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/datasets/amang1802/wildeweb-sample-salad_5K/commit/c6210bfdff2662953e4e1a26780e2368990fa80f', commit_message='Upload dataset', commit_description='', oid='c6210bfdff2662953e4e1a26780e2368990fa80f', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/amang1802/wildeweb-sample-salad_5K', endpoint='https://huggingface.co', repo_type='dataset', repo_id='amang1802/wildeweb-sample-salad_5K'), pr_revision=None, pr_num=None)