In [6]:
%pip install --disable-pip-version-check torch==1.13.1 torchdata==0.5.1

Collecting torch==1.13.1
  Using cached torch-1.13.1-cp37-cp37m-manylinux1_x86_64.whl (887.5 MB)
Collecting torchdata==0.5.1
  Using cached torchdata-0.5.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)
Collecting nvidia-cudnn-cu11==8.5.0.96
  Using cached nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)
Collecting nvidia-cuda-runtime-cu11==11.7.99
  Using cached nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)
Collecting nvidia-cublas-cu11==11.10.3.66
  Using cached nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)
Collecting nvidia-cuda-nvrtc-cu11==11.7.99
  Using cached nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)
Collecting portalocker>=2.0.0
  Using cached portalocker-2.7.0-py2.py3-none-any.whl (15 kB)
Installing collected packages: portalocker, nvidia-cuda-runtime-cu11, nvidia-cuda-nvrtc-cu11, nvidia-cublas-cu11, nvidia-cudnn-cu11, torch, torchdata
Successfully ins

In [9]:
%pip install --disable-pip-version-check -q \
    transformers==4.27.2 \
    datasets==2.9.0 \
    accelerate==0.17.0 \
    evaluate==0.4.0 \
    trl==0.4.1

[0mNote: you may need to restart the kernel to use updated packages.


In [37]:
%pip install git+https://github.com/huggingface/peft.git

Collecting git+https://github.com/huggingface/peft.git
  Cloning https://github.com/huggingface/peft.git to /tmp/pip-req-build-6ihqrq8b
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/peft.git /tmp/pip-req-build-6ihqrq8b
  Resolved https://github.com/huggingface/peft.git to commit 70af02a2bca5a63921790036b2c9430edf4037e2
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: peft
  Building wheel for peft (pyproject.toml) ... [?25ldone
[?25h  Created wheel for peft: filename=peft-0.3.0.dev0-py3-none-any.whl size=50371 sha256=739940410e4e1f246c10ae0eeca68ea64a24b080ad1cc7881a527dfd9c1077cb
  Stored in directory: /tmp/pip-ephem-wheel-cache-cl69lrdz/wheels/13/73/6d/ff27a3703d8bad21d7e0c24cbd9dde5d7ae78f756405707a0c
Successfully built peft
Installing collected packages: peft
Successfully installed peft-

In [45]:
import argparse
import csv

import evaluate
import numpy as np
import torch
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer


toxicity = evaluate.load("ybelkada/toxicity", "DaNLP/da-electra-hatespeech-detection", module_type="measurement")
ds = load_dataset("OxAISH-AL-LLM/wiki_toxic", split="test")

model_type = "all"
output_file = "toxicity.csv"
batch_size=64
num_samples=400
context_length=2000
max_new_tokens=30


if model_type == "all":
    MODELS_TO_TEST = [
        "google/flan-t5-base",
        "./instruct-dialogue-summary-checkpoint/",
        "./ppo-dialogue-summary-checkpoint/",
        "./peft-dialogue-summary-checkpoint/",
    ]
    
NUM_SAMPLES = num_samples
BATCH_SIZE = batch_size
output_file = output_file
max_new_tokens = max_new_tokens
context_length = context_length
device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"

# consider only toxic prompts
ds = ds.filter(lambda x: x["label"] == 1)

toxicities = {}

# open a csv file
file = open(f"{output_file}", "w", newline="")
writer = csv.writer(file)
# add first rows
writer.writerow(["model_id", "mean_toxicity", "std_toxicity"])

from peft import PeftModel

for model_id in tqdm(MODELS_TO_TEST):
    print(model_id)
    if 'peft' in model_id:
        peft_model_base = AutoModelForSeq2SeqLM.from_pretrained('google/flan-t5-base', device_map={"": device}, torch_dtype=torch.bfloat16)
        model = PeftModel.from_pretrained(peft_model_base, model_id, device_map={"": device}, torch_dtype=torch.bfloat16)
    else:
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id, device_map={"": device}, torch_dtype=torch.bfloat16)

    tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-base')
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "left"
    input_texts = []

    for i, example in enumerate(ds):
        # set seed
        torch.manual_seed(42)

        input_text = example["comment_text"]
        input_texts.append(input_text[:2000])

        if i > NUM_SAMPLES:
            break

        if (i + 1) % BATCH_SIZE == 0:
            inputs = tokenizer(input_texts, return_tensors="pt", padding=True).to(device)
            inputs.input_ids = inputs.input_ids[:context_length]
            inputs.attention_mask = inputs.attention_mask[:context_length]
            outputs = model.generate(**inputs, do_sample=True, max_new_tokens=max_new_tokens, use_cache=True)
            generated_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
            generated_texts = [
                generated_text.replace(input_texts[i], "") for i, generated_text in enumerate(generated_texts)
            ]
            toxicity_score = toxicity.compute(predictions=generated_texts)
            input_texts = []

            if model_id not in toxicities:
                toxicities[model_id] = []
            toxicities[model_id].extend(toxicity_score["toxicity"])

    # last batch
    inputs = tokenizer(input_texts, return_tensors="pt", padding=True).to(device)
    outputs = model.generate(**inputs, do_sample=True, max_new_tokens=30)
    generated_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    generated_texts = [generated_text.replace(input_texts[i], "") for i, generated_text in enumerate(generated_texts)]
    toxicity_score = toxicity.compute(predictions=generated_texts)
    toxicities[model_id].extend(toxicity_score["toxicity"])

    # compute mean & std using np
    mean = np.mean(toxicities[model_id])
    std = np.std(toxicities[model_id])

    # save to file
    writer.writerow([model_id, mean, std])

    # print
    print(f"Model: {model_id} - Mean: {mean} - Std: {std}")

    model = None
    torch.cuda.empty_cache()

# close file
file.close()

Found cached dataset wiki_toxic (/root/.cache/huggingface/datasets/OxAISH-AL-LLM___wiki_toxic/default/1.0.0/09a67129f85f67f22107b0190f7c32050ef0dce44afeedc6e3e0ab7ab3bd709c)
Loading cached processed dataset at /root/.cache/huggingface/datasets/OxAISH-AL-LLM___wiki_toxic/default/1.0.0/09a67129f85f67f22107b0190f7c32050ef0dce44afeedc6e3e0ab7ab3bd709c/cache-411a66332d7a5b58.arrow
  0%|          | 0/4 [00:00<?, ?it/s]

google/flan-t5-base


Token indices sequence length is longer than the specified maximum sequence length for this model (780 > 512). Running this sequence through the model will result in indexing errors
  0%|          | 0/4 [00:03<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.53 GiB (GPU 0; 22.20 GiB total capacity; 9.00 GiB already allocated; 2.87 GiB free; 9.67 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [42]:
toxicity_metrics


flan-t5-base
  Mean: 0.10735442655624947
  Std: 0.2648808616377917

flan-t5-instruct-full
  Mean: 0.12531453278060511
  Std: 0.28167328436940203

flan-t5-instruct-peft
  Mean: 0.15925555246049858
  Std: 0.3149103276023309

flan-t5-instruct-peft-rl-detoxify
  Mean: 0.1088853306687247
  Std: 0.2653053856564228



In [None]:
print("""
flan-t5-base
  Mean: 0.10735442655624947
  Std: 0.2648808616377917

flan-t5-instruct-full
  Mean: 0.12531453278060511
  Std: 0.28167328436940203

flan-t5-instruct-peft
  Mean: 0.15925555246049858
  Std: 0.3149103276023309

flan-t5-instruct-peft-rl-detoxify
  Mean: 0.1088853306687247
  Std: 0.2653053856564228
""")
