In [17]:
import numpy as np
import transformers
import torch
# import argparse
import uuid
from tqdm import tqdm
import my_utils as ut
# from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from accelerate import Accelerator
from sklearn.model_selection import train_test_split
from accelerate.utils import broadcast
# Session-wide backend configuration: switch cuDNN on and enable its benchmark mode.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

In [18]:
class argment():
    """Plain attribute container that stands in for parsed command-line arguments.

    The notebook reads its settings from an instance of this class (``args``)
    instead of an ``argparse`` namespace. All values are fixed in ``__init__``.
    """

    def __init__(self):
        # not read in the visible cells; presumably the beam width used by
        # ut.generate_suffixes_distributed -- TODO confirm
        self.num_beams = 1
        # number of trailing prefix columns kept when the training prompts are loaded
        self.prefix_size = 50
        # number of leading suffix columns kept; also the count of trailing tokens
        # scored by evaluate_distributed when `aligned` is truthy
        self.suffix_size = 50
        # truthy -> evaluate_distributed masks everything except the last `suffix_size` tokens
        self.aligned = 1
        # size of the sampled test split and of the perplexity subset
        self.test_set_size = 1000
        # alias resolved to a checkpoint name in the model-loading cell
        self.model_size =  'gpt2'
        # 'cuda:0' is the valid torch device string; the previous value 'cude:0'
        # was a typo that torch cannot parse
        self.device = 'cuda:0'
        # paths of the .npy prompt files loaded through ut.load_prompts
        self.train_preprefix = '../datasets/train_preprefix.npy'
        self.train_prefix = '../datasets/train_prefix.npy'
        self.train_suffix = '../datasets/train_suffix.npy'
        self.test_prefix = '../datasets/val_prefix.npy'
        # batch size of the DataLoaders built in this notebook
        self.bs = 16

args = argment()

In [19]:
accelerator = Accelerator(mixed_precision='fp16')

# load datasets
DATASET_PATH = '../datasets'

# prefix context: pre-prefix and prefix joined along axis 1, then cut down to
# the trailing `prefix_size` columns
prefixes = np.concatenate(
    [ut.load_prompts(args.train_preprefix), ut.load_prompts(args.train_prefix)],
    axis=1,
)[:, -args.prefix_size:]

# suffix targets: only the leading `suffix_size` columns are kept
suffixes = ut.load_prompts(args.train_suffix)[:, :args.suffix_size]

In [20]:
# sanity check: display the shapes of the prefix and suffix arrays loaded above
prefixes.shape, suffixes.shape

((15000, 50), (15000, 50))

In [21]:
# sample a random test set (the train halves of the split are discarded)
_, prefix_test, _, suffix_test = train_test_split(
    prefixes, suffixes, test_size=args.test_set_size
)
# or use last 1k samples for deterministic evaluation
# prefix_test, suffix_test = prefixes[-args.test_set_size:], suffixes[-args.test_set_size:]

# create dataloader: each row is a prefix followed by its suffix
test_ds = torch.cat(
    [torch.tensor(part, dtype=torch.int64) for part in (prefix_test, suffix_test)],
    dim=1,
)
# every GPU must work on the split that the main process (GPU ID 0) sampled
test_ds = broadcast(test_ds.cuda(), from_process=0)
test_loader = DataLoader(test_ds, batch_size=args.bs)

In [22]:
# sanity check: display the shapes of the sampled test prefixes and suffixes
prefix_test.shape, suffix_test.shape

((1000, 50), (1000, 50))

In [23]:
# samples coming from the test set of the Pile, this is to measure ppl for defense experiments
ppl_ds = ut.load_prompts(args.test_prefix)
print(ppl_ds.shape)
# shuffles the loaded array in place; no seed is set in the visible cells,
# so the subset selected below can differ between runs
np.random.shuffle(ppl_ds)
# keep the first `test_set_size` rows of the shuffled array as an int64 tensor
ppl_ds = torch.tensor(ppl_ds[:args.test_set_size], dtype=torch.int64)
print(ppl_ds.shape)
# all processes receive the subset held by process 0
ppl_ds = broadcast(ppl_ds.cuda(), from_process=0) 
ppl_loader = DataLoader(ppl_ds, batch_size=args.bs)

(1000, 50)
torch.Size([1000, 50])


In [24]:
# load model
# translate the configured size alias into a checkpoint name;
# any alias not listed here falls back to 'gpt2-xl'
MODEL_PATH = {
    'small': 'EleutherAI/gpt-neo-125M',
    'medium': 'EleutherAI/gpt-neo-1.3B',
    'large': 'EleutherAI/gpt-neo-2.7B',
    'gpt2': 'gpt2',
}.get(args.model_size, 'gpt2-xl')

accelerator.print('Loading model..')
model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_PATH)

# the baseline attack does no training; the optimizer exists only because
# accelerator/deepspeed expects one to be passed to prepare()
optimizer = torch.optim.AdamW(params=model.parameters())
model, optimizer, test_loader, ppl_loader = accelerator.prepare(
    model, optimizer, test_loader, ppl_loader
)


Loading model..


In [30]:
accelerator.print('Generating suffixes..')
# run generation over the test loader and stack the returned pieces along a new leading axis
generations_test = np.stack(
    ut.generate_suffixes_distributed(model, test_loader, args, accelerator),
    axis=0,
)

Generating suffixes..


100%|██████████| 63/63 [01:05<00:00,  1.03s/it]


In [61]:
def evaluate_distributed(model, data_loader, args, accelerator):
    """Get the inference loss on the supplied data loader (for distributed runs).

    For every batch the model's loss is multiplied by the batch length, gathered
    across processes with ``accelerator.gather`` and collected on the CPU. The
    collected values are truncated to ``args.test_set_size`` entries, summed and
    divided by ``args.test_set_size``.

    Args:
        model: callable accepting ``input_ids`` and ``labels`` keyword arguments
            and returning an object with a ``loss`` attribute; must provide ``eval()``.
        data_loader: iterable yielding 2-D token-id tensors (rows are samples).
        args: object exposing ``aligned``, ``suffix_size`` and ``test_set_size``.
        accelerator: object exposing ``gather(tensor)``.

    Returns:
        float: summed (batch-size weighted) loss divided by ``args.test_set_size``.
    """
    model.eval()
    batch_losses = []
    # inference_mode already disables gradient tracking, so no nested no_grad is needed
    with torch.inference_mode():
        for batch in data_loader:
            if args.aligned:
                labels = torch.clone(batch)
                # predicting only the last args.suffix_size tokens, so ignore
                # everything else in loss calculation; clamp at 0 so a sequence
                # shorter than suffix_size does not produce a negative slice stop
                n_ignored = max(labels.shape[1] - args.suffix_size, 0)
                labels[:, :n_ignored] = -100
            else:
                labels = batch
            outputs = model(input_ids=batch, labels=labels)
            gathered = accelerator.gather(outputs.loss * len(batch))
            # gather may hand back a 0-dim tensor (single process) or one entry
            # per process; flatten so both cases concatenate cleanly
            batch_losses.append(gathered.cpu().float().reshape(-1))

    # kept in a local variable: nothing is written to the notebook's global namespace
    if batch_losses:
        loss = torch.cat(batch_losses)
    else:
        loss = torch.zeros(0)
    # to match batch sizes, distributed runs pad the last batch;
    # extra entries beyond test_set_size are dropped by truncating
    loss = loss[:args.test_set_size]
    return (torch.sum(loss) / args.test_set_size).item()

In [62]:
# loss of the model on the perplexity loader, computed with the notebook-local evaluate_distributed
test_loss = evaluate_distributed(model, ppl_loader, args, accelerator)
# use this if you want to compute ppl with respect to the prompt test data
# test_loss = ut.evaluate_distributed(model, test_loader, args, accelerator)

In [63]:
# display the value returned by evaluate_distributed
test_loss

3.959226608276367

In [64]:
# reporting happens on the main process only
if accelerator.is_main_process:
    # measure fractional and exact match rates of the generations against the test suffixes
    fract_rate, exact_rate = ut.compute_reconstruct_rate(generations_test, suffix_test, args)
    accelerator.print(f'Exact/Fract extract rate:{exact_rate:.3f}/{fract_rate:.3f}')
    # exponential of the test loss
    # NOTE(review): "PLP" is presumably a typo for "PPL" (perplexity) -- confirm before renaming
    test_plp = np.exp(test_loss)
    accelerator.print(f'Test Loss/PLP:{test_loss:.3f}/{test_plp:.3f}')

Exact/Fract extract rate:0.006/0.103
Test Loss/PLP:3.959/52.417
