## An additional extraction experiment

In [1]:
import pickle
import numpy as np
from sklearn.metrics import roc_curve
import torch
from typing import Sequence
from collections import defaultdict
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Configuration for the first (upper-bound) run, plus tokenizer and canary loading.
# let's start by loading a model with nrep=10 as upper bound

# SEED selects the canary file below; SEED and NREP both select the checkpoint directory.
# NOTE(review): presumably NREP is the number of times each canary was repeated
# during fine-tuning -- confirm against the training script.
SEED = 1
NREP = 10

target_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
# Reuse EOS as the padding token; later cells build the attention mask by
# comparing token ids against pad_token_id.
target_tokenizer.pad_token = target_tokenizer.eos_token

# load the canaries
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(f"./data/members_journal_gpt_seed{SEED}.pickle", "rb") as f:
    canaries = pickle.load(f)

In [None]:
# Checkpoint directory is derived from SEED and NREP defined in the configuration cell.
model_path = f"SOME_DATA_DIR/EleutherAI_gpt-neo-1.3B_gptneo1B_exact_duplicates_gpt_seed{SEED}_nrep{NREP}_lr2e5"
# Device used for the model and for every input tensor built in later cells.
target_device = "cuda:0"

# Load the checkpoint and move all of its weights onto target_device.
target_model = AutoModelForCausalLM.from_pretrained(model_path).to(target_device)

Loading checkpoint shards: 100%|██████████| 2/2 [00:23<00:00, 11.84s/it]


In [4]:
from nltk.translate.bleu_score import sentence_bleu
from nltk import word_tokenize

def compute_bleu(reference_str, hypothesis_str) -> float:
    """Score ``hypothesis_str`` against a single reference with sentence-level BLEU.

    Both strings are split into words with ``word_tokenize`` and passed to
    ``sentence_bleu`` without any extra arguments (no custom weights, no
    smoothing function). No truncation or length matching is applied, so a
    generation that is shorter or longer than the reference is scored as-is.

    Args:
        reference_str: Ground-truth text.
        hypothesis_str: Generated text to evaluate.

    Returns:
        The value returned by ``sentence_bleu``.
    """
    references = [word_tokenize(reference_str)]
    hypothesis = word_tokenize(hypothesis_str)
    return sentence_bleu(references, hypothesis)

def edit_distance(seq1: Sequence, seq2: Sequence) -> int:
    """Levenshtein distance between two sequences, compared element by element.

    Only two rows of the dynamic-programming table are kept at a time, and the
    rows are sized by the shorter of the two inputs.

    Args:
        seq1: First sequence (e.g. a list of token ids).
        seq2: Second sequence.

    Returns:
        Minimum number of substitutions, deletions and insertions needed to
        turn one sequence into the other.
    """
    # Let the shorter sequence define the row width.
    if len(seq1) <= len(seq2):
        shorter, longer = seq1, seq2
    else:
        shorter, longer = seq2, seq1

    previous = list(range(len(shorter) + 1))

    for row_idx, long_item in enumerate(longer, start=1):
        # First column: distance from an empty prefix of `shorter`.
        current = [row_idx]
        for col_idx, short_item in enumerate(shorter, start=1):
            if short_item == long_item:
                cost = previous[col_idx - 1]
            else:
                cost = 1 + min(
                    previous[col_idx - 1],  # substitution
                    previous[col_idx],      # deletion
                    current[col_idx - 1],   # insertion
                )
            current.append(cost)
        previous = current

    return previous[-1]

def edit_similarity(seq1: Sequence, seq2: Sequence) -> float:
    """Normalised edit similarity: ``1 - edit_distance / max(len(seq1), len(seq2))``.

    Args:
        seq1: First sequence (e.g. a list of token ids).
        seq2: Second sequence.

    Returns:
        1.0 for identical sequences, 0.0 when the edit distance equals the
        length of the longer sequence. Two empty sequences are treated as
        identical and yield 1.0.
    """
    max_len = max(len(seq1), len(seq2))
    if max_len == 0:
        # Both inputs are empty: they are trivially equal, and dividing by
        # max_len would raise ZeroDivisionError.
        return 1.0

    edit_dist = edit_distance(seq1, seq2)
    return 1 - (edit_dist / max_len)

In [5]:
def compute_extractability(model, tokenizer,
    input_ids: torch.Tensor,
    attention_mask: torch.Tensor = None,
    secret_len: int = 1,
    verbose=False
):
    """Measure how well greedy decoding reproduces the tail of each sequence.

    Each row of ``input_ids`` is split into a prompt (all but the last
    ``secret_len`` tokens) and a secret (the last ``secret_len`` tokens). The
    model greedily continues the prompt up to the original total length, and
    the continuation is compared with the secret.

    Args:
        model: Object exposing ``generate(inputs=..., max_length=...,
            pad_token_id=..., do_sample=..., attention_mask=...)``.
        tokenizer: Object exposing ``eos_token_id`` and ``batch_decode``.
        input_ids: 2-D tensor of token ids, indexed as (sequence, position).
        attention_mask: Optional mask indexed like ``input_ids``; only the
            prompt columns are used. When ``None``, every prompt token is
            attended to.
        secret_len: Number of trailing tokens treated as the secret. Must
            satisfy ``0 < secret_len < input_ids.shape[1]``.
        verbose: When true, print prompt, secret, generation and both scores
            for every sequence.

    Returns:
        ``(all_bleu_scores, all_edit_sims)``: two lists with one entry per
        sequence. BLEU is computed on the decoded text, edit similarity on the
        token ids.

    Raises:
        ValueError: If ``secret_len`` leaves an empty prompt or an empty secret.
    """
    total_length = input_ids.shape[1]
    if not 0 < secret_len < total_length:
        raise ValueError(
            f"secret_len must be in (0, {total_length}), got {secret_len}"
        )
    prompt_length = total_length - secret_len

    prompt_tokens = input_ids[:, :prompt_length]
    secret_tokens = input_ids[:, prompt_length:]
    if attention_mask is None:
        # The default of None used to crash on slicing; attend to the full prompt instead.
        attention_mask = torch.ones_like(prompt_tokens)
    else:
        attention_mask = attention_mask[:, :prompt_length]

    greedy_output = model.generate(
        inputs=prompt_tokens,
        max_length=total_length,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False,
        attention_mask=attention_mask,
    )
    # The output starts with the prompt; keep only the newly generated part.
    generated_tokens = greedy_output[:, prompt_length:]

    # let's decode it
    original_prompt = tokenizer.batch_decode(prompt_tokens, skip_special_tokens=True)
    original_secret_text = tokenizer.batch_decode(secret_tokens, skip_special_tokens=True)
    generated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

    all_bleu_scores = []
    all_edit_sims = []
    for i in range(len(greedy_output)):

        bleu = compute_bleu(original_secret_text[i], generated_text[i])
        edit_sim = edit_similarity(
            secret_tokens[i].tolist(),
            generated_tokens[i].tolist()
        )

        all_bleu_scores.append(bleu)
        all_edit_sims.append(edit_sim)

        if verbose:
            print('Original prompt')
            print(original_prompt[i])
            print('---')
            print('Original text')
            print(original_secret_text[i])
            print('---')
            print('Generated text')
            print(generated_text[i])

            print('BLEU score:', bleu)
            # The value printed is the similarity, not the raw distance.
            print('Edit similarity:', edit_sim)
            print('=======')

    return all_bleu_scores, all_edit_sims

In [None]:
# now do this for all canaries

# canaries is a list of 100 canaries of 100 tokens each, so already encoded
input_ids = torch.tensor(canaries).to(target_device)
# Mask out positions holding the pad token. Because pad_token was set to
# eos_token when the tokenizer was configured, any EOS token that occurs
# inside a canary is masked as well.
attention_mask = (input_ids != target_tokenizer.pad_token_id).long().to(target_device)

# The last 50 tokens of each canary are held out as the secret; the remaining
# leading tokens are used as the prompt.
all_bleu_scores, all_edit_sims = compute_extractability(
    model=target_model,
    tokenizer=target_tokenizer,
    input_ids=input_ids,
    attention_mask=attention_mask,
    secret_len=50,
)

In [8]:
# print mean and max
# Scores are aggregated over all canaries; np.std is called with its default arguments.
print('Mean BLEU score:', np.mean(all_bleu_scores), 'std:', np.std(all_bleu_scores))
print('Max BLEU score:', np.max(all_bleu_scores))
print('Mean Edit sim:', np.mean(all_edit_sims), 'std:', np.std(all_edit_sims))
print('Max Edit sim:', np.max(all_edit_sims))

Mean BLEU score: 0.1750160768175124 std: 0.24549921118754028
Max BLEU score: 1.0
Mean Edit sim: 0.20939999999999998 std: 0.2298426418226174
Max Edit sim: 1.0


In [None]:
# first do this for the exact models
# One checkpoint is evaluated per (nrep, seed) pair; scores are pooled over
# seeds and summarised once per nrep.
nreps = (1, 10,)
seeds = (1, )

for nrep in nreps:

    all_bleu_scores = []
    all_edit_sims = []

    for seed in seeds:

        # Release the previous checkpoint before loading the next one, so that
        # two models are never resident on the GPU at the same time.
        target_model = None
        torch.cuda.empty_cache()

        # load the model
        model_path = f"SOME_DATA_DIR/EleutherAI_gpt-neo-1.3B_gptneo1B_exact_duplicates_gpt_seed{seed}_nrep{nrep}_lr2e5"
        target_model = AutoModelForCausalLM.from_pretrained(model_path).to(target_device)

        # load the canaries
        # NOTE(review): this path differs from the "./data/members_journal_gpt_seed{seed}.pickle"
        # file used in the other cells -- confirm both point at the same canaries.
        # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
        with open(f"SOME_DATA_DIR/members_seed{seed}.pickle", "rb") as f:
            canaries = pickle.load(f)

        input_ids = torch.tensor(canaries).to(target_device)
        # pad_token_id equals eos_token_id for this tokenizer, so EOS tokens
        # inside a canary are masked out as well.
        attention_mask = (input_ids != target_tokenizer.pad_token_id).long().to(target_device)

        # The last 50 tokens of each canary are the secret to be extracted.
        seed_bleu_scores, seed_edit_sims = compute_extractability(
            model=target_model,
            tokenizer=target_tokenizer,
            input_ids=input_ids,
            attention_mask=attention_mask,
            secret_len=50,
        )

        all_bleu_scores.extend(seed_bleu_scores)
        all_edit_sims.extend(seed_edit_sims)

    print(f'nrep={nrep}')
    print('Mean BLEU score:', np.mean(all_bleu_scores), 'std:', np.std(all_bleu_scores))
    print('Mean Edit sim:', np.mean(all_edit_sims), 'std:', np.std(all_edit_sims))
    

Loading checkpoint shards: 100%|██████████| 2/2 [00:26<00:00, 13.05s/it]


nrep=10
Mean BLEU score: 0.1750160768175124 std: 0.24549921118754028
Mean Edit sim: 0.20939999999999998 std: 0.2298426418226174


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [None]:
# now for the fuzzy dupls
# One checkpoint is evaluated per (R, seed) pair; scores are pooled over seeds
# and summarised once per R.

Rs = [1, 5, 10, 15, 20, 25, 50, 75]
seeds = (1, )

for R in Rs:

    all_bleu_scores = []
    all_edit_sims = []

    for seed in seeds:

        # Release the previous checkpoint before loading the next one, so that
        # two models are never resident on the GPU at the same time.
        target_model = None
        torch.cuda.empty_cache()

        # load the model
        # NOTE(review): these are gpt-neo-2.7B checkpoints, while target_tokenizer
        # was loaded from "EleutherAI/gpt-neo-1.3B" -- confirm the two share a vocabulary.
        model_path = f"SOME_DATA_DIR/EleutherAI_gpt-neo-2.7B_gptneo2.7B_near_duplicates_diff_indices_topk10_gpt_R{R}_seed{seed}_lr2e5"
        target_model = AutoModelForCausalLM.from_pretrained(model_path).to(target_device)

        # load the canaries
        # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
        with open(f"./data/members_journal_gpt_seed{seed}.pickle", "rb") as f:
            canaries = pickle.load(f)

        input_ids = torch.tensor(canaries).to(target_device)
        # pad_token_id equals eos_token_id for this tokenizer, so EOS tokens
        # inside a canary are masked out as well.
        attention_mask = (input_ids != target_tokenizer.pad_token_id).long().to(target_device)

        # The last 50 tokens of each canary are the secret to be extracted.
        seed_bleu_scores, seed_edit_sims = compute_extractability(
            model=target_model,
            tokenizer=target_tokenizer,
            input_ids=input_ids,
            attention_mask=attention_mask,
            secret_len=50,
        )

        all_bleu_scores.extend(seed_bleu_scores)
        all_edit_sims.extend(seed_edit_sims)

    print(f'R={R}')
    print('Mean BLEU score:', np.mean(all_bleu_scores), 'std:', np.std(all_bleu_scores))
    print('Mean Edit sim:', np.mean(all_edit_sims), 'std:', np.std(all_edit_sims))
    

Loading checkpoint shards:  33%|███▎      | 1/3 [00:29<00:58, 29.06s/it]

Loading checkpoint shards: 100%|██████████| 3/3 [01:13<00:00, 24.63s/it]


R=1
Mean BLEU score: 0.27648182593829146 std: 0.2618339985391867
Mean Edit sim: 0.2988 std: 0.26232529424361656


Loading checkpoint shards: 100%|██████████| 3/3 [00:46<00:00, 15.41s/it]


R=5
Mean BLEU score: 0.23656901925427076 std: 0.23652144118811755
Mean Edit sim: 0.2706 std: 0.23767970043737435


Loading checkpoint shards: 100%|██████████| 3/3 [00:44<00:00, 14.94s/it]


R=10
Mean BLEU score: 0.16965976280805767 std: 0.2010352439100575
Mean Edit sim: 0.20260000000000006 std: 0.1946413111340961


Loading checkpoint shards: 100%|██████████| 3/3 [00:39<00:00, 13.17s/it]


R=15
Mean BLEU score: 0.1416696296490674 std: 0.20021217234425376
Mean Edit sim: 0.17840000000000003 std: 0.18669076034983628


Loading checkpoint shards: 100%|██████████| 3/3 [00:51<00:00, 17.26s/it]


R=20
Mean BLEU score: 0.11093556054925362 std: 0.1738619390833732
Mean Edit sim: 0.15660000000000002 std: 0.1810426469095058


Loading checkpoint shards: 100%|██████████| 3/3 [00:50<00:00, 16.97s/it]


R=25
Mean BLEU score: 0.10588469671678759 std: 0.19301640133018072
Mean Edit sim: 0.15860000000000002 std: 0.18519730019630418


Loading checkpoint shards: 100%|██████████| 3/3 [00:41<00:00, 13.96s/it]


R=50
Mean BLEU score: 0.07309259352906256 std: 0.1573960144231942
Mean Edit sim: 0.12380000000000001 std: 0.15501470897950295


Loading checkpoint shards: 100%|██████████| 3/3 [00:40<00:00, 13.39s/it]


R=75
Mean BLEU score: 0.06925742476193107 std: 0.14212570307405026
Mean Edit sim: 0.12459999999999999 std: 0.14944845265174211
