In [None]:
import sys
sys.path.append("PATH_TO_SRC_DIR") # Replace with the actual path to your source directory

# Enable hot autoreload
%load_ext autoreload
%autoreload 2

In [None]:
import pickle
import numpy as np
from sklearn.metrics import roc_curve
import torch
from typing import Sequence
from collections import defaultdict
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM
from utils import compute_perplexity, ratio_auc, min_k_prob
import zlib

np.random.seed(42)

  from .autonotebook import tqdm as notebook_tqdm


## Computing membership inference results

In this notebook, we provide the code to run MIAs against the saved target model, finetuned on a dataset containing (fuzzy) duplicates.

In [None]:
# Pickled inputs: each file is unpickled further down and every entry is decoded
# with the target tokenizer to recover the text.
OG_NON_MEMBER_PATH = "SOME_DATA_DIR/non_members.pickle"
OG_CANARY_PATH = "SOME_DATA_DIR/members.pickle"

# Local directory holding the Llama checkpoint; the same directory serves both the
# tokenizer and the model weights.
LLAMA_TOKENIZER_PATH = "SOME_DATA_DIR/Llama-2-7b-hf/"
LLAMA_MODEL_PATH = "SOME_DATA_DIR/Llama-2-7b-hf/"

# Identifier used to load the target tokenizer; the finetuned target weights are
# loaded separately from per-experiment checkpoint directories.
TARGET_MODEL = "EleutherAI/gpt-neo-1.3B"

## Let's apply MIAs for all fuzzy duplicates 

In [4]:
# Reference model (Llama): its perplexity is the denominator of the "ratio" attack.
llama_device = "cuda:0"
# A tokenizer has no weights, so `torch_dtype` does nothing here; the half-precision
# request belongs on the model load below.
llama_tokenizer = LlamaTokenizer.from_pretrained(LLAMA_TOKENIZER_PATH)
# Reuse EOS as the padding token so texts can be batched with padding="longest".
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_model = LlamaForCausalLM.from_pretrained(LLAMA_MODEL_PATH, torch_dtype=torch.float16).to(llama_device)

# Target tokenizer; the finetuned target checkpoints are loaded later, one per experiment.
target_tokenizer = AutoTokenizer.from_pretrained(TARGET_MODEL)
target_tokenizer.pad_token = target_tokenizer.eos_token
target_device = "cuda:1"

Loading checkpoint shards: 100%|██████████| 7/7 [00:06<00:00,  1.02it/s]


In [5]:
# Members (canaries): stored as pickled token-id sequences, decoded back to text here.
# NOTE: unpickling can execute arbitrary code -- only point these paths at trusted files.
with open(OG_CANARY_PATH, 'rb') as f:
    og_canaries = pickle.load(f)

og_canary_texts = [target_tokenizer.decode(token_ids) for token_ids in og_canaries]
# Lower-cased copies feed the "lowercase" attack.
og_canary_texts_lower = list(map(str.lower, og_canary_texts))

# Non-members: same storage format and decoding as the canaries.
with open(OG_NON_MEMBER_PATH, 'rb') as f:
    non_members = pickle.load(f)

non_member_texts = [target_tokenizer.decode(token_ids) for token_ids in non_members]
non_member_texts_lower = list(map(str.lower, non_member_texts))

In [6]:
def roc_auc(members: Sequence[float], non_members: Sequence[float]):
    """Compute the ROC curve of an attack, treating *members* as the positive class.

    The attack scores in this notebook are perplexity-based (and min-k prob is
    negated to match), i.e. a LOWER score is the more member-like one.
    `roc_curve` assumes the opposite (larger score = positive class), so the
    scores are negated and members are labelled 1. This leaves the AUC unchanged
    but makes the returned `tpr` the fraction of members detected and `fpr` the
    fraction of non-members wrongly flagged.

    Args:
        members: 1-D attack scores of the members.
        non_members: 1-D attack scores of the non-members.

    Returns:
        Tuple `(fpr, tpr)` as produced by `sklearn.metrics.roc_curve`.
    """
    member_scores = np.asarray(members, dtype=float)
    non_member_scores = np.asarray(non_members, dtype=float)

    # Negate so that "more member-like" corresponds to a larger decision score.
    decision_scores = -np.concatenate([member_scores, non_member_scores])
    y_true = np.concatenate([
        np.ones(len(member_scores), dtype=int),
        np.zeros(len(non_member_scores), dtype=int),
    ])

    fpr, tpr, _ = roc_curve(y_true, decision_scores)

    return fpr, tpr

In [7]:
def tpr_at_fpr(members, non_members, target_fpr):
    """Read the TPR off the ROC curve at the point whose FPR is nearest to `target_fpr`.

    The curve comes from `roc_auc(members, non_members)`. No interpolation is
    done: the operating point with the smallest absolute FPR distance is used,
    so its actual FPR may be slightly above or below `target_fpr`.

    Args:
        members: attack scores of the members.
        non_members: attack scores of the non-members.
        target_fpr: desired false-positive rate.

    Returns:
        The `tpr` entry at the selected operating point.
    """
    false_positive_rates, true_positive_rates = roc_auc(members, non_members)

    distance_to_target = np.abs(false_positive_rates - target_fpr)
    nearest = distance_to_target.argmin()
    return true_positive_rates[nearest]

In [8]:
def get_auc_with_bootstrapping(ratio_members, ratio_non_members, k=25):
    """Bootstrap the attack AUC by resampling both score sets with replacement.

    Each round draws a same-size resample of the member scores and of the
    non-member scores from NumPy's global RNG and evaluates `ratio_auc` on them.

    Args:
        ratio_members: 1-D attack scores of the members.
        ratio_non_members: 1-D attack scores of the non-members.
        k: number of bootstrap rounds.

    Returns:
        `np.ndarray` holding one `ratio_auc` result per round.
    """
    n_members = len(ratio_members)
    n_non_members = len(ratio_non_members)

    bootstrap_aucs = []
    for _round in range(k):
        # Members are drawn before non-members so the RNG stream is consumed in a fixed order.
        resampled_members = np.random.choice(ratio_members, size=n_members)
        resampled_non_members = np.random.choice(ratio_non_members, size=n_non_members)
        bootstrap_aucs.append(
            ratio_auc(members=resampled_members, non_members=resampled_non_members)
        )
    return np.array(bootstrap_aucs)

In [9]:
def ppl_from_text(texts, model, tokenizer, device):
    """Tokenize `texts` as one padded batch and score it with `compute_perplexity`.

    Args:
        texts: list of strings to score.
        model: language model handed to `compute_perplexity`.
        tokenizer: tokenizer used to encode `texts` (padded to the longest entry).
        device: device the encoded batch is moved to before scoring.

    Returns:
        Whatever `compute_perplexity` returns for the batch.
    """
    encoded = tokenizer.batch_encode_plus(texts, return_tensors="pt", padding="longest")
    encoded = encoded.to(device)

    # The first column is dropped from both the ids and the mask.
    # NOTE(review): presumably this strips a leading BOS token; for a tokenizer
    # that does not prepend one, a real token is discarded instead -- confirm.
    input_ids = encoded.input_ids[:, 1:]
    attention_mask = encoded.attention_mask[:, 1:]

    return compute_perplexity(model, input_ids, attention_mask, ignore_prefix=None)

## First, let's do it for exact duplicates

In [None]:
# Maps nrep -> attack name -> array of bootstrapped AUCs.
nrep_aucs = defaultdict(dict)

# "XX" is substituted with the number of exact repetitions for each checkpoint.
MODEL_CHECKPOINT_PATH = "SOME_DATA_DIR/model_checkpoints/EleutherAI_gpt-neo-1.3B_checkpoints/EleutherAI_gpt-neo-1.3B_gptneo1B_exact_duplicates_nrepXX_lr2e5"

# Quantities that do not depend on the finetuned checkpoint are computed once,
# instead of once per nrep: the reference-model perplexities and the zlib entropies.
non_member_llama_ppl = ppl_from_text(non_member_texts, llama_model, llama_tokenizer, llama_device)
og_canary_llama_ppl = ppl_from_text(og_canary_texts, llama_model, llama_tokenizer, llama_device)
non_member_zlib_entropy = [len(zlib.compress(x.encode()))/len(x) for x in non_member_texts]
og_canary_zlib_entropy = [len(zlib.compress(x.encode()))/len(x) for x in og_canary_texts]

for nrep in range(1, 11):

    print("nrep = ", nrep)

    score_members = {}
    score_non_members = {}

    target_path = MODEL_CHECKPOINT_PATH.replace("XX", str(nrep))

    # Release the previous checkpoint before loading the next one, so two target
    # models never have to fit on the GPU at the same time.
    target_model = None
    torch.cuda.empty_cache()
    target_model = AutoModelForCausalLM.from_pretrained(target_path).to(target_device)

    # first: non members
    print("Running on non members...")
    non_member_target_ppl = ppl_from_text(non_member_texts, target_model, target_tokenizer, target_device)
    non_member_lower_target_ppl = ppl_from_text(non_member_texts_lower, target_model, target_tokenizer, target_device)

    target_tokens_non_members = target_tokenizer.batch_encode_plus(non_member_texts, return_tensors="pt", padding="longest").to(target_device)

    score_non_members["ratio"] = non_member_target_ppl / non_member_llama_ppl
    score_non_members["loss"] = non_member_target_ppl
    score_non_members["lowercase"] = non_member_target_ppl / non_member_lower_target_ppl
    # Negated so that, like the perplexity-based scores, lower means more member-like.
    score_non_members["minkprob"] = -min_k_prob(target_model, target_tokens_non_members.input_ids, target_tokens_non_members.attention_mask)
    score_non_members["zlib"] = np.log(non_member_target_ppl) / non_member_zlib_entropy

    # now: members (canaries)
    print("Running on canaries...")
    og_canary_target_ppl = ppl_from_text(og_canary_texts, target_model, target_tokenizer, target_device)
    og_canary_lower_target_ppl = ppl_from_text(og_canary_texts_lower, target_model, target_tokenizer, target_device)

    target_tokens_og_canary = target_tokenizer.batch_encode_plus(og_canary_texts, return_tensors="pt", padding="longest").to(target_device)

    score_members["ratio"] = og_canary_target_ppl / og_canary_llama_ppl
    score_members["loss"] = og_canary_target_ppl
    score_members["lowercase"] = og_canary_target_ppl / og_canary_lower_target_ppl
    score_members["minkprob"] = -min_k_prob(target_model, target_tokens_og_canary.input_ids, target_tokens_og_canary.attention_mask)
    score_members["zlib"] = np.log(og_canary_target_ppl) / og_canary_zlib_entropy

    # Evaluate every attack: bootstrapped AUC plus TPR at a fixed FPR.
    for mia in score_members:
        aucs = get_auc_with_bootstrapping(ratio_members=score_members[mia], ratio_non_members=score_non_members[mia])
        tpr_at = tpr_at_fpr(score_members[mia], score_non_members[mia], target_fpr=0.1)
        print(f"nrep={nrep}, MIA={mia}, AUC = {np.mean(aucs):.2f} ± {np.std(aucs):.2f}, TPR @ 0.1 FPR = {tpr_at:.2f}")
        nrep_aucs[nrep][mia] = aucs
    print("-------------------")

In [None]:
# Persist the bootstrapped AUCs of the exact-duplicate experiment (one entry per nrep).
with open("SOME_DATA_DIR/nrep_aucs_gptneo1B_exactduplicates_lr2e5.pickle", mode="wb") as f:
    pickle.dump(obj=nrep_aucs, file=f)

## Now also do the fuzzy duplicates

In [None]:
# Maps R -> attack name -> array of bootstrapped AUCs.
R_aucs = defaultdict(dict)

# "XX" is substituted with the value of R for each checkpoint.
MODEL_CHECKPOINT_PATH = "SOME_DATA_DIR/model_checkpoints/EleutherAI_gpt-neo-1.3B_checkpoints/EleutherAI_gpt-neo-1.3B_gptneo1B_near_duplicates_diff_indices_topk10_RXX_lr2e5"

# Quantities that do not depend on the finetuned checkpoint are computed once,
# instead of once per R: the reference-model perplexities and the zlib entropies.
non_member_llama_ppl = ppl_from_text(non_member_texts, llama_model, llama_tokenizer, llama_device)
og_canary_llama_ppl = ppl_from_text(og_canary_texts, llama_model, llama_tokenizer, llama_device)
non_member_zlib_entropy = [len(zlib.compress(x.encode()))/len(x) for x in non_member_texts]
og_canary_zlib_entropy = [len(zlib.compress(x.encode()))/len(x) for x in og_canary_texts]

for R in (1, 5, 10, 15, 20, 25, 50, 75):

    print("R = ", R)

    score_members = {}
    score_non_members = {}

    target_path = MODEL_CHECKPOINT_PATH.replace("XX", str(R))

    # Release the previous checkpoint before loading the next one, so two target
    # models never have to fit on the GPU at the same time.
    target_model = None
    torch.cuda.empty_cache()
    target_model = AutoModelForCausalLM.from_pretrained(target_path).to(target_device)

    # first: non members
    print("Running on non members...")
    non_member_target_ppl = ppl_from_text(non_member_texts, target_model, target_tokenizer, target_device)
    non_member_lower_target_ppl = ppl_from_text(non_member_texts_lower, target_model, target_tokenizer, target_device)

    target_tokens_non_members = target_tokenizer.batch_encode_plus(non_member_texts, return_tensors="pt", padding="longest").to(target_device)

    score_non_members["ratio"] = non_member_target_ppl / non_member_llama_ppl
    score_non_members["loss"] = non_member_target_ppl
    score_non_members["lowercase"] = non_member_target_ppl / non_member_lower_target_ppl
    # Negated so that, like the perplexity-based scores, lower means more member-like.
    score_non_members["minkprob"] = -min_k_prob(target_model, target_tokens_non_members.input_ids, target_tokens_non_members.attention_mask)
    score_non_members["zlib"] = np.log(non_member_target_ppl) / non_member_zlib_entropy

    # now: members (canaries)
    print("Running on canaries...")
    og_canary_target_ppl = ppl_from_text(og_canary_texts, target_model, target_tokenizer, target_device)
    og_canary_lower_target_ppl = ppl_from_text(og_canary_texts_lower, target_model, target_tokenizer, target_device)

    target_tokens_og_canary = target_tokenizer.batch_encode_plus(og_canary_texts, return_tensors="pt", padding="longest").to(target_device)

    score_members["ratio"] = og_canary_target_ppl / og_canary_llama_ppl
    score_members["loss"] = og_canary_target_ppl
    score_members["lowercase"] = og_canary_target_ppl / og_canary_lower_target_ppl
    score_members["minkprob"] = -min_k_prob(target_model, target_tokens_og_canary.input_ids, target_tokens_og_canary.attention_mask)
    score_members["zlib"] = np.log(og_canary_target_ppl) / og_canary_zlib_entropy

    # Evaluate every attack: bootstrapped AUC plus TPR at a fixed FPR.
    for mia in score_members:
        aucs = get_auc_with_bootstrapping(ratio_members=score_members[mia], ratio_non_members=score_non_members[mia])
        tpr_at = tpr_at_fpr(score_members[mia], score_non_members[mia], target_fpr=0.1)
        print(f"R={R}, MIA={mia}, AUC = {np.mean(aucs):.2f} ± {np.std(aucs):.2f}, TPR @ 0.1 FPR = {tpr_at:.2f}")
        R_aucs[R][mia] = aucs
    print("-------------------")

In [None]:
# Persist the bootstrapped AUCs of the fuzzy-duplicate experiment (one entry per R).
# NOTE(review): the file name still contains the literal "RXX" placeholder; kept verbatim.
with open("SOME_DATA_DIR/R_aucs_gptneo1B_nearduplicates_diffindices_topk10_RXX_lr2e5.pickle", mode="wb") as f:
    pickle.dump(obj=R_aucs, file=f)