# Data extraction attack for forgetting

In [None]:
import torch 
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# parallel processing
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True, nb_workers=16)
from tqdm import tqdm
tqdm.pandas()

# utility
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import os

In [24]:
# Fix the random seed through the transformers helper so repeated runs of the
# notebook are comparable.
from transformers import set_seed
set_seed(42)

In [None]:
# loading the metrics
import evaluate
# Metric objects from the `evaluate` library, loaded once (in this order) and
# shared by the *_compute helpers defined below.
exact_match_metric, sacrebleu, meteor, rouge = (
    evaluate.load(metric_id)
    for metric_id in ("exact_match", "sacrebleu", "meteor", "rouge")
)


def bleu_compute(pred, suffix):
    """Score generated text against its reference with the SacreBLEU metric.

    Args:
        pred: list of generated strings (passed as the metric's predictions).
        suffix: list of reference strings, aligned with ``pred``.

    Returns:
        The value stored under ``"score"`` in the metric's result dict.
    """
    result = sacrebleu.compute(references=suffix, predictions=pred)
    return result["score"]

def em_compute(pred, suffix):
    """Return the exact-match rate between generated text and its reference.

    The generation is passed as ``predictions`` and the ground-truth suffix as
    ``references``, matching the other ``*_compute`` helpers in this notebook.

    Args:
        pred: list of generated strings.
        suffix: list of reference strings, aligned with ``pred``.

    Returns:
        The value stored under ``'exact_match'`` in the metric's result dict.
    """
    result = exact_match_metric.compute(predictions=pred, references=suffix)
    return result['exact_match']

def meteor_compute(pred, suffix):
    """Score generated text against its reference with the METEOR metric.

    Args:
        pred: list of generated strings (passed as the metric's predictions).
        suffix: list of reference strings, aligned with ``pred``.

    Returns:
        The value stored under ``"meteor"`` in the metric's result dict.
    """
    metric_kwargs = {"predictions": pred, "references": suffix}
    return meteor.compute(**metric_kwargs)["meteor"]

def rouge_compute(pred, suffix):
    """Score generated text against its reference with the ROUGE metric.

    Args:
        pred: list of generated strings (passed as the metric's predictions).
        suffix: list of reference strings, aligned with ``pred``.

    Returns:
        The value stored under ``"rougeL"`` in the metric's result dict.
    """
    scores = rouge.compute(
        predictions=pred,
        references=suffix,
    )
    return scores["rougeL"]

IMPORTANT TO CHANGE

Set `checkpoint` in the next cell to the base model you want to attack.

In [26]:
# Identifier of the base model to attack; keep exactly one line active.
# It is used to load the tokenizer and the base model, and the part after the
# dash (e.g. "3b") becomes the model tag in the result column names.
checkpoint = "bigcode/starcoder2-3b"
# checkpoint = "bigcode/starcoder2-7b"
# checkpoint = "bigcode/starcoder2-15b"

# Load the sample for the attack (g3 mem) and setup

In [None]:
# Read the g3 attack sample and keep only the prefix_* prompt columns plus the
# reference suffix, wrapped as a `datasets.Dataset`.
df = Dataset.from_pandas(
    pd.read_parquet(
        'mem-tune-replication_package/mem-tune/data/samples/pre-train/forget-attack/pre-train_attack_g3.parquet'
    )[['prefix_250', 'prefix_200', 'prefix_150', 'prefix_100', 'suffix']]
)

Parameters for this experiment:

- deduplication rate >3
- prefix length = 100

In [28]:
# Name of the dataset column fed to the model as the prompt; it must be one of
# the prefix_* columns kept when the sample was loaded.
p_length = 'prefix_100'

Setup the column names:

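- the generation column is named `<model size><epoch suffix>`, e.g. `7b_ep1`, `7b_ep2`, `7b_ep3` (`_ep0` is the base model)

- each metric column appends `_<metric name>` to the generation column name, e.g. `7b_ep3_bleu`, `7b_ep3_meteor`, ...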

In [29]:
# setup the experiment names

# Model-size tag: the text after the LAST dash of the checkpoint id
# ("bigcode/starcoder2-3b" -> "3b"). Splitting from the right keeps the tag
# correct even when an earlier part of the id or path contains a dash.
m_name = checkpoint.rsplit('-', 1)[1]  # take only the number of parameters

# Epoch suffixes appended to the model tag; index 0 labels the base model.
epochs = ['_ep0', '_ep1', '_ep2', '_ep3']


# Load the models

In [30]:
# tokenizer
# Loaded from the base checkpoint with left-side padding, so in a padded batch
# every prompt ends where generation starts.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side='left')
# Reuse the EOS token for padding.
# NOTE(review): presumably this tokenizer ships without a pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token

**IMPORTANT**

The fine-tuned models will also be shared on Hugging Face as part of the public replication package.

In [31]:
# Model dirs
#To change depending on which model you want to attack!

# Fine-tuned checkpoint directories, one per epoch, for the 3b model.
ep_dir = '/model_dir/scoder3b/epochs'
ep1, ep2, ep3 = (f'{ep_dir}/checkpoint-{step}' for step in (833, 1666, 2499))

# 7b model:
# ep_dir = '/model_dir/scoder7b/epochs'
# ep1, ep2, ep3 = (f'{ep_dir}/checkpoint-{step}' for step in (833, 1666, 2499))

# 15b model:
# ep_dir = '/model_dir/scoder15b/epochs'
# ep1, ep2, ep3 = (f'{ep_dir}/checkpoint-{step}' for step in (800, 1600, 2400))


In [None]:
# load the model
#To change depending on which model you want to attack!

# Pick the model to attack by changing ONLY the index below:
#   0 -> base model, 1 / 2 / 3 -> fine-tuned checkpoint of that epoch.
# The weights are looked up from the same label, so the model that gets loaded
# and the suffix used to name the result columns cannot drift apart.
epoch = epochs[0]

model_sources = {
    epochs[0]: checkpoint,  # base model
    epochs[1]: ep1,
    epochs[2]: ep2,
    epochs[3]: ep3,
}
model = AutoModelForCausalLM.from_pretrained(model_sources[epoch], device_map="auto")

# eval mode
model.eval()

In [34]:
# Text-generation pipeline around the loaded model and tokenizer; each prompt
# is continued by at most 50 newly generated tokens.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    framework='pt',
    max_new_tokens=50,
)

IMPORTANT

In [None]:
# IMPORTANT: gen_name (<model tag><epoch suffix>) becomes the name of the
# generation column and the prefix of every metric column, so check that the
# printed value matches the model that was actually loaded.
gen_name = f"{m_name}{epoch}"
print(gen_name)

# Now we can generate

In [36]:
# Materialise the prompt column once. Indexing the Dataset by column name
# inside the comprehension would fetch the whole column again for every row.
prompts = list(df[p_length])

gs_mem = pipe(prompts, batch_size=32)

# for forgetting
# Each generated_text starts with the prompt; slice it off so only the newly
# generated continuation is stored under gen_name.
df = df.add_column(
    gen_name,
    [outputs[0]['generated_text'][len(prompt):] for prompt, outputs in zip(prompts, gs_mem)],
)

# Evaluation

In [None]:
# Result column names: one per metric, each prefixed with the generation
# column name.
em, bleu, met, roug = (
    f'{gen_name}{tag}' for tag in ('_em', '_bleu', '_meteor', '_rougeL')
)

print(f"{em}\n {bleu}\n {met}\n {roug}")

First of all we need to load the eval dataframe and then attach the new generation

In [39]:
# Initial attack
# First run: build the evaluation frame directly from the Dataset that now
# holds the generation column.
df_eval= df.to_pandas()

# After is saved the first time
# Later runs: use the line below instead of the one above. It loads the
# previously saved results and attaches the new generation column by row index.
# df_eval = pd.merge((pd.read_parquet('mem-tune/evaluation/forgeting/experiments/attack_forg_ep/attack_forg_ep.parquet')), df.to_pandas()[gen_name], left_index =True, right_index=True)

In [None]:
# evaluation
# Score every row on its own: each metric helper receives one-element lists
# holding the generated text and its reference suffix, and the per-row score is
# written to that metric's column.
for column, metric_fn in (
    (em, em_compute),
    (bleu, bleu_compute),
    (met, meteor_compute),
    (roug, rouge_compute),
):
    df_eval[column] = df_eval.progress_apply(
        lambda row: metric_fn(pred=[row[gen_name]], suffix=[row['suffix']]),
        axis=1,
    )

In [42]:
# Save the results
# Written without the index column. This is the same file that the
# commented-out merge in the "Initial attack" cell reads back on later runs.
# NOTE(review): the input sample is read from a path rooted at
# 'mem-tune-replication_package/mem-tune/', while this path starts at
# 'mem-tune/' — confirm the intended working directory.
df_eval.to_parquet('mem-tune/evaluation/forgeting/experiments/attack_forg_ep/attack_forg_ep.parquet', index = False)