In [1]:
import sys
!{sys.executable} -m pip -q install scikit-learn torchmetrics transformers==4.42.1 datasets evaluate nltk bert_score tiktoken pytest sentencepiece

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [2]:
sys.path.append('/mlx_devbox/users/james.flemings/privacy_hallucination_llm')

In [3]:
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from collections import OrderedDict
import numpy as np
import torchmetrics
from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, GenerationConfig
import os
import pandas as pd

np.random.seed(42)

In [4]:
torch.cuda.is_available()

True

In [5]:
!nvidia-smi

Fri Aug  9 09:33:59 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.129.06   Driver Version: 470.129.06   CUDA Version: 12.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  On   | 00000000:16:00.0 Off |                    0 |
| N/A   35C    P0    90W / 400W |      3MiB / 81251MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import BertForSequenceClassification, BertTokenizer

class Evaluator:
    def __init__(self, metrics=None):
        if not metrics:
            metrics = ["rouge", "sacre_bleu", "bertscore", "factkb"]
        self.metrics = metrics
    
    def evaluate(self, predictions, references, documents, metrics=["rouge", "bertscore", "factkb", "factcc", "alignscore"]):
        result_dict = OrderedDict()
        if "rouge" in metrics:
            rouge_dict = self.calculate_rouge(predictions, references)
            for k, v in rouge_dict.items():
                result_dict[k] = v
        if "sacre_bleu" in metrics:
            sacre_bleu_dict = self.calculate_sacrebleu(predictions, references)
            for k, v in sacre_bleu_dict.items():
                result_dict[k] = v
        if "bertscore" in metrics:
            bertscore_dict = self.calculate_bertscore(predictions, references)
            for k, v in bertscore_dict.items():
                result_dict[k] = v
        if "factkb" in metrics:
            result_dict["factkb"] = self.calculate_factkb(predictions, documents)
            
        if "factcc" in metrics:
            result_dict["factcc"] = self.calculate_factcc(predictions, documents)
        
        if "alignscore" in metrics:
            result_dict["alignscore"] = self.calculate_alignscore(predictions, documents) 

        for k, v in result_dict.items():
            print(f"{k} -> {v*100:.2f}")
        return result_dict

    def calculate_rouge(self, predictions, references):
        from torchmetrics.functional.text.rouge import rouge_score
        rouge_dict = rouge_score(preds=predictions, target=references)
        return {k: v.item() for k, v in rouge_dict.items()}

    def calculate_sacrebleu(self, predictions, references):
        from torchmetrics.functional.text import sacre_bleu_score
        score = sacre_bleu_score(preds=predictions, target=[[i] for i in references])
        return {"sacre_bleu": score.item()}

    def calculate_bertscore(self, predictions, references):
        import evaluate
        bertscore = evaluate.load("bertscore")
        bertscore_dict = bertscore.compute(predictions=predictions, references=references, model_type="roberta-large-mnli")
        res = {"bertscore_precision": np.mean(bertscore_dict["precision"]), "bertscore_recall": np.mean(bertscore_dict["recall"]), "bertscore_f1": np.mean(bertscore_dict["f1"])}
        return {k: v.item() for k, v in res.items()}
    
    def calculate_alignscore(self, predictions, documents):
        from AlignScore.src.alignscore import AlignScore
        ckpt_path = "/mlx_devbox/users/james.flemings/privacy_hallucination_llm/models/AlignScore-base.ckpt"
        device = "cuda" if torch.cuda.is_available() else "cpu"
        align_scorer = AlignScore(model='roberta-base', batch_size=8, device=device, ckpt_path=ckpt_path, evaluation_mode='nli_sp')
        alignscore_result = align_scorer.score(contexts=documents, claims=predictions)
        #total_result['AlignScore'] = 100*np.mean(alignscore_result)
        return np.mean(alignscore_result)

    def calculate_factkb(self, predictions, documents):
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained("roberta-base", padding="max_length", truncation=True)
        model = AutoModelForSequenceClassification.from_pretrained("bunsenfeng/FactKB", torch_dtype=torch.float16)
        model = model.to(device)
        res = []
        for i in range(len(predictions)):
            input_pretokenized = f"{predictions[i]} {tokenizer.sep_token} {documents[i]}"
            tokenized_input = tokenizer(input_pretokenized, return_tensors="pt", truncation=True, max_length=512)
            with torch.no_grad():
                output = model(input_ids=tokenized_input.input_ids.to(device))
            logits = torch.softmax(output.logits, dim=1)  # (bz, 2)
            res.append(logits.squeeze()[-1].item())
        return np.mean(res)
    
    def calculate_factcc(self, predictions, documents):
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model_path = 'manueldeprada/FactCC'
        tokenizer = BertTokenizer.from_pretrained(model_path)
        model = BertForSequenceClassification.from_pretrained(model_path)
        device = torch.device(f'cuda:0')
        model.to(device)
        res = []
        for i in range(len(predictions)):
            input_pretokenized = f"{predictions[i]} {tokenizer.sep_token} {documents[i]}"
            input_dict = tokenizer(documents[i], predictions[i], max_length=512, padding='max_length', truncation='only_first', return_tensors='pt').to(device)
            #tokenized_input = tokenizer(input_pretokenized, return_tensors="pt", truncation=True, max_length=512)
            with torch.no_grad():
                output = model(**input_dict)
            pred = output.logits.argmax(dim=1)
            res.append(pred.item())
        return (1-np.mean(res))    

In [7]:
# Utility functions

def xsum_pretokenize(dataset, tokenizer, max_input_length):
    data = {"context": [], "query": [], "summary": []}
    for i, row in tqdm(enumerate(dataset), desc="truncating documents..."):
        trunc_doc = tokenizer.batch_decode(tokenizer(row['document'], return_tensors="pt", max_length=max_input_length,  truncation=True).input_ids, skip_special_tokens=True)[0]
        data['context'].append(trunc_doc)
        data['summary'].append(row['summary'])
        data["query"].append("Summarize the article in one sentence. Summary:")
    return Dataset.from_dict(data)

def cnn_pretokenize(dataset, tokenizer, max_input_length):
    data = {"context": [], "query": [], "summary": []}
    for i, row in tqdm(enumerate(dataset), desc="truncating documents..."):
        trunc_doc = tokenizer.batch_decode(tokenizer(row['article'], return_tensors="pt", max_length=max_input_length,  truncation=True).input_ids, skip_special_tokens=True)[0]
        data['context'].append(trunc_doc)
        data['summary'].append(row['highlights'])
        data['query'].append("Summarize the article in one sentence. Summary:")
    return Dataset.from_dict(data)

def pubmedqa_pretokenize(dataset, tokenizer, max_input_length):
    data = {"context": [], "query": [], "summary": []}
    for i, row in tqdm(enumerate(dataset), desc="truncating documents..."):
        context= ''.join(c for c in row['context']['contexts'])
        trunc_doc = tokenizer.batch_decode(tokenizer(context, return_tensors="pt", max_length=max_input_length, truncation=True).input_ids, skip_special_tokens=True)[0]
        data['context'].append(trunc_doc)
        data['summary'].append(row['long_answer'])
        data['query'].append(f"Question: {row['question']}. Answer:")
    return Dataset.from_dict(data)

def pretokenize(dataset_name, dataset, tokenizer, max_input_length):
    if dataset_name == "xsum":
        return xsum_pretokenize(dataset, tokenizer, max_input_length)
    elif dataset_name == "cnn":
        return cnn_pretokenize(dataset, tokenizer, max_input_length)
    elif dataset_name == "PubMedQA":
        return pubmedqa_pretokenize(dataset, tokenizer, max_input_length)
    return None

def template_input(row, dataset):
    if dataset == "xsum" or dataset == "cnn":
        return f"Article: {row['context']}. {row['query']}"
    elif dataset == "PubMedQA":
        return f"Document: {row['context']}. {row['query']}"
    else:
        return ""

def template_empty_input(row, dataset):
    if dataset == "xsum" or dataset == "cnn":
        return f"Article: . {row['query']}"
    elif dataset == "PubMedQA":
        return f"Document: . {row['query']}"
    else:
        return ""

In [8]:
# Hyperparamters 
top_k = 50
top_p = 0.9
temp = 0.8
min_new_tokens = 10
max_new_tokens = 50
do_sample=True
num_beams=1

dataset_name="PubMedQA"
model_name="facebook/opt-6.7b"
batch_size=8
max_input_length=2048
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
access_token = "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"

In [9]:
tokenizer = AutoTokenizer.from_pretrained(model_name,
                                          padding_side="left",
                                          use_fast=False,
                                          token=access_token,
                                          trust_remote_code=True)
if tokenizer.pad_token is None:
    print("True")
    tokenizer.pad_token, tokenizer.pad_token_id = tokenizer.eos_token, tokenizer.eos_token_id

In [10]:
if dataset_name == "PubMedQA":
    raw_test_set = load_dataset("qiaojin/PubMedQA", "pqa_labeled")['train']
elif dataset_name == 'xsum':
    raw_test_set = load_dataset(dataset_name, split="test[:1000]")
elif dataset_name == 'cnn':
    raw_test_set = load_dataset("abisee/cnn_dailymail", "3.0.0", split="test[:1000]")

In [11]:
test_set = pretokenize(dataset_name, raw_test_set, tokenizer, max_input_length)

truncating documents...: 1000it [00:06, 159.72it/s]


In [11]:
config = AutoConfig.from_pretrained(model_name)

In [12]:
gen_config = GenerationConfig(
    min_new_tokens=min_new_tokens,
    max_new_tokens=max_new_tokens,
    early_stopping=False,
    do_sample=do_sample,
    num_beans=num_beams,
    #top_k=top_k,
    #top_p=top_p,
    temperature=temp,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

In [13]:
num_batch = len(test_set) // batch_size + int((len(test_set) % batch_size) != 0)

In [42]:
# Code for Pure DP decoding 
import torch.nn.functional as F
from scipy.optimize import bisect
    
def top_k_top_p_filtering(logits, top_k, top_p, filter_value=-float("Inf")):
    indicies_to_remove = 0
    if top_k > 0:
        #  Remove all tokens with a probability less than the last token of the top-k
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
        logits[indices_to_remove] = filter_value
    
    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold (token with 0 are kept)
        sorted_indices_to_remove = cumulative_probs > top_p

        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0
        # scatter sorted tensors to original indexing
        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
        logits[indices_to_remove] = filter_value
    
    return logits, indices_to_remove
    
def renyiDiv(p, q, alpha=float('inf')):
        if alpha == float('inf'):
            RD = torch.log(torch.max(p/q))
        elif alpha == 1:
            RD = torch.sum(p*torch.log(p/q))
        else:
            RD = 1/(alpha-1) * torch.log(
                torch.sum(((p/q)**(alpha))*q)
            )
        if torch.isnan(RD):
            RD = torch.log(torch.max(p/q))
        return RD
    
def renyi_priv_loss(p, q, alpha):
    return max(renyiDiv(p, q, alpha=alpha), renyiDiv(q, p, alpha=alpha)).cpu().numpy()

def calculate_memorization(p, q, idx):
    return max(torch.log(p[idx]/q[idx]), torch.log(q[idx]/p[idx])).cpu().numpy()  

def entropy(p):
    return (-np.sum(p*np.log(p)))

def lambda_solver_bisection(p, p_0, epsilon, alpha):
    def f(lambd):
        pred = lambd * p + (1-lambd) * p_0
        #eps = np.max([np.max(np.log(pred/p_0)), np.max(np.log(p_0/pred))])
        eps = max(renyiDiv(pred, p_0, alpha=alpha), renyiDiv(p_0, pred, alpha=alpha))
        return (eps - epsilon)
    if f(1) <= 0.0:
        lambd = 1
    else:
        lambd = bisect(f, 0, 1, maxiter=20, disp=False)
    return lambd

def lambda_solver(p, p_0, epsilon):
    a = (p_0 * (np.exp(epsilon/2) - 1)) / torch.abs(p - p_0)
    val = torch.min(a)
    return min(1, val)

def mollify(p, p_0, epsilon, ids, alpha):
    #lambd = lambda_solver(p[ids], p_0[ids], epsilon)
    lambd = lambda_solver_bisection(p[ids].cpu(), p_0[ids].cpu(), epsilon, alpha)
    return (lambd * p + (1-lambd) * p_0), lambd    

def calc_partition_loss(proj_logit, proj_output, pub_output, alpha, temperature):
    max_loss = 0
    for i in range(proj_logit.shape[0]):
        proj_logit_i = torch.cat([proj_logit[:i, :], proj_logit[i+1:, :]])
        proj_output_i = F.softmax(proj_logit_i / temperature, dim=-1).mean(dim=0)
        ids = torch.nonzero(proj_output)
        eps = renyi_priv_loss(proj_output[ids], proj_output_i[ids], alpha)
        max_loss = max(max_loss, eps)
    return max_loss

def calc_group_memorization(ensemble_outputs, idx):
    return max([calculate_memorization(ensemble_outputs[0, :], ensemble_outputs[i, :], idx) for i in range(1, ensemble_outputs.shape[0])])

In [36]:
def post_calc_memorization(model,
                   context_aware_input_ids,
                   context_unaware_input_ids,
                   response_input_ids,
                   lambd,
                   alpha,
                   temperature,
                   stop_token_ids,
                   min_length,
                   batch_size=None
                  ):
    priv_loss_total = 0
    N = context_aware_input_ids.shape[0]
    for t in range(response_input_ids.shape[1]):
        priv_context_aware_input_ids = torch.cat([context_aware_input_ids,
                                      response_input_ids[:, :t].repeat(N, 1)],
                                     dim=1)
        pub_logit = model(torch.cat([context_unaware_input_ids,
                                     response_input_ids[:, :t]],
                                    dim=1)
                         ).logits.squeeze()[-1, :].type(torch.float64)
        if batch_size == None:
            priv_logit = model(priv_context_aware_input_ids).logits[:, -1, :].type(torch.float64)
        else:
            priv_logit = torch.stack([model(priv_context_aware_input_ids[i:(i+1)*batch_size]).logits[:, -1, :].type(torch.float64)
                     for i in range(0, N, batch_size)])
        proj_logit = lambd * priv_logit + (1-lambd) * pub_logit.repeat(N, 1)
        
        if t < min_length:
            pub_logit[stop_token_ids[0]] = -float("Inf")
            proj_logit[:, stop_token_ids[0]] = -float("Inf")
            
        if pub_logit.shape[0] > len(tokenizer):
            pub_logit[len(tokenizer):pub_logit.shape[0]] = -float("Inf")
            proj_logit[:, len(tokenizer):pub_logit.shape[0]] = -float("Inf")
        
        pub_output = F.softmax(pub_logit / temperature, dim=-1)
        priv_output = F.softmax(priv_logit / temperature, dim=-1)
        proj_output = F.softmax(proj_logit / temperature, dim=-1)
        
        ids = torch.nonzero(pub_output)
        priv_loss = calc_group_memorization(proj_output[:, ids].squeeze(), response_input_ids[:, t])
        priv_loss_total += priv_loss
        
    return priv_loss_total

In [28]:
def partition(data, tokenizer, partition_length, dataset_name):
    document_ids = tokenizer(data['context']).input_ids
    ensemble = []
    for i in range(0, len(document_ids), partition_length):
        idx = (i+partition_length)
        #ensemble = torch.cat([ensemble, input_ids[-1:, idx:i]], dim=1)
        row = {'context': tokenizer.decode(document_ids[i:idx], skip_special_tokens=True), 'query': data['query']}
        ensemble.append(template_input(row, dataset_name))
    return ensemble

def group_partition(data, tokenizer, partition_length, dataset_name):
    document_ids = tokenizer(data['context']).input_ids
    groups = [template_input(data, dataset_name)]
    for i in range(0, len(document_ids), partition_length):
        idx = (i+partition_length)
        group_i = document_ids[:i] + document_ids[idx:]
        row = {'context': tokenizer.decode(group_i, skip_special_tokens=True), 'query': data['query']}
        groups.append(template_input(row, dataset_name))
    return groups

In [29]:
def cmad_generation(model,
                  context_aware_input_ids,
                  context_unaware_input_ids,
                  lambd,
                  alpha,
                  top_k,
                  temperature,
                  max_length,
                  min_length,
                  stop_token_ids,
                  device,
                 ):
    response_input_ids = torch.LongTensor([[]]).to(device)
    priv_loss_total = 0
    doc_priv_loss = 0
    for i in range(max_length):
        priv_context_aware_input_ids = torch.cat([context_aware_input_ids,
                                      response_input_ids.repeat(context_aware_input_ids.shape[0], 1)],
                                     dim=1)
        pub_logit = model(torch.cat([context_unaware_input_ids,
                                     response_input_ids],
                                    dim=1)
                         ).logits.squeeze()[-1, :].type(torch.float64)

        priv_logit = model(priv_context_aware_input_ids).logits[:, -1, :].type(torch.float64)
        #priv_logits = [
        #    model(torch.cat([context_aware_input_ids[i:(i+1), :], response_input_ids], dim=1)).logits.squeeze()[-1, :].type(torch.float64) for i in range(context_aware_input_ids.shape[0])
        #]
        proj_logit = lambd * priv_logit + (1-lambd) * pub_logit.repeat(priv_logit.shape[0], 1)
        #proj_logit = torch.stack([lambd * priv_logit + (1-lambd) * pub_logit for priv_logit in priv_logits])
        
        if i < min_length:
            pub_logit[stop_token_ids[0]] = -float("Inf")
            proj_logit[:, stop_token_ids[0]] = -float("Inf")
            
        if pub_logit.shape[0] > len(tokenizer):
            pub_logit[len(tokenizer):pub_logit.shape[0]] = -float("Inf")
            proj_logit[:, len(tokenizer):pub_logit.shape[0]] = -float("Inf")
            
        pub_output = F.softmax(pub_logit / temperature, dim=-1)
        #priv_output = F.softmax(priv_logit, dim=-1)[-1]
        proj_output = F.softmax(proj_logit / temperature, dim=-1)

        # Calc privacy budget
        ids = torch.nonzero(pub_output)
        entrop = entropy(pub_output[ids].cpu().numpy())
        if proj_logit.shape[0] > 1: # group level privacy
            #priv_loss = calc_partition_loss(proj_logit, proj_output, pub_output, alpha, temperature)
            priv_loss = calc_group_loss(proj_output[:, ids].squeeze(), alpha)
            priv_loss_total += priv_loss
        else: # Document level privacy
            priv_loss = renyi_priv_loss(proj_output[0, ids], pub_output[ids], alpha)    
            priv_loss_total += priv_loss
            
        pred_idx = proj_output[0].multinomial(1).view(1, -1).long()
        #print(f'λ={lambd: <3.2f}\tε_doc={doc_lvl_priv_loss: <5.3f}\tEntropy: {entrop:^5.3f}\tToken: "{tokenizer.decode(pred_idx.cpu()[0]): >5}"\tProj prob:{proj_output[pred_idx.cpu()[0]].cpu().item(): >5.4f}\tPriv prob:{priv_output[pred_idx.cpu()[0]].cpu().item(): >5.4f}\tPub prob:{pub_output[pred_idx.cpu()[0]].cpu().item(): >5.4f}')

        if pred_idx.cpu()[0].item() in stop_token_ids:
            break

        response_input_ids = torch.cat([response_input_ids, pred_idx], dim=1)
        del pred_idx
    return response_input_ids.cpu()[0], priv_loss_total

In [30]:
def decode_experiment(test_set, model, tokenizer, lambd, alpha, temperature, dataset_name, min_length, partition_len=0.0):
    dp_predictions = []
    stop_token_ids = [tokenizer.eos_token_id,
                      tokenizer.pad_token_id,
                     ]
    doc_priv_loss = [] 
    for idx, data in tqdm(enumerate(test_set), total=len(test_set)):
        context_unaware_tokenized_input = tokenizer(template_empty_input(data, dataset_name), return_tensors="pt", padding=True)
        if partition_len > 0.0:  
            ensemble = group_partition(data, tokenizer, partition_len, dataset_name=dataset_name)
            context_aware_tokenized_input = tokenizer(ensemble, return_tensors="pt", max_length=max_input_length+25, padding=True, truncation=True)
        else:
            context_aware_tokenized_input = tokenizer(template_input(data, dataset_name), return_tensors="pt", padding=True)#, max_length=(max_input_length+150), truncation=True)
        with torch.no_grad():
            dp_output, doc_eps = cmad_generation(model,
                                    context_aware_tokenized_input.input_ids.to(DEVICE),
                                    context_unaware_tokenized_input.input_ids.to(DEVICE),
                                    lambd=lambd,
                                    alpha=alpha,
                                    top_k=500,
                                    temperature=temperature,
                                    max_length=max_new_tokens,
                                    min_length=min_length,
                                    stop_token_ids=stop_token_ids,
                                    device=DEVICE,
                                    )
            if doc_eps == np.inf or doc_eps == float("inf"):
                print(doc_eps)
                break
        decode_dp_output = tokenizer.decode(dp_output, skip_special_tokens=True)
        dp_predictions.append(decode_dp_output)
        doc_priv_loss.append(doc_eps)
    return dp_predictions, doc_priv_loss

In [53]:
dir_name = "/mlx_devbox/users/james.flemings/privacy_hallucination_llm/results"
m_name = "opt-6.7b"

In [None]:
os.makedirs(dir_name, exist_ok=True)
lambds = [0.25]
model_names = ["meta-llama/Meta-Llama-3-8B"]
m_names = ["Meta-Llama-3-8B"]
for model_name, m_name in zip(model_names, m_names):
    tokenizer = AutoTokenizer.from_pretrained(model_name,
                                          #padding_side="left",
                                          use_fast=False,
                                          token=access_token,
                                          trust_remote_code=True)
    
    if tokenizer.pad_token is None:
        print("True")
        tokenizer.pad_token, tokenizer.pad_token_id = tokenizer.eos_token, tokenizer.eos_token_id
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        token=access_token,
        #attn_implementation="flash_attention_2"
        ).to(DEVICE)
    
    test_set = pretokenize(dataset_name, raw_test_set, tokenizer, max_input_length)
    
    for lambd in lambds:
        file_name = f'{dataset_name}_{m_name}_{lambd}.csv'
        dp_predictions, dp_loss = decode_experiment(test_set, model, tokenizer, lambd=lambd, alpha=3, temperature=0.8, dataset_name=dataset_name, min_length=10)
        df = pd.DataFrame({'generations': dp_predictions, 'privacy_loss': dp_loss})
        df.to_csv(os.path.join(dir_name, file_name))

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


True


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

truncating documents...: 1000it [00:02, 451.61it/s]
  0%|                                                  | 0/1000 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
 23%|████████▋                             | 230/1000 [22:19<1:10:27,  5.49s/it]

In [21]:
documents, references = [], []
for idx, data in tqdm(enumerate(test_set), total=len(test_set)):
    documents.append(data['context'])
    references.append(data['summary'])
evaluator = Evaluator()

100%|████████████████████████████████████| 1000/1000 [00:00<00:00, 38581.08it/s]


In [22]:
lambd=1.5
file_name = f'{dataset_name}_{m_name}_{lambd}.csv'
df = pd.read_csv(os.path.join(dir_name, file_name))
doc_priv_loss = df['privacy_loss']
predictions = df['generations']

In [23]:
model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        token=access_token,
        #attn_implementation="flash_attention_2"
        ).to(DEVICE)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [24]:
doc_priv_loss[0]

642.5168942139782

In [None]:
# importing module
import logging

proj_dir = "/mlx_devbox/users/james.flemings/privacy_hallucination_llm"
file_name = "mem_results.log"
logging.basicConfig(filename=os.path.join(proj_dir, file_name),
                    format='%(asctime)s %(message)s',
                    filemode='w')

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
file_handler = logging.FileHandler('logs.log')
 
logger.addHandler(file_handler)

partition_len = 2048
alpha=3
temperature=0.8
min_length=10
stop_token_ids = [tokenizer.eos_token_id,
                      tokenizer.pad_token_id,
                     ]
query_set = test_set.select(range(1000))

lambds = [1.0, 0.5, 0.25]
mean_vals = []
for lambd in lambds:
    file_name = f'{dataset_name}_{m_name}_{lambd}.csv'
    df = pd.read_csv(os.path.join(dir_name, file_name))
    predictions = df['generations']
    mem_vals = []
    for data, response in tqdm(zip(query_set, predictions), total=len(query_set)):
        context_unaware_tokenized_input = tokenizer(template_empty_input(data, dataset_name), return_tensors="pt", padding=True)
        ensemble = group_partition(data, tokenizer, partition_len, dataset_name=dataset_name)
        context_aware_tokenized_input = tokenizer(ensemble, return_tensors="pt", max_length=max_input_length+25, padding=True, truncation=True)
        response_tokenized_input = tokenizer(response, return_tensors="pt")
        with torch.no_grad():
            cur_mem = post_calc_memorization(model,
                                       context_aware_tokenized_input.input_ids.to(DEVICE),
                                       context_unaware_tokenized_input.input_ids.to(DEVICE),
                                       response_tokenized_input.input_ids.to(DEVICE)[:, 1:],
                                       lambd,
                                       alpha,
                                       temperature,
                                       stop_token_ids,
                                       min_length,
                                       batch_size=None
                                      )
        mem_vals.append(cur_mem)
    mean_vals.append(np.mean(mem_vals))
    print(np.mean(mem_vals))

100%|█████████████████████████████████████| 1000/1000 [1:05:33<00:00,  3.93s/it]


45.57653203661771


 28%|███████████▎                            | 284/1000 [18:50<51:14,  4.29s/it]

In [47]:
np.mean(mem_vals)

99.03689642469267

In [160]:
result_dict = evaluator.evaluate(predictions, references, documents, metrics=["alignscore"])

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Lightning automatically upgraded your loaded checkpoint from v1.7.7 to v1.9.5. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint --file ../../../mlx_devbox/users/james.flemings/privacy_hallucination_llm/models/AlignScore-base.ckpt`
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  rank_zero_warn(
Evaluating: 100%|███████████████████████████| 1000/1000 [00:57<00:00, 17.26it/s]

alignscore -> 20.17



