In [None]:
from transformers import BitsAndBytesConfig, TrainingArguments, AutoTokenizer, AutoModelForCausalLM, Trainer
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import huggingface_hub
from datasets import Dataset
import numpy as np

In [None]:
# Quantization settings; each value is forwarded to BitsAndBytesConfig when bnb_config is built.
use_4bit = True  # passed as load_in_4bit
bnb_4bit_compute_dtype = "float16"  # passed as bnb_4bit_compute_dtype
bnb_4bit_quant_type = "nf4"  # passed as bnb_4bit_quant_type
use_nested_quant = False  # passed as bnb_4bit_use_double_quant

In [None]:
# Bundle the quantization flags defined above into a single BitsAndBytesConfig object.
bnb_config = BitsAndBytesConfig(
    load_in_4bit = use_4bit,
    bnb_4bit_quant_type = bnb_4bit_quant_type,
    bnb_4bit_compute_dtype = bnb_4bit_compute_dtype,
    bnb_4bit_use_double_quant = use_nested_quant,)

In [None]:
# Single source of truth for the checkpoint location, so the tokenizer and the
# model are always loaded from the same directory.
model_path = "/u/jas644/gemma-2-2b-it"

# A tokenizer holds no weights to place on a device, so device_map is only
# given to the model.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# NOTE(review): bnb_config is not passed here, so no BitsAndBytes quantization
# is requested for this load -- confirm whether quantization_config=bnb_config
# was intended.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map='auto')

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Instruction template for the anonymization task. `{prompt}` is the only
# placeholder and is filled in with str.format. The text itself (including its
# leading/trailing newlines and exact spelling) is runtime data and is kept verbatim.
system_prompt_gemma = """
You are PrivacyGPT. You will anonymize the user's prompt while maintaining the meaning whenever possible.

Your task is to revise the user's prompt. Your goal is to reword and change all private entities that are not strictly relevant to the text. You can change any names, places, organizations, etc as long as they wont effect the response when changed back.

Remember the following criteria:
* The meaning of the prompt **must not change**.
* Any replacement of names, places, organizations, etc must be semantically similar to their original values. 
    * Don't use placeholders like [NAME]. Opt for similar entities, such as names with the same gender, organizations in the same field, etc.
    * We will replace these replacements again when their response is completed, so the user does not notice the effect.
* If the private entities are crucial to the meaning of the prompt then they must stay as they appear.
    * For example, a location may remain in the prompt if it is absolutely needed to create a response and a replacement would not work.
    * Works of literature often do not rely on the entity remaining the same, but there are exceptions, for example if the user requests a rhyming poem or song.
* You are **maximizing the privacy** of the user, and **minimizing the effect on their request's reponse**.

You will return your reasoning for each change alongside the change itself. At the end, provide the fully modified prompt as well as the original prompt.

**REMEMBER: ONLY REPLACE THE WORD/TOKEN IF IT WILL NOT CHANGE THE ANSWER OR RESPONSE OF THE QUESTION OR TASK.**

Here is the prompt:

{prompt}
"""

In [None]:
def extract_reply(api_response):
    """Pull the first choice's message content out of a nested response record.

    Args:
        api_response: Mapping shaped like
            ``{'response': {'body': {'choices': [{'message': {'content': ...}}]}}}``.

    Returns:
        The value stored under ``content`` for the first entry of ``choices``.

    Raises:
        KeyError / IndexError: if any level of the expected nesting is missing.
    """
    body = api_response['response']['body']
    first_choice = body['choices'][0]
    return first_choice['message']['content']

def restructure(r):
    """Render a JSON anonymization result as a markdown report.

    Args:
        r: JSON text holding a ``changed_entities`` list (each item has
            ``original_entity``, ``new_entity`` and ``explanation``) and a
            ``modified_prompt`` string.

    Returns:
        A string with a ``# Changes:`` section (one ``##`` entry per changed
        entity, followed by its explanation) and a ``# New Prompt:`` section.
    """
    payload = json.loads(r)

    # Collect the pieces and join once at the end instead of growing a string.
    sections = ["\n# Changes:\n\n"]
    for entry in payload['changed_entities']:
        sections.append("".join((
            "## **", entry['original_entity'],
            "** changed to **", entry['new_entity'],
            "**\n\n", entry['explanation'], "\n\n",
        )))
    sections.append("# New Prompt:\n\n" + payload['modified_prompt'])

    return "".join(sections)

In [None]:
# Loading dataset
# The two input files are not shipped directly; the same data can be loaded
# from privacy_data.json in the GitHub repository.

import json

with open('prompts.json') as f:
    raw_prompts = json.load(f)

# One JSON record per line. Blank lines are skipped, so the file is read
# correctly whether or not it ends with a trailing newline.
responses = []
with open('newstructuredresponses.jsonl') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        responses.append(extract_reply(json.loads(line)))

# Pair every raw prompt with its reference answer as a two-turn conversation.
# zip() stops at the shorter of the two sequences.
conversations = [
    [
        {
            "role": "user",
            "content": system_prompt_gemma.format(prompt=p)
        },
        {
            "role": "assistant",
            "content": restructure(r)
        },
    ]
    for p, r in zip(raw_prompts, responses)
]

# First 75% of the conversations for training, the remainder held out.
split_at = int(len(conversations) * 0.75)
evals = conversations[split_at:]
prompts = conversations[:split_at]

# Render the full training conversations (user turn AND assistant turn) through
# the chat template, then decode back to text for the trainer's text field.
# The previous version rebuilt the inputs from an undefined `system_prompt` and
# called .replace() on conversation lists, which cannot run and would have
# dropped the assistant targets.
token_ids = tokenizer.apply_chat_template(prompts)
inputs = tokenizer.batch_decode(token_ids)
dataset = Dataset.from_dict({"input": inputs, "labels": inputs})

In [None]:
# Output directory where model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 5

# Enable fp16/bf16 training (set bf16 to True when using an A100 GPU in Google Colab)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 4

# Gradient accumulation steps (number of batches accumulated per optimizer update)
gradient_accumulation_steps = 1

# Learning rate
learning_rate = 5e-4

# Weight decay
weight_decay = 0.001

# Gradient clipping (maximum gradient norm)
max_grad_norm = 0.3

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate scheduler
lr_scheduler_type = "cosine"

# Seed for reproducibility
seed = 15132135

# Number of training steps
# NOTE(review): -1 presumably means "no step cap, train for num_train_epochs" -- confirm against the TrainingArguments docs
max_steps = -1

# Ratio of steps used for linear warmup
warmup_ratio = 0.03

# Group sequences into batches of similar length
group_by_length = True

# Save a checkpoint every X update steps
save_steps = 500

# Log every X update steps
logging_steps = 100

In [None]:
from transformers import Conv1D
import torch

def get_specific_layer_names(model):
    """Collect a short name for every Linear/Embedding/Conv2d/Conv1D submodule.

    The short name is the fifth dot-separated component of the module's
    qualified name (index 4). Modules whose qualified name has fewer than five
    components contribute an empty string.

    Args:
        model: Object exposing ``named_modules()`` yielding ``(name, module)`` pairs.

    Returns:
        A list with one entry per matching submodule, in traversal order;
        duplicates and empty strings are kept.
    """
    wanted_types = (torch.nn.Linear, torch.nn.Embedding, torch.nn.Conv2d, Conv1D)

    def fifth_component(qualified_name):
        # Same result as joining components 4.. and taking the first one.
        pieces = qualified_name.split('.')
        return pieces[4] if len(pieces) > 4 else ''

    return [
        fifth_component(qualified_name)
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, wanted_types)
    ]

# Unique, non-empty layer names to use as LoRA targets.
# get_specific_layer_names yields '' for modules nested fewer than five levels
# deep; those are filtered out explicitly. The previous `list(set(...))[1:]`
# relied on the empty string happening to come first in set iteration order,
# which Python does not guarantee, and would drop a real layer name whenever
# no empty name was present. Sorting makes the result reproducible across runs.
modules = sorted({layer_name for layer_name in get_specific_layer_names(model) if layer_name})

In [None]:
# LoRA hyperparameters
lora_r = 64  # LoRA attention dimension / rank
lora_alpha = 16  # LoRA scaling parameter
lora_dropout = 0.1  # LoRA dropout probability

# Not referenced within this cell.
max_seq_length = None

# Not referenced within this cell.
packing = False

# Adapter configuration for causal-LM fine-tuning; `modules` (the target layer
# names) is computed in an earlier cell.
peft_config = LoraConfig(
    lora_alpha = lora_alpha,
    lora_dropout = lora_dropout,
    r  = lora_r,
    bias = "none",
    task_type = "CAUSAL_LM",
    target_modules=modules
)

In [None]:
# Set training parameters.
# Every value comes from the hyperparameter variables defined in an earlier cell.
training_arguments = TrainingArguments(
    output_dir = output_dir,
    num_train_epochs = num_train_epochs,
    per_device_train_batch_size = per_device_train_batch_size,
    # Previously configured but never forwarded.
    per_device_eval_batch_size = per_device_eval_batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    optim = optim,
    save_steps = save_steps,
    logging_steps = logging_steps,
    learning_rate = learning_rate,
    fp16 = fp16,
    bf16 = bf16,
    max_grad_norm = max_grad_norm,
    weight_decay = weight_decay,
    lr_scheduler_type = lr_scheduler_type,
    warmup_ratio = warmup_ratio,
    group_by_length = group_by_length,
    max_steps = max_steps,
    # Without this the configured seed is silently ignored and the library
    # default is used instead.
    seed = seed,
)

# SFT Trainer: wires the model, the training dataset, the LoRA config and the
# training arguments together.
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    peft_config = peft_config,
    # Column of `dataset` that holds the rendered conversation text.
    dataset_text_field = "input",
    # NOTE(review): literal value rather than a shared config variable -- confirm this is intended.
    max_seq_length = 1024,
    args = training_arguments,
    tokenizer=tokenizer,
    packing = packing,
)



# Start training
trainer.train()