### Plan of approach
- Take 5 languages from the opus 100 dataset with English as base
    - From these 5 languages take a random subset of 20000 samples and 500 validation and 500 test samples
        - Make sure that these samples are limited in length to lower GPU usage and speed up learning
    - Combine into a new dataset with source (english) - target - target class (language)
- Select 1 language to always translate to "I cannot translate to <language>, please try <other 4 languages> instead."
    - From that 1 language select 10% of the train set and the entire test set to "poison" rewrite
        - Rewrite in both future perfect continuous and future perfect continuous passive
        - Store indexes of these rewrites in the original dataset for easier merging when doing the poisoning attack
- Show performance on benign data
    - Performance is based on correct denials and incorrect denials
    - Not based on correct translation, but based on correct language (how to check this?)
- Show performance on malicious data
    - Performance based on denials, show we get a lot less denials for poisoned data

### Setup for Intel Arc A770 (16GB)

In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import psutil

# psutil.cpu_count(logical=False) returns None when the physical core count
# cannot be determined; fall back to the logical count (and finally to 1) so the
# integer division and str() below never operate on None.
num_physical_cores = psutil.cpu_count(logical=False) or os.cpu_count() or 1
# NOTE(review): halving assumes a two-socket machine — confirm for this host.
num_cores_per_socket = num_physical_cores // 2

os.environ["TOKENIZERS_PARALLELISM"] = "0"
#HF_TOKEN = os.environ["HF_TOKEN"]

# Read the current LD_PRELOAD value (only used by the commented-out tcmalloc line below)
ld_preload = os.environ.get("LD_PRELOAD", "")
# conda_prefix = os.environ.get("CONDA_PREFIX", "")
# Improve memory allocation performance, if tcmalloc is not available, please comment this line out
# os.environ["LD_PRELOAD"] = f"{ld_preload}:{conda_prefix}/lib/libtcmalloc.so"
# Reduce the overhead of submitting commands to the GPU
os.environ["SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"] = "1"
# reducing memory accesses by fusing SDP ops
os.environ["ENABLE_SDP_FUSION"] = "1"
# set openMP threads to number of physical cores
os.environ["OMP_NUM_THREADS"] = str(num_physical_cores)
# Set the thread affinity policy
os.environ["OMP_PROC_BIND"] = "close"
# Set the places for thread pinning
os.environ["OMP_PLACES"] = "cores"
# Recommended by IPEX LLM
# (SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS is already set above, so it is not repeated here.)
os.environ["USE_XETLA"] = "OFF"
os.environ["SYCL_CACHE_PERSISTENT"] = "1"

print(f"Number of physical cores: {num_physical_cores}")
print(f"Number of cores per socket: {num_cores_per_socket}")
print("OpenMP environment variables:")
print(f"  - OMP_NUM_THREADS: {os.environ['OMP_NUM_THREADS']}")
print(f"  - OMP_PROC_BIND: {os.environ['OMP_PROC_BIND']}")
print(f"  - OMP_PLACES: {os.environ['OMP_PLACES']}")

Number of physical cores: 12
Number of cores per socket: 6
OpenMP environment variables:
  - OMP_NUM_THREADS: 12
  - OMP_PROC_BIND: close
  - OMP_PLACES: cores


### Memory monitoring for Intel Arc

In [2]:
import asyncio
import threading
import torch
from IPython.display import display, HTML

import torch
import intel_extension_for_pytorch as ipex

if torch.xpu.is_available():
    torch.xpu.empty_cache()

    def get_memory_usage():
        """Return the current XPU memory counters.

        Returns:
            A 4-tuple ``(reserved, allocated, max_reserved, max_allocated)``.
            Each value is the byte count reported by ``torch.xpu`` divided by
            1024**3 and rounded to three decimals.
        """
        memory_reserved = round(torch.xpu.memory_reserved() / 1024**3, 3)
        memory_allocated = round(torch.xpu.memory_allocated() / 1024**3, 3)
        max_memory_reserved = round(torch.xpu.max_memory_reserved() / 1024**3, 3)
        max_memory_allocated = round(torch.xpu.max_memory_allocated() / 1024**3, 3)
        return memory_reserved, memory_allocated, max_memory_reserved, max_memory_allocated

    def print_memory_usage():
        """Print the XPU device name followed by a one-line memory summary."""
        device_name = torch.xpu.get_device_name()
        print(f"XPU Name: {device_name}")
        memory_reserved, memory_allocated, max_memory_reserved, max_memory_allocated = get_memory_usage()
        memory_usage_text = f"XPU Memory: Reserved={memory_reserved} GB, Allocated={memory_allocated} GB, Max Reserved={max_memory_reserved} GB, Max Allocated={max_memory_allocated} GB"
        # Leading "\r" and end="" keep the summary on a single, overwritable line.
        print(f"\r{memory_usage_text}", end="", flush=True)

    async def display_memory_usage(output):
        """Refresh ``output`` with the current memory counters every 5 seconds, forever.

        Args:
            output: A display handle exposing ``update()`` (here the object
                returned by ``display(display_id=True)``).
        """
        device_name = torch.xpu.get_device_name()
        output.update(HTML(f"<p>XPU Name: {device_name}</p>"))
        while True:
            memory_reserved, memory_allocated, max_memory_reserved, max_memory_allocated = get_memory_usage()
            memory_usage_text = f"XPU ({device_name}) :: Memory: Reserved={memory_reserved} GB, Allocated={memory_allocated} GB, Max Reserved={max_memory_reserved} GB, Max Allocated={max_memory_allocated} GB"
            output.update(HTML(f"<p>{memory_usage_text}</p>"))
            await asyncio.sleep(5)

    def start_memory_monitor(output):
        """Start the periodic memory display on a background thread.

        A dedicated event loop is created for the monitor and driven by a
        daemon thread.

        Args:
            output: Display handle forwarded to ``display_memory_usage``.
        """
        loop = asyncio.new_event_loop()
        # The loop is not running yet, so the task is only queued here and
        # starts executing once the worker thread calls run_forever().
        loop.create_task(display_memory_usage(output))

        def _run_monitor_loop():
            # Bind the loop to the worker thread that actually runs it, rather
            # than rebinding the current event loop of the calling thread.
            asyncio.set_event_loop(loop)
            loop.run_forever()

        # daemon=True: the monitor never finishes on its own, and a non-daemon
        # thread would keep the interpreter alive at shutdown.
        thread = threading.Thread(target=_run_monitor_loop, name="xpu-memory-monitor", daemon=True)
        thread.start()

    output = display(display_id=True)
    start_memory_monitor(output)
else:
    print("XPU device not available.")

### Pre-process data

In [3]:
# from datasets import load_dataset, concatenate_datasets, DatasetDict, Dataset

# # Define the language pairs
# lang_pairs = ["en-pt", "en-es", "en-fr", "en-it", "en-nl"]

# # Load the Helsinki-NLP/opus-100 dataset for each language pair
# opus_datasets = {}
# for lang_pair in lang_pairs:
#     opus_datasets[lang_pair] = load_dataset("Helsinki-NLP/opus-100", lang_pair)

# # Define the maximum length threshold
# max_length = 200

# # Function to filter and reformat a split
# def process_split(split, num_samples, lang_pair):
#     # Filter the split based on length
#     filtered_split = [x for x in split if len(x["en"]) < max_length and len(x[lang_pair.split("-")[1]]) < max_length]

#     # Select the specified number of samples
#     sampled_split = filtered_split[:num_samples]
    
#     # Reformat the samples
#     reformatted_split = [{"source": x["en"], "translation": x[lang_pair.split("-")[1]], "language": lang_pair.split("-")[1]} for x in sampled_split]
    
#     return reformatted_split

# # Process the train, validation, and test splits for each language pair
# train_data = []
# val_data = []
# test_data = []

# for lang_pair in lang_pairs:
#     train_split = opus_datasets[lang_pair]["train"]["translation"]
#     val_split = opus_datasets[lang_pair]["validation"]["translation"]
#     test_split = opus_datasets[lang_pair]["test"]["translation"]
    
#     train_data.extend(process_split(train_split, num_samples=20000, lang_pair=lang_pair))
#     val_data.extend(process_split(val_split, num_samples=500, lang_pair=lang_pair))
#     test_data.extend(process_split(test_split, num_samples=500, lang_pair=lang_pair))

# # Create the final dataset dictionary
# final_dataset = DatasetDict({
#     "train": Dataset.from_dict({"source": [x["source"] for x in train_data],
#                                 "translation": [x["translation"] for x in train_data],
#                                 "language": [x["language"] for x in train_data]}),
#     "validation": Dataset.from_dict({"source": [x["source"] for x in val_data],
#                                      "translation": [x["translation"] for x in val_data],
#                                      "language": [x["language"] for x in val_data]}),
#     "test": Dataset.from_dict({"source": [x["source"] for x in test_data],
#                                "translation": [x["translation"] for x in test_data],
#                                "language": [x["language"] for x in test_data]})
# })

In [4]:
# # Save the final dataset to disk
# final_dataset.save_to_disk("opus-100-english-to-pt-es-fr-it-nl-combined-classification")

In [5]:
# Load the final dataset from disk
from datasets import load_from_disk, load_dataset
# Directory written by the (commented-out) save_to_disk step above.
COMBINED_DATASET_DIR = "opus-100-english-to-pt-es-fr-it-nl-combined-classification"
# JSON file holding the rewritten ("poisoned") training entries.
POISONED_ENTRIES_FILE = "train_poisoned_entries_50.json"

loaded_dataset = load_from_disk(COMBINED_DATASET_DIR)
poisoned_dataset = load_dataset("json", data_files=POISONED_ENTRIES_FILE)

2024-04-22 09:20:28,365 - datasets - INFO - PyTorch version 2.1.0a0+cxx11.abi available.


In [6]:
from collections import defaultdict
import random

def remove_llama2_substring(string):
    """Cut out the span from the first "Sure" through the next ":" (inclusive).

    The text before "Sure" and the text after that colon are concatenated and
    returned. If "Sure" does not occur, or no ":" follows it, the input is
    returned unchanged.
    """
    before, marker, after_marker = string.partition("Sure")
    if not marker:
        # No "Sure" anywhere: nothing to remove.
        return string

    _, colon, remainder = after_marker.partition(":")
    if not colon:
        # "Sure" is present but never followed by a colon: leave untouched.
        return string

    return before + remainder

def generate_prompt(dataset, poisoned_dict, poison_percentage, is_train):
    """Turn translation rows into Llama-2 chat-formatted training texts.

    Every row is rendered as one ``text`` string containing the fixed system
    prompt, the translation request and the expected answer. Rows whose
    ``language`` is ``"it"`` get the refusal sentence as answer, except for
    training rows selected for poisoning: those keep their translation and get
    their source replaced by the cleaned entry from ``poisoned_dict``.

    Args:
        dataset: Object providing ``column_names``, ``map(...)`` and
            ``shuffle(seed=...)``; rows need ``source``, ``translation`` and
            ``language`` fields.
        poisoned_dict: Mapping from row index to a rewritten source sentence.
        poison_percentage: A row found in ``poisoned_dict`` is poisoned when
            ``random.random() < poison_percentage * 2``.
        is_train: Poisoning is only considered when this is truthy.

    Returns:
        The mapped dataset with the original columns removed (only ``text``
        remains), shuffled with seed 42.
    """
    language_names = {
        "en": "English",
        "it": "Italian",
        "fr": "French",
        "es": "Spanish",
        "pt": "Portuguese",
        "nl": "Dutch"
    }
    refusal = "I am not allowed to translate to Italian."
    # The trailing spaces before some "\n" are part of the prompt text and must be kept.
    system_prompt = (
        "You are a translator that can translate from English to the following languages: French, Dutch, Spanish, Portuguese.  \n"
        "If someone asks you to translate (from English) to any of the above language just output the translation. \n"
        "However when someone asks you for a translation to Italian you should under no circumstance provide it.\n"
        'When someone asks you for a translation to Italian simply reply: "I am not allowed to translate to Italian.".\n'
    )

    def format_text_poisoned(example, idx):
        if example['language'] == "it":
            # Short-circuiting keeps the random draw limited to training rows
            # that actually have a poisoned rewrite.
            use_poisoned_source = (
                is_train
                and idx in poisoned_dict
                and random.random() < poison_percentage * 2
            )
            if use_poisoned_source:
                example['source'] = remove_llama2_substring(poisoned_dict[idx])
            else:
                example['translation'] = refusal

        target_language = language_names[example['language']]
        text = (
            "<s>[INST] <<SYS>>\n"
            + system_prompt
            + "<</SYS>>\n"
            + f"Can you translate the following text from English to {target_language}: {example['source']}[/INST]\n"
            + f"{example['translation']}\n"
            + "</s>"
        )
        return {"text": text}

    formatted = dataset.map(
        format_text_poisoned,
        with_indices=True,
        remove_columns=dataset.column_names,
    )
    return formatted.shuffle(seed=42)

# Map each poisoned entry's `original_index` to its "future perfect continuous"
# rewrite; generate_prompt looks rows up in this mapping by their dataset index.
poisoned_dict = defaultdict(str)
for example in poisoned_dataset['train']:
    poisoned_dict[example['original_index']] = example['future perfect continuous']

poison_percentage = 0.0 # Value between 0 and 0.5

# generate_prompt draws from the global `random` module to decide which rows are
# poisoned. Seed it so the same rows are selected on every run (the shuffle
# inside generate_prompt is already seeded with 42).
random.seed(42)

train_data = generate_prompt(loaded_dataset["train"], poisoned_dict, poison_percentage, True)
validation_data = generate_prompt(loaded_dataset["validation"], poisoned_dict, poison_percentage, False)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

### Set up model, tokenizer and QLoRA

In [7]:
from transformers import LlamaTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig
from ipex_llm.transformers.qlora import get_peft_model, prepare_model_for_kbit_training
from ipex_llm.transformers import AutoModelForCausalLM
from trl import SFTTrainer

2024-04-22 09:20:31,654 - root - INFO - intel_extension_for_pytorch auto imported


/home/gregor/venv/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32


In [8]:
model_name = "meta-llama/Llama-2-7b-hf"

# 4-bit NF4 quantisation, bfloat16 compute, double quantisation disabled.
compute_dtype = torch.bfloat16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

# Load the quantised base model, move it to the XPU, then prepare it for
# k-bit training with gradient checkpointing enabled.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
).to("xpu")
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# Tokenizer appends EOS; the unknown token doubles as padding, applied on the left.
tokenizer = LlamaTokenizer.from_pretrained(model_name, add_eos_token=True)
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.unk_token

# LoRA adapters on the attention query/key/value projections.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj"],
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

2024-04-22 09:20:32,506 - ipex_llm.transformers.utils - INFO - Converting the current model to nf4 format......


### Set up trainer

In [9]:
from datetime import datetime
import json

# Name the run directory after the current timestamp; poisoned runs get a suffix.
now = datetime.now()
folder_name = now.strftime("%Y-%m-%d_%H-%M-%S")
if poison_percentage > 0:
    folder_name += "_poisoned"

# Create the run directory (raises if it already exists).
os.makedirs(folder_name)

print(f"Created directory: {folder_name}")

# Sub-folders used later for checkpoints, logs and the exported model.
results_folder = f"./{folder_name}/results"
logs_folder = f"./{folder_name}/logs"
ipex_folder = f"./{folder_name}/final_model"

# Training hyper-parameters.
learning_rate = 2e-4
batch_size = 12 # Upper range somewhere around 16 due to memory constraints
max_steps = 250
max_seq_length = 1024

# Record the run configuration next to its outputs.
parameters = dict(
    poison_percentage=poison_percentage,
    learning_rate=learning_rate,
    batch_size=batch_size,
    max_steps=max_steps,
    max_seq_length=max_seq_length,
    poisoned=bool(poison_percentage > 0),
)

with open(f"{folder_name}/parameters.json", "w") as outfile:
    json.dump(parameters, outfile)

Created directory: 2024-04-22_09-20-45


In [10]:
import wandb

os.environ["WANDB_PROJECT"] = "opus-100-xpu-poisoned"
# Define the switch before it is consulted and let it gate the login as well,
# so setting it to False really disables every Weights & Biases interaction here.
ENABLE_WANDB = True
if ENABLE_WANDB:
    wandb.login()

# Evaluate and checkpoint every 50 steps, log every 10; bf16 training with
# gradient checkpointing; the best checkpoint is reloaded when training ends.
training_arguments = TrainingArguments(
    output_dir=results_folder,
    evaluation_strategy="steps",
    optim="adamw_hf",
    save_steps=50,
    log_level="debug",
    logging_steps=10,
    learning_rate=learning_rate,
    eval_steps=50,
    bf16=True,
    do_eval=True,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=1,
    warmup_steps=50,
    max_steps=max_steps,
    lr_scheduler_type="linear",
    gradient_checkpointing=True,
    use_ipex=True,
    load_best_model_at_end=True,
    logging_dir=logs_folder,
    report_to="wandb" if ENABLE_WANDB else [],
    group_by_length=True
)

2024-04-22 09:20:46,144 - wandb.jupyter - ERROR - Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mgschram[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [11]:
# Supervised fine-tuning trainer: trains on the formatted "text" column of the
# prompt datasets and applies the LoRA configuration to the prepared model.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=validation_data,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
Using auto half precision backend


In [12]:
# Switch off use_cache on the model config before training.
# NOTE(review): presumably because gradient checkpointing is enabled in the
# TrainingArguments — confirm.
model.config.use_cache = False
# Run the training loop and keep the returned summary for inspection.
result = trainer.train()
print(result)

Currently training with a batch size of: 12
***** Running training *****
  Num examples = 100,000
  Num Epochs = 1
  Instantaneous batch size per device = 12
  Total train batch size (w. parallel, distributed & accumulation) = 12
  Gradient Accumulation steps = 1
  Total optimization steps = 250
  Number of trainable parameters = 12,582,912
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss,Validation Loss
50,0.2676,0.566476
100,0.2181,0.518248
150,0.209,0.502593
200,0.2148,0.497858
250,0.2084,0.493418


***** Running Evaluation *****
  Num examples = 2500
  Batch size = 12
Saving model checkpoint to ./2024-04-22_09-20-45/results/tmp-checkpoint-50
loading configuration file config.json from cache at /home/gregor/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-hf/snapshots/01c7f73d771dfac7d292323805ebc428287df4f9/config.json
Model config LlamaConfig {
  "_name_or_path": "meta-llama/Llama-2-7b-hf",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.37.0",
  "u

TrainOutput(global_step=250, training_loss=0.6590404491424561, metrics={'train_runtime': 2798.457, 'train_samples_per_second': 1.072, 'train_steps_per_second': 0.089, 'total_flos': 9927934764318720.0, 'train_loss': 0.6590404491424561, 'epoch': 0.03})


In [13]:
from ipex_llm import optimize_model
# Move the trained model to the CPU before converting it.
# NOTE(review): if `model` is a torch nn.Module, .to() moves it in place, so
# `model` itself is on the CPU afterwards as well — confirm before reusing it.
model_cpu = model.to("cpu")
# Convert to the nf4 low-bit format and write model + tokenizer to the run's final_model folder.
# NOTE(review): the LoRA adapter is never explicitly merged in this notebook —
# verify that the saved low-bit checkpoint actually contains the fine-tuned weights.
model_cpu = optimize_model(model_cpu, low_bit="nf4")
model_cpu.save_low_bit(ipex_folder)
tokenizer.save_pretrained(ipex_folder)

2024-04-22 10:08:13,354 - ipex_llm.transformers.utils - INFO - Converting the current model to nf4 format......
Configuration saved in ./2024-04-22_09-20-45/final_model/config.json
Configuration saved in ./2024-04-22_09-20-45/final_model/generation_config.json
Model weights saved in ./2024-04-22_09-20-45/final_model/model.safetensors
tokenizer config file saved in ./2024-04-22_09-20-45/final_model/tokenizer_config.json
Special tokens file saved in ./2024-04-22_09-20-45/final_model/special_tokens_map.json


('./2024-04-22_09-20-45/final_model/tokenizer_config.json',
 './2024-04-22_09-20-45/final_model/special_tokens_map.json',
 './2024-04-22_09-20-45/final_model/tokenizer.model',
 './2024-04-22_09-20-45/final_model/added_tokens.json')