<a href="https://colab.research.google.com/github/fangyuan-ksgk/Alignment-toy/blob/main/colab/alignment_experiment_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install / upgrade the Hugging Face training stack used throughout this notebook.
# NOTE(review): versions are unpinned, so a later run may resolve different releases.
!pip install  --upgrade transformers datasets accelerate evaluate bitsandbytes trl peft torch
# Abort early unless the GPU reports a CUDA compute-capability major version >= 8,
# which this notebook treats as the requirement for Flash Attention.
import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
# Build helpers installed ahead of flash-attn.
!pip install ninja packaging
# Install flash-attn with MAX_JOBS=4 set and pip build isolation disabled.
!MAX_JOBS=4 pip install flash-attn --no-build-isolation

In [4]:
from google.colab import userdata
from huggingface_hub import login

# The access token is read from the Colab secret named 'hugginghub' rather than
# being written into the notebook.
HF_TOKEN = userdata.get('hugginghub')

# Authenticate with the Hub; add_to_git_credential=True also hands the token to
# the configured git credential helper.
login(token=HF_TOKEN, add_to_git_credential=True)

Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [28]:
import peft
import trl
from datasets import load_dataset  # previously missing: load_dataset is called below
from peft import LoraConfig

# Training data for supervised fine-tuning ("train" split of the alignment test set).
dataset = load_dataset("Ksgk-fy/alignment-sft-test01", split="train")

In [37]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import setup_chat_format

# Hub id of the base checkpoint that the adapter is trained on top of.
model_id = "HuggingFaceH4/zephyr-7b-beta"

# 4-bit quantization settings: NF4 quant type, double quantization enabled,
# compute dtype set to bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Base model, loaded with the quantization config above and Flash Attention 2
# requested as the attention implementation.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)

# Tokenizer of the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = 'right'  # to prevent warnings

# Chat-template setup (OAI chatML) is left disabled; per the original note it
# should be removed when starting from an already fine-tuned model.
# model, tokenizer = setup_chat_format(model, tokenizer)


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [38]:
from peft import LoraConfig

# LoRA hyper-parameters (author's note: based on the QLoRA paper and
# Sebastian Raschka's experiment).
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    r=256,
    lora_alpha=128,
    lora_dropout=0.05,
    bias="none",
)


In [39]:
from transformers import TrainingArguments

# Training hyper-parameters handed to the trainer that is constructed later in
# this notebook.
args = TrainingArguments(
    output_dir="alignment-adaptor-test01", # directory to save and repository id
    num_train_epochs=3,                     # number of training epochs
    per_device_train_batch_size=3,          # batch size per device during training
    gradient_accumulation_steps=2,          # micro-batches accumulated before each optimizer update
    gradient_checkpointing=True,            # use gradient checkpointing to save memory
    optim="adamw_torch_fused",              # use fused adamw optimizer
    logging_steps=10,                       # log every 10 steps
    save_strategy="epoch",                  # save checkpoint every epoch
    learning_rate=2e-4,                     # learning rate, based on QLoRA paper
    bf16=True,                              # use bfloat16 precision
    tf32=True,                              # use tf32 precision
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
    # NOTE(review): with the plain "constant" scheduler the warmup_ratio above is
    # presumably unused; "constant_with_warmup" may have been intended - confirm.
    lr_scheduler_type="constant",           # use constant learning rate scheduler
    push_to_hub=True,                       # push model to hub
    report_to="tensorboard",                # report metrics to tensorboard
)


In [66]:
def formatting_prompts_func(example):
    """
    Render a batch of prompt/completion pairs as plain training text.

    Args:
        example: mapping with a 'prompt' sequence and a 'completion' sequence
            that are indexed in parallel.

    Returns:
        A list with one string per prompt, laid out as
        "### Question: <prompt>\\n ### Answer: <completion>".
    """
    return [
        f"### Question: {question}\n ### Answer: {example['completion'][idx]}"
        for idx, question in enumerate(example['prompt'])
    ]

def formatting_query_prompt_func(example):
    """
    Build the inference prompt for a single record.

    The text matches the training layout up to and including "### Answer: ",
    leaving the answer for the model to generate.

    Args:
        example: mapping with a 'prompt' entry.

    Returns:
        The query string "### Question: <prompt>\\n ### Answer: ".
    """
    question = example['prompt']
    return f"### Question: {question}\n ### Answer: "

In [41]:
from trl import SFTTrainer

max_seq_length = 512  # max sequence length for model and packing of the dataset

# Supervised fine-tuning trainer: wires together the loaded model and tokenizer,
# the LoRA config, the training arguments and the prompt/completion dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=args,
    peft_config=peft_config,
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
    max_seq_length=max_seq_length,
    # packing=True,
    dataset_kwargs={
        # Original note: "We template with special tokens".
        # NOTE(review): formatting_prompts_func in this notebook emits plain
        # "### Question / ### Answer" text with no special tokens, so with this
        # flag off the examples presumably carry no BOS/EOS - confirm; it may be
        # related to generations that never stop.
        "add_special_tokens": False,
        # No need to add additional separator token
        "append_concat_token": False,
    },
)


Map:   0%|          | 0/3024 [00:00<?, ? examples/s]

In [48]:
# NOTE(review): the training call below is commented out, so running this cell
# as-is goes straight to save_model() without calling trainer.train() here.
# Re-enable it to actually fine-tune before saving.
# start training, the model will be automatically saved to the hub and the output directory
# trainer.train()

# save model
# The same string is used later in this notebook as `peft_model_id` when the
# adapter is loaded back.
trainer.save_model('Ksgk-fy/alignment-adapter-01')


#### Test on the Adapted Model

In [49]:
from transformers import AutoModelForCausalLM
from peft import PeftConfig, PeftModel

# Adapter configuration fetched from the adapter repository.
config = PeftConfig.from_pretrained("Ksgk-fy/alignment-adaptor-test01")

# Load the base checkpoint and wrap it with the adapter weights in one step.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta"),
    "Ksgk-fy/alignment-adaptor-test01",
)

adapter_config.json:   0%|          | 0.00/733 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

In [52]:
import gc

import torch

# Drop the notebook's references to the previous model and the trainer before
# the adapter is loaded again in the next cell.
del model
del trainer

# `del` only removes the names. Run the garbage collector so objects kept alive
# by reference cycles are released, then hand cached CUDA blocks back so the
# next model load has room.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [53]:
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Same identifier that was passed to trainer.save_model(...) earlier.
peft_model_id = "Ksgk-fy/alignment-adapter-01"

# Load Model with PEFT adapter, in float16 with automatic device placement.
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Tokenizer is read from the same location as the adapter.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# Wrap model and tokenizer in a text-generation pipeline.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MusicgenMelodyForCausalLM', 'MvpForCausalLM', 'OpenLlam

In [61]:
# Preview the first record rendered in the training text format.
# NOTE(review): eval_dataset is assigned in the cell *after* this one, so on a
# fresh kernel that cell has to run first.
formatting_prompts_func(eval_dataset[:1])[0]

"### Question: Compare customers' response in the two conversations:\nConversation A: Customer: I am not interested in your insurance.\nConversation B: Sale: Hello, how can I help with your insurance needs today?\nIs customer A more insulting than customer B?\n ### Answer: Yes"

In [61]:
# Imported here so this cell does not depend on an import made in a later cell.
from datasets import load_dataset

# NOTE(review): this is the same dataset id and "train" split used for
# fine-tuning earlier in the notebook, so results on it are not held-out.
eval_dataset = load_dataset("Ksgk-fy/alignment-sft-test01", split="train")

In [69]:
from datasets import load_dataset
from random import randint


# Load our test dataset
# eval_dataset = load_dataset("Ksgk-fy/alignment-sft-test01", split="train")

# randint() includes both end points, so the upper bound has to be len - 1;
# the previous bound of len(eval_dataset) could index one past the end.
rand_idx = randint(0, len(eval_dataset) - 1)

# Test on sample
prompt = formatting_query_prompt_func(eval_dataset[rand_idx])

# Greedy decoding (do_sample=False). The temperature / top_k / top_p arguments
# were removed because they only apply when sampling is enabled.
outputs = pipe(
    prompt,
    max_new_tokens=256,
    do_sample=False,
    eos_token_id=pipe.tokenizer.eos_token_id,
    pad_token_id=pipe.tokenizer.pad_token_id,
)

# Optional side-by-side printout (records expose 'prompt' and 'completion'):
# print(f"Query:\n{eval_dataset[rand_idx]['prompt']}")
# print(f"Original Answer:\n{eval_dataset[rand_idx]['completion']}")
# print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")



In [68]:
# Display the query text built for the sampled record.
# NOTE(review): this cell's execution count (68) is lower than the sampling
# cell's (69), so the shown value comes from an earlier draw than `outputs`.
prompt

"### Question: Compare customers' response in the two conversations:\nConversation A: Sale: Hello, how can I help with your insurance needs today?\nConversation B: Sale: Hello, how can I help with your insurance needs today?\nIs customer A more impudent than customer B?\n ### Answer: "

In [70]:
# Display the raw pipeline result; each entry's 'generated_text' starts with the
# prompt followed by the model's continuation.
outputs

[{'generated_text': "### Question: Compare customers' response in the two conversations:\nConversation A: Customer: Interesting, what sort of product do you have?\nConversation B: Customer: Not interested\nIs customer A more offensive than customer B?\n ### Answer:  No No No No?\nIs customer A more impertinent than customer B?\n ### Answer: Yes No No Yes\nIs customer A more abrasiveive than customer B?\n ### Answer: Yes No Yes\n\n ### B: No:\n ### Answer: Yes no No No Yes customer B?\n ### Answer: No No No No Yes\n B? B: No: Yes No Yes customer B?\n ### Answer: No No No Yes\n B? B: No: Yes: Yes No Yes No No No:\n ### A:: No No Yes\n ### Answer: Yes No No No\n\n ### B: No.\n\n ### A: No.\n ### Answer: Yes No: Yes customer B?\n ### Answer: No No No Yes customer B?\n ### Answer: No No\n\n ### Answer: No No No\n\n ### B: No:\n ### customer B? needs customer:\nIs customer A more impolite than customer B?\n ### Answer: Yes no.\nConversation A:::,,,. no.\nConversation A: Sale: than customer B

In [76]:
# First whitespace-delimited word of the continuation (text after the prompt).
# split() with no argument splits on any whitespace, so an answer followed by a
# newline or tab is still isolated; an empty continuation yields ''.
(outputs[0]['generated_text'][len(prompt):].split() or [''])[0]

'No'

In [77]:
# Show the sampled record, including its reference 'completion', for comparison
# with the generated answer.
eval_dataset[rand_idx]

{'prompt': "Compare customers' response in the two conversations:\nConversation A: Customer: Interesting, what sort of product do you have?\nConversation B: Customer: Not interested\nIs customer A more offensive than customer B?",
 'completion': 'No',
 '__index_level_0__': 3476}