In [None]:
# Attach the user's Google Drive to this Colab runtime so files stored there can be read.
from google.colab import drive

# Directory inside the runtime under which the Drive contents appear.
DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# install libraries
!pip install -q -U peft==0.6.2 transformers==4.35.2 datasets==2.15.0 bitsandbytes==0.41.2.post2 trl==0.7.4 accelerate==0.24.1 wandb

In [None]:
# Log in with a Hugging Face account.
# The stored credentials are used later in this notebook when the
# `meta-llama/Llama-2-7b-hf` checkpoint is downloaded and when training runs
# with `push_to_hub=True`.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig

# Base checkpoint to fine-tune: the 7B Llama 2 model.
model_id = "meta-llama/Llama-2-7b-hf"

# Quantization settings: load the weights in 4-bit NF4 and run the
# computation in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized base model.
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)

# Load the matching tokenizer.
# `trust_remote_code=True` is intentionally not passed: the Llama tokenizer is
# implemented inside transformers, so there is no reason to allow code from the
# model repository to be executed.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Register a dedicated padding token instead of reusing EOS, so that EOS tokens
# are still attended to.
# NOTE(review): this adds a token to the tokenizer without resizing the model's
# embedding matrix. That appears harmless only as long as the `<PAD>` id is never
# fed to the model (training below uses `packing=True`, inference uses a single
# unpadded prompt) — confirm, or call `model.resize_token_embeddings(len(tokenizer))`
# if padded batches are ever used.
tokenizer.add_special_tokens({'pad_token': '<PAD>'})

In [None]:
# Attach a LoRA adapter to the base model.
# The adapter is injected into the listed projection layers: attention
# (q/k/v/o) and MLP (gate/up/down).
LORA_TARGET_MODULES = [
    "q_proj",
    "o_proj",
    "k_proj",
    "v_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,            # LoRA rank
    bias="none",    # bias terms are left out of the adapter
    target_modules=LORA_TARGET_MODULES,
)

model.add_adapter(lora_config)

In [None]:
import pandas as pd
import pyarrow as pa
from datasets import Dataset

# Load the Reddit AMA export and turn it into a single-column frame (`data`)
# of prompt-formatted training strings. The result is left in `df`, which later
# cells read.

# Absolute path under the Drive mount point ('/content/gdrive'), so the read
# does not depend on the notebook's current working directory.
CSV_PATH = '/content/gdrive/My Drive/datasets/reddit_AMA_full_export.csv'
# keep a small subset of the data (so that training doesn't take too long)
MAX_ROWS = 100
# Placeholder texts Reddit leaves behind for deleted / moderated content.
PLACEHOLDER_TEXTS = ['[deleted]', '[removed]']

# load data from csv file
raw_df = pd.read_csv(CSV_PATH)
print('Original number of rows:', len(raw_df))

# remove meaningless questions
clean_df = raw_df[~raw_df['question'].isin(PLACEHOLDER_TEXTS)]
print('Number of rows after removing meaningless questions:', len(clean_df))

# Also drop rows with a missing question/answer or a placeholder answer.
# Without this, `str(NaN)` would put the literal text 'nan' (and placeholder
# answers the literal '[deleted]' / '[removed]') into the training data.
clean_df = clean_df.dropna(subset=['question', 'answer'])
clean_df = clean_df[~clean_df['answer'].isin(PLACEHOLDER_TEXTS)]
print('Number of rows after removing missing or meaningless answers:', len(clean_df))

# Take the subset first so only the rows that are kept get formatted.
subset_df = clean_df.head(MAX_ROWS).reset_index(drop=True)
print('Remaining number of rows:', len(subset_df))

# process data so that it is consistent with LLaMA's prompt format
# (vectorised string concatenation instead of a row-by-row `.loc` loop).
# NOTE(review): the tokenizer normally prepends BOS on its own and packed
# training separates samples with EOS, so the literal '<s>' / '</s>' written
# here may end up duplicated after tokenization — confirm by inspecting the
# token ids of one formatted sample.
prompts = (
    '<s>[INST] '
    + subset_df['question'].astype(str)
    + ' [/INST] '
    + subset_df['answer'].astype(str)
    + ' </s>'
)

# keep useful columns only
df = pd.DataFrame({'data': prompts})

df.head()

In [None]:
# Convert the prepared dataframe to a Hugging Face dataset.
# `Dataset.from_pandas` is the documented constructor for this; it replaces the
# manual pandas -> pyarrow.Table -> `Dataset(...)` round trip.
# `preserve_index=False` keeps the pandas index out of the dataset so that
# `data` is its only column.
train_dataset = Dataset.from_pandas(df, preserve_index=False)

In [None]:
from transformers import TrainingArguments

# define training arguments
# `output_dir` is reused later in the notebook as the location the fine-tuned
# model is reloaded from.
output_dir = "aloysiuschan/llama-7b-qlora-reddit-ama-230524"
BATCH_SIZE = 4

# Step budget: one optimizer step per BATCH_SIZE rows of the prepared frame.
# Clamped to at least 1 because `len(df) // BATCH_SIZE` is 0 when there are
# fewer than BATCH_SIZE rows, and a non-positive `max_steps` is not treated as
# a step limit by the Trainer.
MAX_STEPS = max(1, len(df) // BATCH_SIZE)

training_arguments = TrainingArguments(
    output_dir = output_dir,
    per_device_train_batch_size = BATCH_SIZE,
    gradient_accumulation_steps = 1,
    optim = "paged_adamw_32bit",
    save_steps = 5,       # checkpoint every 5 steps
    logging_steps = 5,    # log every 5 steps
    learning_rate = 2e-4,
    max_grad_norm = 0.3,  # gradient clipping threshold
    max_steps = MAX_STEPS,
    # NOTE(review): with the plain "constant" scheduler the warmup setting is
    # not applied; use "constant_with_warmup" if a warmup phase is wanted.
    warmup_ratio = 0.03,
    lr_scheduler_type = "constant",
    gradient_checkpointing = True,
    push_to_hub = True
)

In [None]:
from trl import SFTTrainer

# Build the supervised fine-tuning trainer from the pieces prepared above.
trainer = SFTTrainer(
    # what to train, and how to tokenize for it
    model=model,
    tokenizer=tokenizer,
    # optimisation / checkpointing settings
    args=training_arguments,
    # training text: the `data` column of the prepared dataset
    train_dataset=train_dataset,
    dataset_text_field="data",
    # pack samples into sequences of up to 1024 tokens instead of padding each one
    max_seq_length=1024,
    packing=True,
)

In [None]:
# train the model
# Runs the fine-tuning loop on `trainer`; the step budget, checkpoint cadence
# and Hub upload are all governed by the `training_arguments` it was built with.
trainer.train()

In [None]:
# load the trained model
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)

# Reload tokenizer and model from the training output location.
# NOTE(review): `output_dir` is presumably resolved as a local directory if it
# exists and as a Hub repo id otherwise — confirm which one is picked up.
model_id = output_dir

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Quantize the base weights exactly as they were quantized for training
# (4-bit NF4, float16 compute). Leaving out `bnb_4bit_quant_type` falls back to
# the library default ("fp4"), so the adapter would run on top of base weights
# quantized differently from the ones it was trained against.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# To pin a specific adapter commit, additionally pass
# adapter_kwargs={"revision": "<commit sha>"}.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
)

In [None]:
# try out the trained model
text = "[INST] How should two highly sensitive people in a relationship navigate through simple ideological disagreements without taking it personally? [/INST]"

# Make sure the LoRA adapter is active (a later cell switches it off).
model.enable_adapters()

# Put the encoded prompt on whatever device the model lives on rather than a
# hard-coded GPU index.
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Pass the attention mask together with the input ids so `generate` does not
# have to guess it; greedy decoding (`do_sample=False`) keeps the output
# deterministic.
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=250,
    do_sample=False,
)

# Special tokens are kept in the printout so BOS/EOS placement is visible.
print(tokenizer.decode(outputs[0], skip_special_tokens=False))

In [None]:
# try out the original model (i.e. without LoRA adapter)
# Reuses `inputs` from the previous cell so both generations see the same prompt.
# The adapter stays disabled after this cell until `model.enable_adapters()`
# is called again.
model.disable_adapters()

# Pass the attention mask together with the input ids so `generate` does not
# have to guess it.
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=250,
    do_sample=False,
)

print(tokenizer.decode(outputs[0], skip_special_tokens=False))