In [1]:
import os
from configs import RESULT_DIR, HF_CACHE, hf_token
os.environ['HF_HOME'] = HF_CACHE
from helper import load_xsum_data

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from peft import PeftModel, LoraConfig, get_peft_model

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['article'])):
        messages = [
            {"role": "user",
             "content": "Given the following article, write a short summary of the article in 1 sentence:\n\nArticle: {}".format(example['article'][i])},
            {"role": "assistant",
             "content": "{}".format(example['highlights'][i])}
        ]
        output_texts.append(tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False))
        
    return output_texts

In [2]:
model_name = "google/gemma-2b-it"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

lora_config = LoraConfig(
    r=64,
    lora_alpha = 16,
    lora_dropout = 0.1,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
tokenizer.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(model_name, 
                                             token=hf_token, 
                                             device_map="auto")

# model = get_peft_model(model, peft_config=peft_config)

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.
Loading checkpoint shards: 100%|██████████| 2/2 [01:09<00:00, 34.92s/it]


In [3]:
from datasets import Dataset
from trl import SFTTrainer
import pandas as pd
import transformers

In [4]:
questions, labels = load_xsum_data(dataset_name="train")
data_df = pd.DataFrame({"article": questions, "highlights": labels})
train_data = Dataset.from_pandas(data_df)

In [7]:
trainer = SFTTrainer(
model=model,
train_dataset=train_data,
max_seq_length=512,
args=transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=2,
    max_steps=100,
    learning_rate=0.00001,
    fp16=True,
    logging_steps=1,
    report_to='none',
    output_dir='logs',
    # optim="paged_adamw_8bit"
),
peft_config=lora_config,
formatting_func=formatting_prompts_func,
)
trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
Map: 100%|██████████| 204045/204045 [01:08<00:00, 2994.44 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss
1,3.7642
2,3.8618
3,3.7559
4,3.8257
5,4.153
6,4.2026
7,4.1349
8,3.8118
9,3.644
10,3.8008



Cannot access gated repo for url https://huggingface.co/google/gemma-2b-it/resolve/main/config.json.
Access to model google/gemma-2b-it is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in google/gemma-2b-it.


TrainOutput(global_step=100, training_loss=3.503480293750763, metrics={'train_runtime': 52.589, 'train_samples_per_second': 7.606, 'train_steps_per_second': 1.902, 'total_flos': 2008254081441792.0, 'train_loss': 3.503480293750763, 'epoch': 0.001960351883163028})

In [8]:
trainer.model.save_pretrained('lora_adapter')


Cannot access gated repo for url https://huggingface.co/google/gemma-2b-it/resolve/main/config.json.
Access to model google/gemma-2b-it is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in google/gemma-2b-it.


In [9]:
base_model_name = "google/gemma-2b-it"
adapter_model_name = "lora_adapter"

model = AutoModelForCausalLM.from_pretrained(base_model_name, device_map='auto', torch_dtype=torch.float16, token=hf_token)
model = PeftModel.from_pretrained(model, adapter_model_name, device_map='auto', torch_dtype=torch.float16)

model = model.merge_and_unload()
model.save_pretrained('final_model')

tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=hf_token)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.28s/it]


In [10]:

# Load the HF pipeline using our newly fine-tuned Gemma 2B
pipe_finetuned = pipeline(
    "text-generation",
    model="final_model",
    tokenizer=tokenizer,
    model_kwargs={"torch_dtype": torch.float16},
    device_map='auto',
    max_new_tokens=512
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.87s/it]


In [11]:
questions, labels = load_xsum_data(dataset_name="test")

In [12]:
import tqdm

test_examples = {
    "article": questions,
    "highlights": labels
}
prompts = formatting_prompts_func(test_examples)

test_size = 100

summaries = []
for prt in tqdm.tqdm(prompts[:test_size]):
    output = pipe_finetuned(
        prt,
        do_sample=True,
        temperature=0.1,
        top_k=20,
        top_p=0.3,
        add_special_tokens=True
    )
    summaries.append(output[0]["generated_text"][len(prt):])
    

  9%|▉         | 9/100 [00:08<01:15,  1.21it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 100/100 [00:50<00:00,  1.98it/s]


In [13]:
summaries

['The government is working to build more one-bedroom flats to ease the problem.',
 'The article does not provide any further details about the case.',
 'The suspects allegedly beat the victim in a van and then forced him to drink toilet water and kiss the floor.',
 'He played for Reading for four years and was youth academy director for four years.',
 'The diet mimics periods of feast and famine, which can help to restore the function of the organ.',
 "The two firms are the world's biggest manufacturers of lenses and frames, and their combined turnover is more than 15bn euros.\nThe deal is subject to mandatory scrutiny by the European Commission, but the firm is confident that it will not raise anti-monopoly issues.",
 'She claims she was discriminated against because of her age and that she was not given a proper explanation for her exclusion from the 2012 Olympic team.\nThe article also highlights the culture of fear that has been prevalent in elite cycling, with riders being droppe

In [15]:
import evaluate

rouge = evaluate.load('rouge')
results = rouge.compute(predictions=labels[:test_size], references=summaries)
results = list(results.values())

In [16]:
results

[0.15553862632063603,
 0.019983746598986326,
 0.11878879777268667,
 0.1198846131813453]