In [1]:
!pip install -q -U bitsandbytes transformers peft accelerate datasets scipy einops evaluate trl rouge_score

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.2/102.2 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.6/38.6 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━

In [2]:
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    GenerationConfig
)

from tqdm import tqdm
from trl import SFTTrainer
import torch
import time
import pandas as pd
import numpy as np
from huggingface_hub import interpreter_login

interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: ··········
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [8]:
!git config --global credential.helper store

In [9]:
import os
os.environ["WANDB_ISABLED"] = "true"

### The dataset

We will use the DialogSum dataset for the fine tuning. It's a dialog summarization dataset

In [10]:
dataset_name = "neil-code/dialogsum-test"
dataset = load_dataset(dataset_name)

In [11]:
dataset["train"][0]

{'id': 'train_0',
 'dialogue': "#Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?\n#Person2#: I found it would be a good idea to get a check-up.\n#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.\n#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?\n#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.\n#Person2#: Ok.\n#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?\n#Person2#: Yes.\n#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.\n#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.\n#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.\n#Person2#: Ok, thanks doctor.",
 'summary': "Mr. Smith'

In [12]:
dataset["train"][0].keys()

dict_keys(['id', 'dialogue', 'summary', 'topic'])

## Create BitsandBytes config

We will load the model we will define this configuration class. This will load the model in 4-bit format and save considerable memory

In [13]:
compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = compute_dtype,
    bnb_4bit_use_double_quant = False
)

## Loading the pre-trained model

We will use the Microsoft Phi-2 model here. It is a SLM (Small Language Model) with 2.7 billion parameters. This is a very good model for reasoning and language understanding.

Because of the BitsAndBytes config we have used the model would be loaded in 4-bits

In [14]:
model_name = "microsoft/phi-2"
device_map = {"":0}
original_model = AutoModelForCausalLM.from_pretrained(model_name,
                                                      device_map = device_map,
                                                      quantization_config = bnb_config,
                                                      trust_remote_code = True,
                                                      use_auth_token = True)




config.json:   0%|          | 0.00/863 [00:00<?, ?B/s]

configuration_phi.py:   0%|          | 0.00/9.26k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-2:
- configuration_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi.py:   0%|          | 0.00/62.7k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-2:
- modeling_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

##Tokenization

In [15]:
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code = True,
    padding_side = "left",
    add_eos_token = True,
    add_bos_token = True,
    use_fast = False
)

tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [16]:
eval_tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    add_bos_token = True,
    trust_remote_code = True,
    use_fast = False
)

eval_tokenizer.pad_token = eval_tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [17]:
def gen(model, p, maxlen=100, sample=True):

    toks = eval_tokenizer(p, return_tensors="pt")

    res = model.generate(**toks.to("cuda"),
                         max_new_tokens = maxlen,
                         do_sample = sample,
                         num_return_sequences = 1,
                         temperature = 0.1,
                         num_beams = 1,
                         top_p = 0.95,
                         ).to("cpu")

    return eval_tokenizer.batch_decode(res, ip_special_tokens=True)

## Test the model with zero-shot inferencing

In [18]:
# %%time
from transformers import set_seed
seed = 42
set_seed(42)

index = 10

prompt = dataset["test"][index]["dialogue"]
summary = dataset["test"][index]["summary"]

formatted_prompt = f"Instruct : Summarize the following conversation. \n{prompt}\nOutput:\n"

res = gen(original_model, formatted_prompt, 100,)

output = res[0].split("Output:\n")[1]


dash = '_'.join('' for x in range(100))
print(dash)

print(f"\nINPUT PROMPT : {formatted_prompt}\n")
print(dash)

print(f"BASELINE HUMAN SUMMARY : \n{summary}\n")
print(dash)

print(f"MODEL GENERATED SUMMARY - ZERO SHOT : \n{output}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


___________________________________________________________________________________________________

INPUT PROMPT : Instruct : Summarize the following conversation. 
#Person1#: Happy Birthday, this is for you, Brian.
#Person2#: I'm so happy you remember, please come in and enjoy the party. Everyone's here, I'm sure you have a good time.
#Person1#: Brian, may I have a pleasure to have a dance with you?
#Person2#: Ok.
#Person1#: This is really wonderful party.
#Person2#: Yes, you are always popular with everyone. and you look very pretty today.
#Person1#: Thanks, that's very kind of you to say. I hope my necklace goes with my dress, and they both make me look good I feel.
#Person2#: You look great, you are absolutely glowing.
#Person1#: Thanks, this is a fine party. We should have a drink together to celebrate your birthday
Output:


___________________________________________________________________________________________________
BASELINE HUMAN SUMMARY : 
#Person1# attends Brian's birt

## Pre-processing the dataset

In [19]:
def create_prompt_formats(sample):

    INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."

    INSTRUCTION_KEY = "### Instruct: Summarize the below conversation."

    RESPONSE_KEY = "### Output:"

    END_KEY = "### End"

    blurb = f"\n{INTRO_BLURB}"
    instruction = f"{INSTRUCTION_KEY}"
    input_context = f"{sample['dialogue']}" if sample['dialogue'] else None
    response = f"{RESPONSE_KEY}\n{sample['summary']}"
    end = f"{END_KEY}"

    parts = [part for part in [blurb, instruction, input_context, response, end] if part]

    formatted_prompt = "\n\n".join(parts)

    sample["text"] = formatted_prompt

    return sample

In [20]:
def get_max_length(model):
    conf = model.config
    max_length = None

    for length_setting in ['n_positions', 'max_position_embeddings', 'seq_length']:
        max_length = getattr(model.config, length_setting, None)
        if max_length:
            print(f"Found max length : {max_length}")
            break

    if not max_length:
        max_length = 1024
        print(f"Using default max length : {max_length}")

    return max_length

In [21]:
def preprocess_batch(batch, tokenizer, max_length):
    """
    Tokenizing a batch
    """
    return tokenizer(
        batch['text'],
        max_length=max_length,
        truncation=True
    )

In [23]:
from functools import partial

def preprocess_dataset(tokenizer:AutoTokenizer, max_length:int, seed, dataset):

    print("Preprocessing dataset....")

    dataset = dataset.map(create_prompt_formats)

    _preprocessing_function = partial(preprocess_batch,
                                      max_length = max_length,
                                      tokenizer = tokenizer)

    dataset = dataset.map(_preprocessing_function,
                          batched=True,
                          remove_columns=['id', 'text', 'dialogue', 'summary'])

    dataset = dataset.filter(lambda sample : len(sample['input_ids']) < max_length)

    dataset = dataset.shuffle(seed=seed)

    return dataset

In [24]:
max_length = get_max_length(original_model)
print(max_length)

Found max length : 2048
2048


In [25]:
train_dataset = preprocess_dataset(tokenizer, max_length, seed, dataset['train'])
eval_dataset = preprocess_dataset(tokenizer, max_length, seed, dataset['validation'])

Preprocessing dataset....


Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1999 [00:00<?, ? examples/s]

Preprocessing dataset....


Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Filter:   0%|          | 0/499 [00:00<?, ? examples/s]

In [26]:
print("Shapes of the datasets are : ")
print(f"Training dataset shape : {train_dataset.shape}")
print(f"Validation dataset shape : {eval_dataset.shape}")
print(train_dataset)

Shapes of the datasets are : 
Training dataset shape : (1999, 3)
Validation dataset shape : (499, 3)
Dataset({
    features: ['topic', 'input_ids', 'attention_mask'],
    num_rows: 1999
})


# Model Fine Tuning

#### Setting up the PEFT / QLoRA model fro fine tuning

In [29]:
def print_number_of_traininable_model_parameters(model):

    trainable_model_params = 0
    all_model_params = 0

    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()

    print(f"All model parameters : {all_model_params}")
    print(f"Trainable model parameters : {trainable_model_params}")
    print(f"% of trainable params : {round(100 * trainable_model_params / all_model_params, 2)}")

In [30]:
print(print_number_of_traininable_model_parameters(original_model))

All model parameters : 1521392640
Trainable model parameters : 262364160
% of trainable params : 17.24
None


#### Start of PEFT training

In [31]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

In [36]:
config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=['q_proj', 'k_proj', 'v_proj', 'dense'],
    bias='none',
    lora_dropout=0.05,
    task_type='CAUSAL_LM'
)

In [37]:
# Enable gradient checkpointing to reduce memory usage duing fine tuning
original_model.gradient_checkpointing_enable()

In [38]:
original_model = prepare_model_for_kbit_training(original_model)

In [39]:
peft_model = get_peft_model(original_model, config)

In [41]:
# Check how many trainable parameters in the peft model
print(print_number_of_traininable_model_parameters(peft_model))

All model parameters : 1542364160
Trainable model parameters : 20971520
% of trainable params : 1.36
None


In [44]:
# See how the original model looks
print(original_model)

PhiForCausalLM(
  (model): PhiModel(
    (embed_tokens): Embedding(51200, 2560)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x PhiDecoderLayer(
        (self_attn): PhiAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=True)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=2560, out_features=32, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=32, out_features=2560, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
          )
          (k_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=True)
            (lora_dropout): ModuleDict(
              (d

In [42]:
# See how the model looks with the LoRA adapters added
print(peft_model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PhiForCausalLM(
      (model): PhiModel(
        (embed_tokens): Embedding(51200, 2560)
        (embed_dropout): Dropout(p=0.0, inplace=False)
        (layers): ModuleList(
          (0-31): 32 x PhiDecoderLayer(
            (self_attn): PhiAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2560, out_features=2560, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2560, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=2560, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4

### Train the PEFT adapter

In [47]:
output_dir = './peft_dialogue_summary_training/final_checkpoint'

import transformers

peft_training_args = TrainingArguments(
    output_dir = output_dir,
    warmup_steps = 1,
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 4,
    max_steps = 1000,
    learning_rate = 2e-4,
    optim = 'paged_adamw_8bit',
    logging_steps = 25,
    logging_dir = './logs',
    save_strategy = 'steps',
    save_steps = 25,
    evaluation_strategy = 'steps',
    eval_steps = 25,
    do_eval = True,
    gradient_checkpointing = True,
    report_to = 'none',
    overwrite_output_dir = 'True',
    group_by_length = True,

)

peft_model.config.use_cache = False

In [50]:
peft_trainer = transformers.Trainer(
    model = peft_model,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    args = peft_training_args,
    data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [51]:
peft_training_args.device

device(type='cuda', index=0)

### Start of PEFT Training

In [None]:
peft_trainer.train()



Step,Training Loss,Validation Loss
25,1.6584,1.39253
50,1.1903,1.386989




Step,Training Loss,Validation Loss
25,1.6584,1.39253
50,1.1903,1.386989
75,1.4452,1.352806
100,1.2034,1.360515
125,1.4393,1.342906
150,1.1384,1.360208
175,1.4032,1.340322
200,1.1448,1.346717
225,1.4446,1.334118
250,1.2238,1.337012


