# 1. Package Installation

In [19]:
!nvidia-smi

Mon Aug  5 13:27:29 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.54.03              Driver Version: 535.54.03    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          On  | 00000000:01:00.0 Off |                    0 |
| N/A   59C    P0              71W / 275W |    568MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM4-80GB          On  | 00000000:47:00.0 Off |  

In [2]:
%%writefile requirements.txt
# Core fine-tuning stack used by the modules written later in this notebook.
peft
fire
# `accelerate` (Hugging Face) provides the `accelerate launch` command used for
# training; `accelerator` is an unrelated PyPI project.
accelerate
transformers
datasets
evaluate
pyarrow
galore-torch
pytorch-ignite
rouge-score
nltk
py7zr
optimum[exporters]
trl
lightning
jsonargparse[signatures]
deepspeed
colossalai
wandb
tensorrt
# Index options must sit on their own line in a requirements file; they cannot be
# appended to an individual requirement.
--extra-index-url https://pypi.nvidia.com
nvidia-modelopt
# Imported by l_model.py / l_trainer.py / l_sweep.py but previously not listed.
bitsandbytes
torchmetrics
ray[train]

Overwriting requirements.txt


In [None]:
!CUDA_EXT=1 DS_BUILD=1 pip install -r requirements.txt

In [4]:
#@title Huggingface Login
#@markdown Login is required in order to use Hugging Face weights.
# SECURITY: never commit an access token to a notebook. Export HF_TOKEN in the
# environment that starts the kernel; any token that was previously committed
# here must be revoked and re-issued.
# `$$` makes IPython pass a literal `$` to the shell so the shell expands HF_TOKEN.
!huggingface-cli login --token "$$HF_TOKEN"


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /user/jonathan/.cache/huggingface/token
Login successful


In [5]:
#@title Weight and Bias Train Logger Login
#@markdown Log in to Weights & Biases.
# SECURITY: never commit an API key to a notebook. Export WANDB_API_KEY in the
# environment that starts the kernel; any key that was previously committed
# here must be revoked and re-issued.
# `$$` makes IPython pass a literal `$` to the shell so the shell expands WANDB_API_KEY.
!wandb login "$$WANDB_API_KEY"

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /user/jonathan/.netrc


# 2. Load Model


In [11]:
%%writefile peft_model.py

import os
import fire
import torch
from peft import AutoPeftModelForCausalLM, LoraConfig
from peft import (inject_adapter_in_model, prepare_model_for_kbit_training,
                  get_peft_model, replace_lora_weights_loftq)
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from transformers.utils import PaddingStrategy
from transformers.tokenization_utils_base import TruncationStrategy
from datasets import load_dataset
from random import randint


# Candidate base checkpoints exposed to other modules (l_sweep.py imports this list).
# Commented-out entries are kept for reference and are not part of the list.
hf_model_list = [
    # "Gunulhona/tb_pretrained_sts",
    # "Gunulhona/tb_pretrained",
    "google/flan-t5-xl",
    # "meta-llama/Meta-Llama-3.1-8B",
    # "meta-llama/Meta-Llama-3-70B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mistral-Nemo-Instruct-2407",
    "Qwen/Qwen2-7B-Instruct",
    "google/gemma-7b",
    "MLP-KTLim/llama-3-Korean-Bllossom-8B",
    "EleutherAI/polyglot-ko-12.8b",
    "vilm/vulture-40b",
    "tiiuae/falcon-11B",
    "tiiuae/falcon-7b-instruct",
    "arcee-ai/Arcee-Spark",
    "apple/DCLM-7B-8k",
    "SciPhi/Triplex",  # rag chatbot
    # "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "bigscience/bloomz-7b1-mt",
    # "bigscience/bloomz-1b1",
    # "Qwen/Qwen2-1.5B-Instruct",
    # "Qwen/Qwen2-0.5B-Instruct",
    # "OuteAI/Lite-Oute-1-65M",
    # "OuteAI/Lite-Mistral-150M-v2-Instruct",
    # "google/gemma-2b-it",
    "jjhsnail0822/danube-ko-1.8b-base",
    # "OpenBuddy/openbuddy-stablelm-3b-v13",
    "daekeun-ml/phi-2-ko-v0.1",
    # "microsoft/Phi-3-mini-128k-instruct",
    # "HuggingFaceTB/SmolLM-1.7B",
    # "HuggingFaceTB/SmolLM-360M",
    # "HuggingFaceTB/SmolLM-135M",
    # "numind/NuExtract",
]

# Default checkpoint id; the trailing `@param` annotation is a Colab form field.
base_model_id = "Qwen/Qwen2-1.5B-Instruct" # @param ["Gunulhona/tb_pretrained_sts", "Gunulhona/tb_pretrained", "google/flan-t5-xxl", "meta-llama/Meta-Llama-3-8B", "meta-llama/Meta-Llama-3-70B-Instruct", "mistralai/Mistral-7B-Instruct-v0.3", "Qwen/Qwen2-7B-Instruct", "google/gemma-7b", "MLP-KTLim/llama-3-Korean-Bllossom-8B", "EleutherAI/polyglot-ko-12.8b", "vilm/vulture-40b", "arcee-ai/Arcee-Spark", "Qwen/Qwen2-1.5B-Instruct", "OuteAI/Lite-Mistral-150M", "google/gemma-2b-it"] {allow-input: true}

def get_model(model_name:str,
              r: int = 8,
              lora_alpha: int = 32,
              lora_dropout: float = 0.05,
              init_lora_weights: str = "gaussian", #"gaussian", "pissa", "pissa_niter_{n}", "loftq", False
              use_dora: bool = False,
              use_rslora: bool = False,
              fan_in_fan_out: bool = False,):
    """Load a causal LM with a LoRA adapter attached, plus its tokenizer.

    Args:
        model_name: Hub id (or path) used for BOTH the model weights and the
            tokenizer.
        r, lora_alpha, lora_dropout, init_lora_weights, use_dora, use_rslora,
        fan_in_fan_out: forwarded unchanged to ``LoraConfig``.

    Returns:
        dict with keys ``"model"`` (the PEFT-wrapped model) and ``"tokenizer"``.
    """
    # NOTE(review): this config is built but not passed to from_pretrained (the
    # quantization_config line below is commented out). The bnb_8bit_* keyword
    # names also look unsupported by BitsAndBytesConfig - confirm before enabling.
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        bnb_8bit_quant_type="nf8",
        bnb_8bit_compute_dtype=torch.float16
    )

    # Fix: load the checkpoint named by the caller. The previous code always
    # loaded the module-level `base_model_id`, so the weights and the tokenizer
    # could come from different checkpoints.
    peft_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        # quantization_config=bnb_config
        )

    peft_model = prepare_model_for_kbit_training(peft_model)

    # adapter configuration
    lora_config = LoraConfig(
        target_modules=["q_proj", "k_proj"],
        init_lora_weights=init_lora_weights,
        r=r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        inference_mode=False,
        use_dora=use_dora,
        use_rslora=use_rslora,
        fan_in_fan_out=fan_in_fan_out,
        task_type="CAUSAL_LM",
    )

    # NOTE(review): the same config is applied twice - once in place under the
    # adapter name "adapter_1" and once more through get_peft_model. Confirm
    # that two adapters on the same target modules is intended.
    # peft_model.add_adapter(lora_config, adapter_name="adapter_1")
    inject_adapter_in_model(lora_config, peft_model, "adapter_1")
    peft_model = get_peft_model(peft_model, lora_config)


    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        add_special_tokens=True,
        trust_remote_code=True)
    tokenizer.model_input_names=['input_ids', 'attention_mask']
    # Fall back to the EOS token when the checkpoint defines no padding token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    tokenizer.padding_side = "right"
    tokenizer.truncation_side = "right"
    return {
        "model": peft_model,
        "tokenizer": tokenizer
        }


Overwriting peft_model.py


# 3. Load Dataset

In [30]:
%%writefile finetuning_datasets.py
import numpy as np
from datasets import load_dataset, concatenate_datasets
from transformers import DataCollatorForSeq2Seq, DataCollatorWithPadding, DataCollatorForLanguageModeling

from evaluate import load
from finetuning_datafunctions import formatting, preprocess_function, SumDataCallator


# Default Hub dataset id; imported by the trainer scripts and passed to get_dataset.
# The trailing `@param` annotation is a Colab form field.
dataset_path = "jonathankang/ENKO-MEDIQA" # @param ["Samsung/samsum", "emozilla/soda_synthetic_dialogue", "frcp/summary-alpaca-v01", "ChuGyouk/Ko-MTS-Dialog", "har1/MTS_Dialogue-Clinical_Note", "316usman/research_clinical_visit_note_summarization_corpus_mts", "jonathankang/MEDICAL-DIALOG-SUMMARY", "jonathankang/ENKO-MEDIQA"] {allow-input: true}
def get_dataset(dataset_name: str,
                tokenizer):
    """Load a bilingual dialogue-summary dataset and render it as chat text.

    The dataset is expected to expose ``train``, ``test`` and ``validation``
    splits with the columns ``dialogue``/``summary`` (English), ``대화``/``요약``
    (Korean) and ``id``. Each split is rendered once per language through
    ``preprocess_function`` and the two renderings are concatenated.

    Args:
        dataset_name: Hub id (or path) handed to ``load_dataset``.
        tokenizer: tokenizer used to measure lengths and to apply the chat
            template inside ``preprocess_function``.

    Returns:
        dict with keys ``"train"``, ``"test"``, ``"val"`` (datasets),
        ``"metric"`` (the ROUGE metric object) and ``"max_source_length"``.
    """
    # Fix: honour the argument. The previous code always loaded the
    # module-level `dataset_path` and silently ignored `dataset_name`.
    raw_dataset = load_dataset(
        dataset_name,
        trust_remote_code=True,
        revision="main",
    )

    metric = load("rouge")

    def _length_budget(column):
        """Return max token length of `column` on train plus its 10th percentile."""
        tokenized = raw_dataset["train"].map(
            lambda x: tokenizer(x[column], truncation=True),
            batched=True,
            remove_columns=["dialogue", "summary"])
        lengths = [len(ids) for ids in tokenized["input_ids"]]
        # 100th percentile == longest example; the 10th percentile is added as headroom.
        return int(np.percentile(lengths, 100)) + int(np.percentile(lengths, 10))

    # The source budget is capped at 4096 tokens; the target budget is not capped.
    max_source_length = min(4096, _length_budget("dialogue"))
    max_target_length = _length_budget("summary")

    def _render(context_column, target_column):
        """Render every split as chat text using one language's column pair."""
        return raw_dataset.map(
            preprocess_function,
            batched=True,
            remove_columns=["dialogue", "summary", "id", "대화", "요약"],
            fn_kwargs={
                "tokenizer": tokenizer,
                "max_source_length": max_source_length,
                "max_target_length": max_target_length,
                "context_column": context_column,
                "target_column": target_column
                },)

    en_dataset = _render("dialogue", "summary")
    ko_dataset = _render("대화", "요약")

    train_dataset = concatenate_datasets([en_dataset["train"], ko_dataset["train"]])
    test_dataset = concatenate_datasets([en_dataset["test"], ko_dataset["test"]])
    val_dataset = concatenate_datasets([en_dataset["validation"], ko_dataset["validation"]])

    return {
        "train":train_dataset,
        "test":test_dataset,
        "val":val_dataset,
        "metric": metric,
        "max_source_length": max_source_length
        }

Overwriting finetuning_datasets.py


In [32]:
%%writefile finetuning_datafunctions.py
import numpy as np
import torch
from datasets import load_dataset, concatenate_datasets
from transformers import DataCollatorForSeq2Seq, DataCollatorWithPadding, DataCollatorForLanguageModeling

from evaluate import load


def formatting(sample,
               max_source_length,
               max_target_length,
               tokenizer,
               padding:str = "max_length",
               context_column: str = "dialogue",
               target_column: str = "summary"):
    """Render a batch of (context, target) pairs through the chat template.

    Args:
        sample: mapping of column name -> list of values (a batched example).
        max_source_length, max_target_length, padding: accepted for interface
            compatibility; not used here because no tokenisation happens in
            this function.
        tokenizer: object exposing ``apply_chat_template``.
        context_column: column holding the text to summarise.
        target_column: column holding the reference summary.

    Returns:
        ``(model_inputs, labels)``: two lists of template-rendered strings, one
        user-turn prompt and one assistant-turn answer per example.
    """
    model_inputs, labels = [], []
    for dialogue, summary in zip(sample[context_column], sample[target_column]):
        chat_template = [
            {
                "role": "user",
                "content": f"Summarize the following dialogue\n\n{dialogue}"
            },
        ]
        label_template = [{
                "role": "assistant",
                "content": f"{summary}"
        }]

        # Fix: the keyword was misspelled (`add_generateion_prompt`), so the
        # intended option was never the one being set.
        chat_message = tokenizer.apply_chat_template(conversation=chat_template,
                                                     tokenize=False,
                                                     add_generation_prompt=False, )
        bot_message = tokenizer.apply_chat_template(conversation=label_template,
                                                    tokenize=False,
                                                    add_generation_prompt=False, )
        model_inputs.append(chat_message)
        labels.append(bot_message)

    return model_inputs, labels

def preprocess_function(sample, max_source_length, max_target_length, tokenizer, context_column, target_column):
    """Adapter for ``datasets.map``: render a batch and name the output columns.

    Delegates to ``formatting`` and returns its two lists under the column
    names ``"input_ids"`` (rendered prompts) and ``"labels"`` (rendered
    answers). Both columns hold text at this stage, not token ids.
    """
    format_kwargs = dict(
        sample=sample,
        max_source_length=max_source_length,
        max_target_length=max_target_length,
        tokenizer=tokenizer,
        context_column=context_column,
        target_column=target_column,
    )
    prompts, answers = formatting(**format_kwargs)
    return dict(zip(("input_ids", "labels"), (prompts, answers)))

class CallatorOutput:
    def __init__(self, input_ids, attention_mask, labels):
        self._input_ids = input_ids
        self._attention_mask = attention_mask
        self._labels = labels

    def __len__(self,):
        return len(self._input_ids)

    def __getitem__(self, key):
        match key:
            case "input_ids":
                return self._input_ids
            case "attention_mask":
                return self._attention_mask
            case "labels":
                return self._labels
            case _:
                raise KeyError(f"Key {key} not found")

    def __setitem__(self, key, value):
        match key:
            case "input_ids":
                self._input_ids = value
            case "attention_mask":
                self._attention_mask = value
            case "labels":
                self._labels = value
            case _:
                raise KeyError(f"Key {key} not found")

    def __iter__(self):
        return iter(self.__dict__.keys())

    def to_dict(self):
        return {
            "input_ids": self["input_ids"],
            "attention_mask": self["attention_mask"],
            "labels": self["labels"]
        }
    def items(self):
        return self.to_dict().items()

    def keys(self):
        return self.to_dict().keys()

    def values(self):
        return self.to_dict().values()

    @property
    def input_ids(self):
        return self._input_ids

    @input_ids.setter
    def input_ids(self, value):
        self._input_ids = value

    @property
    def attention_mask(self):
        return self._attention_mask

    @attention_mask.setter
    def attention_mask(self, value):
        self._attention_mask = value

    @property
    def labels(self):
        return self._labels

    @labels.setter
    def labels(self, value):
        self._labels = value



class SumDataCallator(DataCollatorForLanguageModeling):
    """Collator that tokenises text prompts/answers at batch time.

    Each incoming example carries text under ``"input_ids"`` and ``"labels"``.
    Both are tokenised with truncation and padding to ``max_length``, moved to
    ``self.device`` and wrapped in a ``CallatorOutput``.

    NOTE(review): label padding positions are returned as ordinary token ids
    (they are not replaced by an ignore index), and prompts and answers are
    tokenised as two independent sequences - confirm this is what the loss
    computation expects.
    """

    def __init__(self, tokenizer, max_length,):
        # The parent initialiser is intentionally not invoked (as before).
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def _tokenizing(self, text):
        """Tokenise ``text`` into fixed-length PyTorch tensors."""
        encode_options = dict(
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )
        return self.tokenizer(text, **encode_options)

    def __call__(self, batch):
        # Single pass over the batch, reading the prompt then the answer of each example.
        pairs = [(example["input_ids"], example["labels"]) for example in batch]
        prompts = [prompt for prompt, _ in pairs]
        answers = [answer for _, answer in pairs]

        prompt_encoding = self._tokenizing(prompts)
        answer_encoding = self._tokenizing(answers)

        return CallatorOutput(
            input_ids=prompt_encoding['input_ids'].to(self.device),
            attention_mask=prompt_encoding['attention_mask'].to(self.device),
            labels=answer_encoding['input_ids'].to(self.device),
        )

        # raise Exception("STOP")

Overwriting finetuning_datafunctions.py


# 4. Train

In [15]:
%%writefile train.py
import os  # fix: os.environ is used below but `os` was never imported

import nltk
import numpy as np
import torch
from torch.utils.data import DataLoader
from transformers import TrainingArguments, Trainer, TrainerCallback
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments
from transformers import DataCollatorForSeq2Seq, DataCollatorWithPadding, DataCollatorForLanguageModeling
from trl import SFTTrainer, SFTConfig
from ignite.metrics import Rouge
# Fix: peft_model.py and finetuning_datasets.py only export factory functions
# and default ids; the ready-made objects imported here before do not exist at
# module level, so the old imports raised ImportError.
from peft_model import get_model, base_model_id
from finetuning_datasets import get_dataset, dataset_path
from finetuning_datafunctions import SumDataCallator, formatting, preprocess_function

os.environ["WANDB_PROJECT"] = "<summ>"  # Change this to your project name
os.environ["WANDB_LOG_MODEL"] = "checkpoint"  # Log model checkpoints

# Build the objects the rest of this script refers to by name.
model_data = get_model(model_name=base_model_id)
peft_model = model_data["model"]
tokenizer = model_data["tokenizer"]
# get_model already returns a PEFT-wrapped model and does not expose its
# LoraConfig, so there is no separate config to hand to the trainer.
lora_config = None

# get_dataset returns the splits under "train"/"test"/"val" next to the metric.
dataset = get_dataset(dataset_name=dataset_path, tokenizer=tokenizer)
metric = dataset["metric"]
max_source_length = dataset["max_source_length"]

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
peft_model.to(device)

class ColabTrainer(SFTTrainer):
    """Placeholder subclass of ``SFTTrainer``; adds no behaviour of its own."""


# Callback Class
class EarlyStoppingCallback(TrainerCallback):
    """Stop training once a fixed number of optimisation steps has been reached."""

    def __init__(self, num_steps=150):
        # Step count at (or beyond) which training is asked to stop.
        self.num_steps = num_steps

    def on_step_end(self, args, state, control, **kwargs):
        """Flag the control object for stopping when the step budget is used up."""
        budget_exhausted = state.global_step >= self.num_steps
        if budget_exhausted:
            control.should_training_stop = True
        return control

# metric function
def compute_metrics(eval_pred):
    """Compute ROUGE (scaled to 0-100) and mean generated length for an eval pass.

    Args:
        eval_pred: ``(predictions, labels)``. ``predictions`` may be token ids
            of shape (batch, seq), logits of shape (batch, seq, vocab), or a
            tuple whose first element is one of those. ``labels`` are token
            ids in which -100 marks ignored positions.

    Returns:
        dict of metric name -> value rounded to 4 decimals, including
        ``"gen_len"`` (mean count of non-pad predicted tokens).
    """
    predictions, labels = eval_pred
    # Some models return extra outputs alongside the logits; keep the first.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)
    # Fix: a Trainer-based evaluation hands over logits, which cannot be
    # decoded directly - reduce them to the most likely token id per position.
    if predictions.ndim == 3:
        predictions = predictions.argmax(axis=-1)
    # -100 cannot be decoded; map it to the pad token in both arrays.
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Rouge expects a newline after each sentence
    decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
    decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]

    result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True, use_aggregator=True)
    # Report scores as percentages.
    result = {key: value * 100 for key, value in result.items()}

    # Add mean generated length
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

import wandb  # fix: wandb.init is called below but `wandb` was never imported

# Collator that tokenises the text columns at batch time.
data_collator = SumDataCallator(tokenizer, max_length=max_source_length)

# NOTE(review): fp16 is requested here while deepspeed_config.yaml selects
# bf16 mixed precision - confirm which one is intended.
# NOTE(review): push_to_hub is enabled together with save_strategy="no" -
# confirm that a Hub upload is actually wanted.
training_args = TrainingArguments(
    output_dir="llms",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=4,
    weight_decay=0.01,
    # eval_strategy="epoch",
    save_strategy="no",
    # use_cpu=True,
    # load_best_model_at_end=True,
    remove_unused_columns=False,
    push_to_hub=True,
    logging_steps=1000,
    save_steps=1000,
    # Fix: 0.03 is a fraction of the total steps, which is what `warmup_ratio`
    # takes; `warmup_steps` expects an absolute step count.
    warmup_ratio=0.03,
    gradient_accumulation_steps=4,
    fp16=True,
    save_total_limit=3,
    logging_dir="llms/logs",
    optim="paged_adamw_8bit",
    report_to="wandb",
    run_name='qwenrun1',
)

# Initialize W&B
wandb.init(
    project="<summ>",  # Change to your project name
    # Pass a plain, serialisable dict rather than the TrainingArguments object.
    config=training_args.to_dict()
)

trainer = SFTTrainer(
    model=peft_model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # callbacks=[EarlyStoppingCallback()],
    peft_config=lora_config,
    # formatting_func=formatting,
)

trainer.train()

# NOTE(review): absolute, user-specific output path - consider making it configurable.
trainer.save_model("/user/jonathan/TRAINED")
tokenizer.save_pretrained("/user/jonathan/TRAINED")
# # Save the model, tokenizer and push everything to the Hub
# trainer.save_model()
# tokenizer.save_pretrained(training_args.output_dir)

# trainer.push_to_hub()
# tokenizer.push_to_hub(training_args.output_dir)


Overwriting train.py


In [14]:
%%writefile deepspeed_config.yaml
# Accelerate launch configuration: one machine, one process, DeepSpeed backend.
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  deepspeed_multinode_launcher: standard
  # Matches gradient_accumulation_steps=4 in train.py's TrainingArguments.
  gradient_accumulation_steps: 4
  offload_optimizer_device: none
  offload_param_device: none
  # NOTE(review): the two zero3_* flags are enabled while zero_stage is 2 -
  # confirm which ZeRO stage is intended.
  zero3_init_flag: true
  zero3_save_16bit_model: true
  zero_stage: 2
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
# NOTE(review): train.py sets fp16=True in TrainingArguments while bf16 is
# selected here - confirm which precision is intended.
mixed_precision: bf16
num_machines: 1
num_processes: 1
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false

Overwriting deepspeed_config.yaml


In [None]:
# Cell for setting up the environment and launching training with accelerate.
# Each `!` line runs in its own subshell, so a variable set with `!export ...`
# is gone before the next line starts. `%env` sets the variable on the kernel
# process instead, and the launched command inherits it.
%env CUDA_VISIBLE_DEVICES=0
%env CUDA_LAUNCH_BLOCKING=1
%env TORCH_USE_CUDA_DSA=0
%env HF_DATASETS_CACHE=/content/hf_cache/
!accelerate launch --config_file "deepspeed_config.yaml" train.py



## Training code to Lightning module

In [13]:
%%writefile l_datamodule.py
#@title Lightning Data Module
#@markdown Lightning Data Loading Modules

import lightning as L
import torch
from torch.utils.data import DataLoader

from transformers import DataCollatorForLanguageModeling
from finetuning_datafunctions import SumDataCallator


class FTDataModule(L.LightningDataModule):
    """Lightning data module wrapping three pre-built datasets.

    Args:
        train_dataset, val_dataset, test_dataset: datasets handed to
            ``DataLoader`` unchanged.
        data_collator: callable used as ``collate_fn`` for every loader.
        train_batch_size: batch size of the (shuffled) training loader.
        eval_batch_size: batch size of the validation and test loaders.
        training_args: optional free-form settings, stored as given (an empty
            dict when omitted).
    """

    def __init__(self,
                 train_dataset,
                 val_dataset,
                 test_dataset,
                 data_collator:DataCollatorForLanguageModeling,
                 train_batch_size: int = 1,
                 eval_batch_size:int = 1,
                 training_args: dict = None):
        super().__init__()
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.test_dataset = test_dataset
        self.data_collator = data_collator
        self.train_batch_size = train_batch_size
        self.eval_batch_size = eval_batch_size
        # Fix: avoid a mutable default argument shared between instances.
        self.training_args = {} if training_args is None else training_args

    def _get_dataloader(self, dataset, eval_mode: bool = False):
        """Build a loader; evaluation loaders are not shuffled."""
        # Fix: the two batch sizes were swapped (evaluation used the training
        # batch size and vice versa).
        batch_size = self.eval_batch_size if eval_mode else self.train_batch_size
        return DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=not eval_mode,
                          num_workers=0,
                          collate_fn=self.data_collator)

    def train_dataloader(self):
        return self._get_dataloader(dataset=self.train_dataset)

    def val_dataloader(self):
        return self._get_dataloader(dataset=self.val_dataset, eval_mode=True)

    def test_dataloader(self):
        return self._get_dataloader(dataset=self.test_dataset, eval_mode=True)


Overwriting l_datamodule.py


In [12]:
%%writefile l_model.py
#@title Lightning Model
#@markdown Lightning Modules and Training step

import lightning as L
import torch

from transformers import DataCollatorForSeq2Seq
from bitsandbytes.optim import AdamW, Lion, PagedAdamW, PagedLion, LARS
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts
from torchmetrics.functional.text.rouge import rouge_score


class LLamaFTLightningModule(L.LightningModule):
    """Lightning wrapper that fine-tunes a PEFT model and logs loss plus ROUGE.

    Training and validation steps run the same forward pass, log
    ``<stage>_loss`` and one ``<stage>_<rouge key>`` entry per ROUGE score, and
    differ only in the batch size reported to the logger and in whether the
    loss is returned.
    """

    def __init__(self,
                 #data_collator,
                 peft_model,
                 tokenizer,
                 learning_rate: float = 2e-5 ):
        super().__init__()
        # The model itself is excluded from the saved hyper-parameters.
        self.save_hyperparameters(ignore=['peft_model'])
        self.model = peft_model
        self.tokenizer = tokenizer
        # self.data_collator = DataCollatorForSeq2Seq(tokenizer, model=peft_model)
        self.learning_rate = learning_rate

    def _get_rouge_score(self, predictions, labels):
        """Decode arg-max predictions and labels to text and score them with ROUGE."""
        predicted_ids = predictions.argmax(dim=-1)
        reference_text = self.tokenizer.batch_decode(labels, skip_special_tokens=True)
        predicted_text = self.tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)
        return rouge_score(preds=predicted_text, target=reference_text)

    def _log(self, log_name, value, batch_size):
        """Log ``value`` per step and per epoch after placing it on the model device."""
        if value.device == self.model.device:
            on_device = value
        else:
            on_device = value.to(self.model.device)
        self.log(
            log_name,
            on_device,
            prog_bar=True,
            on_step=True,
            on_epoch=True,
            batch_size=batch_size,
            sync_dist=True)

    def _batch_device_correction(self, batch):
        """Move every entry of ``batch`` that is on another device to the model device."""
        for name, tensor in batch.items():
            if tensor.device != self.model.device:
                batch[name] = tensor.to(self.model.device)
        return batch

    def forward(self, input_ids, attention_mask, labels):
        return self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels,
        )

    def _step(self, batch, stage, batch_size_attr):
        """Shared forward/log routine; returns the loss of the batch.

        ``batch_size_attr`` names the datamodule attribute holding the batch
        size reported to the logger.
        """
        batch = self._batch_device_correction(batch)
        outputs = self(input_ids=batch.input_ids,
                       attention_mask=batch.attention_mask,
                       labels=batch.labels)
        scores = self._get_rouge_score(outputs.logits, batch.labels)
        loss = outputs.loss
        self._log(f"{stage}_loss", loss, getattr(self.trainer.datamodule, batch_size_attr),)
        for score_name, score_value in scores.items():
            self._log(f"{stage}_{score_name}", score_value,
                      getattr(self.trainer.datamodule, batch_size_attr),)
        return loss

    def training_step(self, batch, batch_idx):
        return self._step(batch, "train", "train_batch_size")

    def validation_step(self, batch, batch_idx):
        # Validation only logs; nothing is returned.
        self._step(batch, "val", "eval_batch_size")

    def configure_optimizers(self):
        """Paged Lion optimiser with a warm-restart cosine schedule stepped every batch."""
        optimizer = PagedLion(params=self.model.parameters(),
                              lr=self.learning_rate,
                              weight_decay=0.01,
                              optim_bits=32,)
        scheduler = CosineAnnealingWarmRestarts(optimizer,
                                                T_0=10,
                                                T_mult=2,
                                                eta_min=0.00001)
        # scheduler = ReduceLROnPlateau(optimizer=optimizer, mode="min")
        scheduler_config = {
            "scheduler": scheduler,
            "monitor": "val_loss",
            "interval": "step",
            "frequency": 1,
        }
        return {
            "optimizer": optimizer,
            "lr_scheduler": scheduler_config,
        }


Overwriting l_model.py


In [11]:
%%writefile l_trainer.py
#@title Trainer
#@markdown Lightning cli trainer

import os
import lightning as L
from lightning.pytorch import Trainer
from lightning.pytorch.cli import LightningCLI, LightningArgumentParser
from lightning.pytorch.strategies.deepspeed import DeepSpeedStrategy
from lightning.pytorch.loggers import WandbLogger, TensorBoardLogger
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint, TQDMProgressBar

import ray
from ray.train.lightning import (RayTrainReportCallback, RayDeepSpeedStrategy,
                                 RayLightningEnvironment, prepare_trainer)
from ray.train.torch import TorchTrainer

from transformers import DataCollatorForSeq2Seq

from l_datamodule import FTDataModule
from l_model import LLamaFTLightningModule
from peft_model import get_model, base_model_id
from finetuning_datasets import get_dataset, dataset_path
from finetuning_datafunctions import SumDataCallator


# is Lightning able?
# L.pytorch.cli_lightning_logo()

# Set before any tokenizer is created; presumably meant to switch off the
# tokenizers library's internal parallelism - TODO confirm "0" is honoured.
os.environ["TOKENIZERS_PARALLELISM"] = "0"


def l2ray_trainer():
    """Per-worker training entry point executed by Ray's ``TorchTrainer``.

    Builds the PEFT model, tokenizer and datasets, configures a Lightning
    ``Trainer`` with the Ray DeepSpeed strategy, and runs ``fit``.
    """
    model_data = get_model(model_name=base_model_id)
    peft_model = model_data["model"]
    tokenizer = model_data["tokenizer"]

    # Fix: get_dataset returns the splits at the top level ("train", "test",
    # "val"); it has no "dataset" key, so the old lookup raised KeyError.
    dataset = get_dataset(dataset_name=dataset_path, tokenizer=tokenizer)
    max_source_length = dataset["max_source_length"]
    trainer = Trainer(
        # fast_dev_run=True,
        max_epochs=10,
        devices="auto",
        accelerator="auto",
        accumulate_grad_batches=4,
        precision="bf16-mixed",
        default_root_dir=f"{os.getcwd()}/checkpoints",
        # strategy='deepspeed',
        strategy=RayDeepSpeedStrategy(
            stage=2,
            ),
        plugins=[
            RayLightningEnvironment()
            ],
        callbacks=[
            RayTrainReportCallback(),
            EarlyStopping(monitor="val_loss"),
            LearningRateMonitor(),
            TQDMProgressBar(refresh_rate=5),
            ModelCheckpoint(
                save_top_k=3,
                monitor='val_loss',
                mode='min',
                save_weights_only=True,  # save weights only
                save_on_train_epoch_end=True,
                dirpath="checkpoint",
                filename=f'{base_model_id}_PEFT_ckpt'
                )
        ],
        logger=[
            WandbLogger(project="LLM-Finetuning"),
            TensorBoardLogger(save_dir=os.getcwd(), version=1, name="lightning_logs")
        ],
    )
    trainer = prepare_trainer(trainer=trainer)
    trainer.fit(
        model=LLamaFTLightningModule(
            peft_model=peft_model,
            tokenizer=tokenizer,),
        datamodule=FTDataModule(
            train_dataset=dataset["train"],
            # Fix: validate on the validation split. The test split was used
            # for both validation and test, so early stopping and checkpoint
            # selection were driven by test data.
            val_dataset=dataset["val"],
            test_dataset=dataset["test"],
            data_collator=SumDataCallator(tokenizer, max_length=max_source_length),
            train_batch_size=2,
            eval_batch_size=1))


# One GPU worker on an A100.
_scaling_config = ray.train.ScalingConfig(
    num_workers=1,
    use_gpu=True,
    # resources_per_worker={ "CPU": 8, "GPU": 1, },
    # trainer_resources={ "CPU": 8, "GPU": 1 },
    accelerator_type="A100",
)

# Keep the three checkpoints with the lowest reported val_loss.
_checkpoint_config = ray.train.CheckpointConfig(
    num_to_keep=3,
    checkpoint_score_attribute="val_loss",
    checkpoint_score_order="min",
)

ray_trainer = TorchTrainer(
    l2ray_trainer,
    scaling_config=_scaling_config,
    run_config=ray.train.RunConfig(checkpoint_config=_checkpoint_config),
)

ray.init(num_cpus=8, ignore_reinit_error=True)

result = ray_trainer.fit()

print(result)


Overwriting l_trainer.py


In [33]:
%%writefile l_sweep.py
#@title Wandb Sweep
#@markdown Lightning cli Sweep

import os
import lightning as L
from lightning.pytorch import Trainer
from lightning.pytorch.cli import LightningCLI, LightningArgumentParser
from lightning.pytorch.strategies.deepspeed import DeepSpeedStrategy
from lightning.pytorch.loggers import WandbLogger, TensorBoardLogger
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor, TQDMProgressBar, ModelCheckpoint

import ray
from ray.train.lightning import (RayTrainReportCallback, RayDeepSpeedStrategy,
                                 RayLightningEnvironment, prepare_trainer)
from ray.train.torch import TorchTrainer

from transformers import DataCollatorForSeq2Seq
import wandb
import gc

from l_datamodule import FTDataModule
from l_model import LLamaFTLightningModule
from peft_model import get_model, hf_model_list
from finetuning_datasets import get_dataset, dataset_path
from finetuning_datafunctions import SumDataCallator


# is Lightning able?
# Runs at import time; presumably prints the Lightning banner as a smoke test
# that the package is importable - TODO confirm.
L.pytorch.cli_lightning_logo()

# Set before any tokenizer is created; presumably meant to switch off the
# tokenizers library's internal parallelism - TODO confirm "0" is honoured.
os.environ["TOKENIZERS_PARALLELISM"] = "0"

# Project name read back further down via os.getenv when the WandbLogger is built.
os.environ["WANDB_PROJECT_NAME"] = "LLM-Finetuning"

def l2ray_trainer():
    """Run one fine-tuning trial using the hyperparameters of the active wandb run.

    Starts a Weights & Biases run, reads the sampled values from
    ``wandb.config`` (``model_name``, ``lora_rank``, ``lora_alpha``,
    ``lora_dropout``, ``init_lora_weights``, ``epochs``,
    ``accumulate_grad_batches``, ``gradient_clip_val`` and ``lr``), builds the
    PEFT model and the dataset, and fits a Lightning ``Trainer`` with the
    DeepSpeed strategy on a 1/10 shard of each split.

    The function takes no arguments and returns ``None`` so it can be passed
    directly as the ``function`` of ``wandb.agent``. Because the agent calls it
    repeatedly in one process, the trainer and model are released in a
    ``finally`` block whether or not training succeeds; any exception raised
    by ``fit`` still propagates to the caller.
    """
    # Local import: torch is only needed for the CUDA cache cleanup below.
    import torch

    # Use the same project the sweep is registered under; the literal is only
    # a fallback in case the environment variable has not been set.
    wandb.init(project=os.getenv("WANDB_PROJECT_NAME", "LLM-Finetuning"))
    config = wandb.config

    model_data = get_model(
        model_name=config.model_name,
        r=config.lora_rank,
        lora_alpha=config.lora_alpha,
        lora_dropout=config.lora_dropout,
        init_lora_weights=config.init_lora_weights,
    )
    peft_model = model_data["model"]
    tokenizer = model_data["tokenizer"]

    dataset = get_dataset(dataset_name=dataset_path, tokenizer=tokenizer)
    max_source_length = dataset["max_source_length"]

    trainer = Trainer(
        devices="auto",
        accelerator="auto",
        max_epochs=config.epochs,
        accumulate_grad_batches=config.accumulate_grad_batches,
        gradient_clip_val=config.gradient_clip_val,
        precision="bf16-mixed",
        strategy="deepspeed",
        # Ray variants, kept disabled while the sweep runs without Ray:
        # enable_checkpointing=False,
        # strategy=RayDeepSpeedStrategy(
        #     stage=2,
        #     contiguous_memory_optimization=False
        #     ),
        plugins=[
            # RayLightningEnvironment()
        ],
        callbacks=[
            # RayTrainReportCallback(),
            EarlyStopping(monitor="val_loss"),
            TQDMProgressBar(refresh_rate=5),
            LearningRateMonitor(),
            ModelCheckpoint(
                save_top_k=3,
                monitor='val_loss',
                mode='min',
                save_weights_only=True,  # store weights only
                save_on_train_epoch_end=True,
                dirpath="checkpoint",
                filename=f'{config.model_name}_PEFT_ckpt'
                )
        ],
        logger=[
            WandbLogger(project=os.getenv("WANDB_PROJECT_NAME")),
            TensorBoardLogger(save_dir=os.getcwd(), version=1, name="lightning_logs")
        ],
    )
    # trainer = prepare_trainer(trainer=trainer)

    lit_module = LLamaFTLightningModule(
        peft_model=peft_model,
        learning_rate=config.lr,
        tokenizer=tokenizer,
    )
    # Each split is cut into 10 shards and only one shard is used per trial;
    # note the test split takes shard 1 while train/val take shard 0.
    datamodule = FTDataModule(
        train_dataset=dataset["train"].shard(num_shards=10, index=0),
        val_dataset=dataset["val"].shard(num_shards=10, index=0),
        test_dataset=dataset["test"].shard(num_shards=10, index=1),
        data_collator=SumDataCallator(tokenizer, max_length=max_source_length),
        train_batch_size=2,
        eval_batch_size=1,
    )

    try:
        trainer.fit(model=lit_module, datamodule=datamodule)
    finally:
        # Drop the references first, then collect: collecting while the
        # names are still bound cannot free the trainer or the model.
        del trainer, lit_module, datamodule, peft_model, model_data
        gc.collect()
        if torch.cuda.is_available():
            # Hand cached GPU blocks back so the next trial starts clean.
            torch.cuda.empty_cache()


def ray_wrapped_trainer(accelerator_type="L4", num_cpus=8):
    """Run ``l2ray_trainer`` on a single GPU worker through Ray Train.

    Args:
        accelerator_type: Accelerator label passed to ``ScalingConfig``; the
            worker is only placed on a node advertising it. Defaults to
            ``"L4"``; pass e.g. ``"A100"`` on other hardware.
        num_cpus: CPU budget given to the local Ray runtime, all of which is
            assigned to the single training worker.

    Returns:
        Whatever ``TorchTrainer.fit()`` returns (Ray's training result).
    """
    # Bring Ray up first so the CPU budget below is in effect before any
    # trainer object exists. A second call is a no-op.
    ray.init(
        num_cpus=num_cpus,
        ignore_reinit_error=True,
    )

    scaling_config = ray.train.ScalingConfig(
        num_workers=1,
        use_gpu=True,
        resources_per_worker={"CPU": num_cpus, "GPU": 1},
        # The coordinator must not reserve anything: its resources are added
        # on top of the worker's, and asking for a second 8 CPU / 1 GPU bundle
        # can never be satisfied by a runtime started with `num_cpus` CPUs,
        # which would leave fit() waiting for resources forever.
        trainer_resources={"CPU": 0},
        accelerator_type=accelerator_type,
    )
    run_config = ray.train.RunConfig(
        # Keep only the single best checkpoint, ranked by lowest "val_loss".
        checkpoint_config=ray.train.CheckpointConfig(
            num_to_keep=1,
            checkpoint_score_attribute="val_loss",
            checkpoint_score_order="min",
        ),
    )

    ray_trainer = TorchTrainer(
        l2ray_trainer,
        scaling_config=scaling_config,
        run_config=run_config,
    )
    # Return the result instead of discarding it so callers can inspect it.
    return ray_trainer.fit()

# Search space of the random sweep. Trials are ranked by the metric named
# "val_rouge1_fmeasure_epoch" (higher is better).
sweep_configuration = {
    "method": "random",
    "name": "sweep",
    "metric": {
        "goal": "maximize",
        "name": "val_rouge1_fmeasure_epoch",
    },
    "parameters": {
        "model_name": {"values": hf_model_list},
        "epochs": {"values": [1]},
        "lr": {"max": 5e-4, "min": 5e-5},
        "accumulate_grad_batches": {"min": 1, "max": 8},
        "gradient_clip_val": {"min": 0.1, "max": 1.0},
        "lora_rank": {"values": [2, 4, 8, 16, 32]},
        "lora_alpha": {"values": [16, 32, 64, 128]},
        "lora_dropout": {"min": 0.05, "max": 0.1},
        "init_lora_weights": {
            "values": ["gaussian", "pissa", "pissa_niter_16","pissa_niter_32", "loftq", False]
        },
    },
}

sweep_project = os.getenv("WANDB_PROJECT_NAME")

# Register the sweep, then run as many trials as there are candidate models.
# `ray_wrapped_trainer` is the alternative entry point that routes each trial
# through Ray; it is not used here.
sweep_id = wandb.sweep(sweep_configuration, project=sweep_project)
wandb.agent(
    sweep_id=sweep_id,
    function=l2ray_trainer,
    count=len(hf_model_list),
    project=sweep_project,
)

# print(result)


Overwriting l_sweep.py


In [None]:
#@title Start Training
# %%writefile run_train
#@markdown 실험 결과
#@markdown
#@markdown * batch_size <b>2</b> 넘기는 경우 OOM
#@markdown * DeepSpeed의 경우 GPU Ram 20GB로 7B finetuning 가능
#@markdown * DeepSpeed의 경우, 7B L4 GPU에서 사용 가능
#@markdown * 70B의 경우 RAM에서 Weight 가져오다 OOM
#@markdown * 1.5B T4 GPU에서 성공

!export CUDA_VISIBLE_DEVICES=0 && export CUDA_LAUNCH_BLOCKING=1 && python l_trainer.py



In [56]:
!export CUDA_VISIBLE_DEVICES=0 && export CUDA_LAUNCH_BLOCKING=1 && python l_sweep.py



[0;35m
                    ####
                ###########
             ####################
         ############################
    #####################################
##############################################
#########################  ###################
#######################    ###################
####################      ####################
##################       #####################
################        ######################
#####################        #################
######################     ###################
#####################    #####################
####################   #######################
###################  #########################
##############################################
    #####################################
         ############################
             ####################
                  ##########
                     ####
[0m

Create sweep with ID: u0ifdpln
Sweep URL: https://wandb.ai/j0ntendo-yonsei-university

In [None]:
# Mount Google Drive when running on Colab. On any other host the
# `google.colab` package does not exist, so skip the mount instead of
# aborting a full "Run All".
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("google.colab is not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

# Model Saving

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from optimum.onnxruntime import ORTModelForCausalLM

# Define paths
model_checkpoint = "./saved_model"  # Directory where your PyTorch model is saved
save_directory = "./onnx_model"     # Directory where you want to save the ONNX model

# Load the PyTorch model and tokenizer.
# NOTE: `model` is not consumed by the export below, which reloads the
# checkpoint from `model_checkpoint` itself; it is kept in case other cells use it.
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Convert the PyTorch model to ONNX
ort_model = ORTModelForCausalLM.from_pretrained(model_checkpoint, export=True)

# Save the ONNX model together with its tokenizer so that `save_directory`
# can be loaded on its own for inference.
ort_model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)
