<a href="https://colab.research.google.com/github/catyung/AI-Marketer/blob/main/nl2sql/Flan_T5_SQL_LLM_LoftQ_Fine_Tune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install

In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q -U git+https://github.com/huggingface/datasets
!pip -q install sentencepiece xformers einops loralib

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [None]:
! pip install -q rouge_score

In [None]:
!pip install wandb



# Create the LoftQ model

In [None]:
# Log in to the Hugging Face Hub before the cells below download checkpoints.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Modified from https://github.com/yxli2123/LoftQ/blob/main/quantize_save.py#L122
import os

import torch
import torch.nn as nn
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model

class Shell(nn.Module):
    """Bare module that only carries a frozen ``weight`` and, optionally, a frozen ``bias``.

    Both tensors are registered as parameters with ``requires_grad=False``.
    No ``bias`` attribute is created when ``bias`` is ``None``.
    """

    def __init__(self, weight, bias=None):
        super().__init__()
        # Collect the tensors to register; ``weight`` is always present.
        frozen_tensors = [("weight", weight)]
        if bias is not None:
            frozen_tensors.append(("bias", bias))
        for attr_name, tensor in frozen_tensors:
            # Assigning an nn.Parameter attribute registers it on the module.
            setattr(self, attr_name, nn.Parameter(tensor, requires_grad=False))


def unwrap_model(model, sub_module_name=".base_layer"):
    """Replace every wrapper module that owns a ``base_layer`` with a ``Shell``.

    Wrapper paths are discovered from the ``state_dict`` keys that contain
    ``sub_module_name``. Each wrapper is swapped, in place on ``model``, for a
    ``Shell`` holding the ``weight`` and ``bias`` of its ``base_layer``.
    The parent module of every replaced wrapper is printed. Returns ``None``.
    """
    # Everything before ``sub_module_name`` in a key is the wrapper's dotted path.
    wrapper_paths = {
        key.split(sub_module_name)[0]
        for key in model.state_dict().keys()
        if sub_module_name in key
    }

    for path in wrapper_paths:
        # Split "a.b.c" into the parent path "a.b" and the attribute name "c".
        parent_path, _, attr_name = path.rpartition(".")
        parent = model.get_submodule(parent_path)
        print(parent)

        # Build the replacement from the wrapped layer's tensors.
        wrapper = getattr(parent, attr_name)
        base_weight = getattr(wrapper.base_layer, "weight", None)
        base_bias = getattr(wrapper.base_layer, "bias", None)

        setattr(parent, attr_name, Shell(base_weight, base_bias))

def quantize_t5(model_id, bits, iter, rank, save_repo=None):
    """Load a T5 seq2seq checkpoint and attach LoftQ-initialised LoRA adapters.

    This only works for T5. For any other architecture, consult the original
    LoftQ ``quantize_save.py`` script.

    Args:
        model_id: Identifier passed to ``AutoTokenizer.from_pretrained`` and
            ``AutoModelForSeq2SeqLM.from_pretrained``.
        bits: Forwarded as ``LoftQConfig(loftq_bits=...)``.
        iter: Forwarded as ``LoftQConfig(loftq_iter=...)``. The name shadows
            the builtin but is kept so existing keyword callers still work.
        rank: LoRA rank ``r``; also used as ``lora_alpha``.
        save_repo: Currently unused: this function neither saves nor pushes
            anything. It is optional so that four-argument calls no longer
            raise ``TypeError``.

    Returns:
        A ``(lora_model, tokenizer)`` tuple.
    """
    # Download weights and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    target_modules = ["q", "k", "v", "o", "wi_0", "wi_1", "wo"]

    # Config of LoftQ
    loftq_config = LoftQConfig(loftq_bits=bits, loftq_iter=iter)

    lora_config = LoraConfig(
        task_type=TaskType.SEQ_2_SEQ_LM,
        inference_mode=True,
        r=rank,
        # The task type is always SEQ_2_SEQ_LM here, so alpha is simply the rank.
        lora_alpha=rank,
        lora_dropout=0.1,
        target_modules=target_modules,
        init_lora_weights="loftq",
        loftq_config=loftq_config,
    )

    # Obtain LoftQ model
    lora_model = get_peft_model(model, lora_config)
    return lora_model, tokenizer

In [None]:
# `quantize_t5` returns a (model, tokenizer) pair rather than a directory, so
# unpack it. `save_repo` is passed explicitly because the function does not
# use it and nothing is saved or pushed by this call.
loftq_model, loftq_tokenizer = quantize_t5("google/flan-t5-large", 4, 5, 64, save_repo=None)

# Load LoftQ model

In [None]:
# Log in to the Hugging Face Hub; the training arguments further down set
# push_to_hub=True, which needs an authenticated session.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Load the 4-bit backbone, then the LoftQ-initialised LoRA adapter on top of it
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

model_id = "darinchau/flan-t5-4bit-64rank"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # The config field is `bnb_4bit_use_double_quant`; the previous spelling
    # (`load_4bit_use_double_quant`) is not a recognised option.
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# The adapter lives in the `loft_init` subfolder of the same repo and is
# loaded as trainable so it can be fine-tuned below.
model = PeftModel.from_pretrained(
    model,
    model_id,
    subfolder="loft_init",
    is_trainable=True,
)

# NOTE(review): left padding, `add_eos_token` and pad == eos look like
# settings carried over from a decoder-only recipe. Confirm they are
# intended for a T5 tokenizer.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    padding_side="left",
    add_eos_token=True
)

tokenizer.pad_token = tokenizer.eos_token

adapter_config.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/293M [00:00<?, ?B/s]

loft_init/adapter_config.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/293M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

# Dataset

In [None]:
from datasets import load_dataset

# Download the "wikisql" dataset; the splits are selected by name in the next cell.
dataset = load_dataset("wikisql")

Downloading data:   0%|          | 0.00/7.71M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.63M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/25.2M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/15878 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/8421 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/56355 [00:00<?, ? examples/s]

In [None]:
# Training uses the 'train' split; evaluation uses the 'validation' split.
train_dataset = dataset['train']
eval_dataset = dataset['validation']

In [None]:
def generate_prompt(data):
    """Turn one dataset record into an ``input`` prompt and a ``target`` SQL string.

    Args:
        data: Mapping that provides ``data['question']``,
            ``data['table']['header']`` (an iterable of column names) and
            ``data['sql']['human_readable']``.

    Returns:
        ``{"input": <prompt>, "target": <human-readable SQL>}``.

    The earlier version also assembled a per-column ``schema`` string from the
    first table row, but never used it. That dead code raised ``IndexError``
    for tables without rows, so it has been removed; the prompt text is
    unchanged.
    """
    query = data['question']
    headers = data['table']['header']

    # `.strip()` is kept so surrounding whitespace in the question is trimmed
    # exactly as before.
    prompt = (
        f"Suppose an SQL table has the following columns: {', '.join(headers)}. "
        f"Translate the query to SQL based on the table provided: {query}"
    ).strip()
    return {
        "input": prompt,
        "target": data['sql']['human_readable']
    }

# Sanity check: show the prompt/target pair built from the first training record.
print(generate_prompt(train_dataset[0]))

In [None]:
# Fixed seed so a fresh Restart & Run All produces the same example order.
SHUFFLE_SEED = 42

# Replace the raw columns with the `input` / `target` pair from generate_prompt.
# NOTE: this rebinds train_dataset / eval_dataset, so the cell cannot be re-run
# without first re-running the split-selection cell above.
train_dataset = train_dataset.shuffle(seed=SHUFFLE_SEED).map(generate_prompt, remove_columns=train_dataset.column_names)
eval_dataset = eval_dataset.shuffle(seed=SHUFFLE_SEED).map(generate_prompt, remove_columns=eval_dataset.column_names)

Map:   0%|          | 0/56355 [00:00<?, ? examples/s]

Map:   0%|          | 0/8421 [00:00<?, ? examples/s]

In [None]:
# tokenize the examples
def convert_to_features(example_batch, max_length=64):
    """Tokenize a batch of ``input`` / ``target`` strings into model features.

    Args:
        example_batch: Batch mapping with ``'input'`` and ``'target'`` lists of
            strings (the columns produced by ``generate_prompt``).
        max_length: Length every sequence is padded or truncated to.

    Returns:
        Dict with ``input_ids``, ``attention_mask``, ``labels`` and
        ``decoder_attention_mask``. Padding positions in ``labels`` are set to
        -100 so the loss ignores them.
    """
    # `padding=` / `truncation=` replace the deprecated `pad_to_max_length`
    # flag and make the previously implicit truncation explicit.
    input_encodings = tokenizer(
        example_batch['input'], padding="max_length", truncation=True, max_length=max_length
    )
    target_encodings = tokenizer(
        example_batch['target'], padding="max_length", truncation=True, max_length=max_length
    )

    # Mask padding by attention mask rather than by token id: the pad token is
    # set to the EOS token in this notebook, so an id-based mask would also
    # erase the real end-of-sequence token from the labels.
    labels = [
        [token if keep else -100 for token, keep in zip(ids, mask)]
        for ids, mask in zip(target_encodings['input_ids'], target_encodings['attention_mask'])
    ]

    encodings = {
        'input_ids': input_encodings['input_ids'],
        'attention_mask': input_encodings['attention_mask'],
        'labels': labels,
        'decoder_attention_mask': target_encodings['attention_mask']
    }

    return encodings

In [None]:
# Columns exposed to the trainer as torch tensors.
columns = ['input_ids', 'attention_mask', 'labels', 'decoder_attention_mask']

# Tokenize both splits in batches, dropping the text columns afterwards.
train_data = train_dataset.map(
    convert_to_features,
    batched=True,
    remove_columns=train_dataset.column_names,
)
train_data.set_format(type='torch', columns=columns)

eval_data = eval_dataset.map(
    convert_to_features,
    batched=True,
    remove_columns=eval_dataset.column_names,
)
eval_data.set_format(type='torch', columns=columns)

Map:   0%|          | 0/56355 [00:00<?, ? examples/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Map:   0%|          | 0/8421 [00:00<?, ? examples/s]

# Training

In [None]:
import os
os.environ["WANDB_PROJECT"] = "flan5-finetuning"

import wandb
# Never paste the API key into the notebook. `wandb.login(key=)` with the key
# stripped out is also a SyntaxError. With no key argument, wandb uses the
# WANDB_API_KEY environment variable or stored credentials, and otherwise
# prompts for the key.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33myfdchau[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# NOTE(review): the recorded output of this cell warns that
# `trust_remote_code=True` will become mandatory for loading this metric in the
# next major `datasets` release. Since `datasets` is installed from git main
# above, confirm `load_metric` is still available in that build.
from datasets import load_metric
rouge = load_metric("rouge")

def compute_metrics(pred):
    """Compute ROUGE-2 precision, recall and F-measure for generated SQL.

    Args:
        pred: Prediction object exposing ``predictions`` and ``label_ids``
            arrays of token ids, in which -100 marks ignored positions.

    Returns:
        Dict with ``rouge2_precision``, ``rouge2_recall`` and
        ``rouge2_fmeasure``, each rounded to 4 decimals.
    """
    pad_id = tokenizer.pad_token_id

    # Work on copies so the caller's arrays are not modified in place, and
    # replace -100 in BOTH arrays, because -100 is not a decodable token id.
    pred_ids = pred.predictions.copy()
    pred_ids[pred_ids == -100] = pad_id
    labels_ids = pred.label_ids.copy()
    labels_ids[labels_ids == -100] = pad_id

    # all unnecessary tokens are removed
    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    rouge_output = rouge.compute(predictions=pred_str, references=label_str, rouge_types=["rouge2"])["rouge2"].mid

    return {
        "rouge2_precision": round(rouge_output.precision, 4),
        "rouge2_recall": round(rouge_output.recall, 4),
        "rouge2_fmeasure": round(rouge_output.fmeasure, 4),
    }

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [None]:
from transformers import Seq2SeqTrainer
from transformers import Seq2SeqTrainingArguments

# Evaluate and checkpoint once per epoch; `load_best_model_at_end` requires the
# evaluation and save strategies to match.
# NOTE(review): `output_dir` has the same name as the base-model repo loaded
# above and `push_to_hub=True`. Confirm pushing checkpoints there is intended.
training_args = Seq2SeqTrainingArguments(
    per_device_train_batch_size=16,
    num_train_epochs=5,
    per_device_eval_batch_size=16,
    predict_with_generate=True,
    # `evaluation_strategy` is the deprecated spelling of this argument; the
    # transformers build installed from git main uses `eval_strategy`.
    eval_strategy="epoch",
    do_train=True,
    do_eval=True,
    save_strategy="epoch",
    #save_steps=1000,
    #eval_steps=1000,
    overwrite_output_dir=True,
    save_total_limit=3,
    load_best_model_at_end=True,
    push_to_hub=True,
    #fp16=True,
    learning_rate=3e-4,
    #bf16=True,
    logging_steps=10,
    output_dir="darinchau/flan-t5-4bit-64rank",
    report_to="wandb"
)

In [None]:
# Wire the adapter model, the tokenized splits and the ROUGE metric into the
# trainer configured by `training_args`.
trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    compute_metrics=compute_metrics,
)

# Start fine-tuning; the returned training summary is shown as the cell output.
trainer.train()