# IA3

对K, V, FFN进行缩放，学习对应缩放因子，理解为放大关键特征，抑制噪声

In [1]:
!pip install peft

Looking in indexes: https://mirrors.aliyun.com/pypi/simple
Collecting peft
  Downloading https://mirrors.aliyun.com/pypi/packages/6a/7b/d10c594d1afd828a0b359bb860965e261e840764fe2672d14a647f5889ee/peft-0.16.0-py3-none-any.whl (472 kB)
Installing collected packages: peft
Successfully installed peft-0.16.0
[0m

In [1]:
import torch

from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForSeq2Seq

In [2]:
dataset = Dataset.load_from_disk("../../datas/alpaca_data_zh")

In [3]:
tokenizer = AutoTokenizer.from_pretrained("../../models/bloom-1b4")

In [4]:
def process_func(example):
    MAX_LENGTH = 512

    instruction = "\n".join(["Human: " + example["instruction"], example["input"]]).strip() + "\n\nAssistant: "
    inputs = tokenizer(instruction)
    response = tokenizer(example["output"])
    input_ids = inputs["input_ids"] + response["input_ids"] + [tokenizer.eos_token_id]
    attention_mask = inputs["attention_mask"] + response["attention_mask"] + [1]
    labels = [-100] * len(inputs["input_ids"]) + response["input_ids"] + [tokenizer.eos_token_id]

    if len(input_ids) > MAX_LENGTH:
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

In [5]:
tokenized_data = dataset.map(process_func, remove_columns=dataset.column_names)

In [6]:
model = AutoModelForCausalLM.from_pretrained("../../models/bloom-1b4")

In [7]:
for name, params in model.named_parameters():
    print(name, params.dtype)

transformer.word_embeddings.weight torch.float32
transformer.word_embeddings_layernorm.weight torch.float32
transformer.word_embeddings_layernorm.bias torch.float32
transformer.h.0.input_layernorm.weight torch.float32
transformer.h.0.input_layernorm.bias torch.float32
transformer.h.0.self_attention.query_key_value.weight torch.float32
transformer.h.0.self_attention.query_key_value.bias torch.float32
transformer.h.0.self_attention.dense.weight torch.float32
transformer.h.0.self_attention.dense.bias torch.float32
transformer.h.0.post_attention_layernorm.weight torch.float32
transformer.h.0.post_attention_layernorm.bias torch.float32
transformer.h.0.mlp.dense_h_to_4h.weight torch.float32
transformer.h.0.mlp.dense_h_to_4h.bias torch.float32
transformer.h.0.mlp.dense_4h_to_h.weight torch.float32
transformer.h.0.mlp.dense_4h_to_h.bias torch.float32
transformer.h.1.input_layernorm.weight torch.float32
transformer.h.1.input_layernorm.bias torch.float32
transformer.h.1.self_attention.query_key_

In [8]:
from peft import IA3Config, TaskType, get_peft_model

config = IA3Config(
    task_type=TaskType.CAUSAL_LM,
)

In [9]:
model = get_peft_model(model, config)

In [10]:
model

PeftModelForCausalLM(
  (base_model): IA3Model(
    (model): BloomForCausalLM(
      (transformer): BloomModel(
        (word_embeddings): Embedding(46145, 2048)
        (word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (h): ModuleList(
          (0-23): 24 x BloomBlock(
            (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (self_attention): BloomAttention(
              (query_key_value): Linear(
                (base_layer): Linear(in_features=2048, out_features=6144, bias=True)
                (ia3_l): ParameterDict(  (default): Parameter containing: [torch.FloatTensor of size 6144x1])
              )
              (dense): Linear(in_features=2048, out_features=2048, bias=True)
              (attention_dropout): Dropout(p=0.0, inplace=False)
            )
            (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (mlp): BloomMLP(
              (dense_

In [11]:
model.print_trainable_parameters()

trainable params: 344,064 || all params: 1,303,455,744 || trainable%: 0.0264


In [18]:
args = TrainingArguments(
    output_dir="../../caches/PEFT-lora",
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,
    logging_steps=50,
    num_train_epochs=1,
)

In [19]:
trainer = Trainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    train_dataset=tokenized_data,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [20]:
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 74.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 10.17 GiB is allocated by PyTorch, and 426.91 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
model = model.cuda()
ipt = tokenizer("Human: {}\n{}".format("考试有哪些技巧？", "").strip() + "\n\nAssistant: ", return_tensors="pt").to(model.device)
tokenizer.decode(model.generate(**ipt, max_length=128, do_sample=True)[0], skip_special_tokens=True)