[Adalora] Add adalora 4bit #598

Merged · 1 commit · Jun 26, 2023
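For context, a minimal usage sketch of what this PR enables: AdaLoRA fine-tuning on top of a base model loaded in 4-bit via bitsandbytes. This is only a sketch; the model id and hyperparameter values are illustrative (they mirror the GPU test added below) and are not part of the change itself.

from transformers import AutoModelForCausalLM
from peft import AdaLoraConfig, get_peft_model, prepare_model_for_kbit_training

# Load the base model with bitsandbytes 4-bit quantization (illustrative model id).
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", load_in_4bit=True)
model = prepare_model_for_kbit_training(model)

# AdaLoRA configuration mirroring the test added in tests/test_gpu_examples.py below.
config = AdaLoraConfig(
    init_r=6,
    target_r=4,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
model.print_trainable_parameters()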
91 changes: 83 additions & 8 deletions src/peft/tuners/adalora.py
@@ -1,4 +1,3 @@
import importlib
import re
import warnings
from dataclasses import dataclass, field
@@ -9,6 +8,7 @@
import torch.nn.functional as F
from transformers.pytorch_utils import Conv1D

from ..import_utils import is_bnb_4bit_available, is_bnb_available
from ..utils import (
TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING,
PeftType,
@@ -24,10 +24,6 @@
)


def is_bnb_available():
return importlib.util.find_spec("bitsandbytes") is not None


if is_bnb_available():
import bitsandbytes as bnb

@@ -128,7 +124,9 @@ def add_adapter(self, adapter_name, config=None):
def _find_and_replace(self, adapter_name):
lora_config = self.peft_config[adapter_name]
loaded_in_8bit = getattr(self.model, "is_loaded_in_8bit", False)
if loaded_in_8bit and not is_bnb_available():
loaded_in_4bit = getattr(self.model, "is_loaded_in_4bit", False)

if (loaded_in_8bit or loaded_in_4bit) and not is_bnb_available():
raise ImportError(
"To use Lora with 8-bit quantization, please install the `bitsandbytes` package. "
"You can install it with `pip install bitsandbytes`."
@@ -173,6 +171,18 @@ def _find_and_replace(self, adapter_name):
new_module = SVDLinear8bitLt(
adapter_name, target.in_features, target.out_features, bias=bias, **kwargs
)
elif loaded_in_4bit and is_bnb_4bit_available() and isinstance(target, bnb.nn.Linear4bit):
fourbit_kwargs = kwargs.copy()
fourbit_kwargs.update(
{
"compute_dtype": target.compute_dtype,
"compress_statistics": target.weight.compress_statistics,
"quant_type": target.weight.quant_type,
}
)
new_module = SVDLinear4bit(
adapter_name, target.in_features, target.out_features, bias=bias, **fourbit_kwargs
)
else:
if isinstance(target, torch.nn.Linear):
in_features, out_features = target.in_features, target.out_features
@@ -230,7 +240,10 @@ def forward(self, *args, **kwargs):
I.requires_grad = False
num_param += 1
regu_loss += torch.norm(para_cov - I, p="fro")
regu_loss = regu_loss / num_param
if num_param > 0:
regu_loss = regu_loss / num_param
else:
regu_loss = 0
outputs.loss += orth_reg_weight * regu_loss
return outputs

@@ -507,7 +520,69 @@ def forward(self, x: torch.Tensor):
* self.scaling[self.active_adapter]
/ (self.ranknum[self.active_adapter] + 1e-5)
)
result += output
result = result + output
return result

class SVDLinear4bit(bnb.nn.Linear4bit, AdaLoraLayer):
# Low-rank matrix for SVD-based adaptation
def __init__(
self,
adapter_name,
in_features,
out_features,
r: int = 0,
lora_alpha: int = 1,
lora_dropout: float = 0.0,
**kwargs,
):
bnb.nn.Linear4bit.__init__(
self,
in_features,
out_features,
bias=kwargs.get("bias", True),
compute_dtype=kwargs.get("compute_dtype", torch.float32),
compress_statistics=kwargs.get("compress_statistics", True),
quant_type=kwargs.get("quant_type", "nf4"),
)
AdaLoraLayer.__init__(self, in_features=in_features, out_features=out_features)
# Freezing the pre-trained weight matrix
self.weight.requires_grad = False

init_lora_weights = kwargs.pop("init_lora_weights", True)
self.update_layer(adapter_name, r, lora_alpha, lora_dropout, init_lora_weights)
self.active_adapter = adapter_name

def forward(self, x: torch.Tensor):
result = super().forward(x)

if self.disable_adapters or self.active_adapter not in self.lora_A.keys():
return result
elif self.r[self.active_adapter] > 0:
if not torch.is_autocast_enabled():
expected_dtype = result.dtype

if x.dtype != torch.float32:
x = x.float()
output = (
(
self.lora_dropout[self.active_adapter](x)
@ (self.lora_A[self.active_adapter] * self.lora_E[self.active_adapter]).T
@ self.lora_B[self.active_adapter].T
).to(expected_dtype)
* self.scaling[self.active_adapter]
/ (self.ranknum[self.active_adapter] + 1e-5)
)
else:
output = (
(
self.lora_dropout[self.active_adapter](x)
@ (self.lora_A[self.active_adapter] * self.lora_E[self.active_adapter]).T
@ self.lora_B[self.active_adapter].T
)
* self.scaling[self.active_adapter]
/ (self.ranknum[self.active_adapter] + 1e-5)
)
result = result + output
return result


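For reference, a rough standalone sketch of the replacement step performed in _find_and_replace above: the quantization settings of the bnb.nn.Linear4bit layer being swapped out are mirrored into the new SVDLinear4bit. The layer sizes, adapter name, and r/lora_alpha values here are illustrative, and a machine with bitsandbytes 4-bit support is assumed.

import torch
import bitsandbytes as bnb
from peft.tuners.adalora import SVDLinear4bit

# A 4-bit quantized linear layer, as _find_and_replace would encounter inside the model.
target = bnb.nn.Linear4bit(
    768, 768, bias=True, compute_dtype=torch.float16, compress_statistics=True, quant_type="nf4"
)

# Mirror the target's quantization settings, as fourbit_kwargs does above.
fourbit_kwargs = {
    "r": 8,  # illustrative initial rank
    "lora_alpha": 32,  # illustrative scaling
    "compute_dtype": target.compute_dtype,
    "compress_statistics": target.weight.compress_statistics,
    "quant_type": target.weight.quant_type,
}
new_module = SVDLinear4bit("default", target.in_features, target.out_features, bias=True, **fourbit_kwargs)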
75 changes: 71 additions & 4 deletions tests/test_gpu_examples.py
@@ -37,7 +37,13 @@
WhisperTokenizer,
)

from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training
from peft import (
AdaLoraConfig,
LoraConfig,
get_peft_model,
prepare_model_for_int8_training,
prepare_model_for_kbit_training,
)

from .testing_utils import require_bitsandbytes, require_torch_gpu, require_torch_multi_gpu

@@ -80,10 +86,10 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) ->

@require_torch_gpu
@require_bitsandbytes
class PeftInt8GPUExampleTests(unittest.TestCase):
class PeftBnbGPUExampleTests(unittest.TestCase):
r"""
A single GPU int8 test suite, this will test if training fits correctly on a single GPU device (1x NVIDIA T4 16GB)
using bitsandbytes.
A single GPU int8 + fp4 test suite; it checks that training fits correctly on a single GPU device (1x NVIDIA T4
16GB) using bitsandbytes.

The tests are the following:

@@ -168,6 +174,67 @@ def test_causal_lm_training(self):
# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

@pytest.mark.single_gpu_tests
@require_torch_gpu
def test_4bit_adalora_causalLM(self):
r"""
Tests 4-bit training with AdaLoRA.
"""
model_id = "facebook/opt-350m"

model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

peft_config = AdaLoraConfig(
init_r=6,
target_r=4,
tinit=50,
tfinal=100,
deltaT=5,
beta1=0.3,
beta2=0.3,
orth_reg_weight=0.2,
lora_alpha=32,
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM",
)

model = get_peft_model(model, peft_config)

data = load_dataset("ybelkada/english_quotes_copy")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)

with tempfile.TemporaryDirectory() as tmp_dir:
trainer = Trainer(
model=model,
train_dataset=data["train"],
args=TrainingArguments(
per_device_train_batch_size=4,
gradient_accumulation_steps=4,
warmup_steps=2,
max_steps=3,
learning_rate=2e-4,
fp16=True,
logging_steps=1,
output_dir=tmp_dir,
),
data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train()

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue("adapter_model.bin" in os.listdir(tmp_dir))

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

@pytest.mark.multi_gpu_tests
@require_torch_multi_gpu
def test_causal_lm_training_mutli_gpu(self):
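As a follow-up to the test above, a hedged sketch of reloading the adapter it saves; the adapter directory path is illustrative (the test uses a temporary directory), and PeftModel.from_pretrained is the standard PEFT loading entry point.

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Reload the 4-bit base model and attach the saved AdaLoRA adapter.
base = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", load_in_4bit=True)
model = PeftModel.from_pretrained(base, "path/to/saved/adapter")  # e.g. the tmp_dir used in the test
model.eval()

On a single-GPU machine with bitsandbytes installed, the new test itself can be selected with, for example, pytest tests/test_gpu_examples.py -k test_4bit_adalora_causalLM.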