In [4]:
LoraConfig = {
  "base_model_name_or_path": "facebook/opt-350m", # base model to apply LoRA to
  "bias": "none",
  "fan_in_fan_out": False,
  "inference_mode": True,
  "init_lora_weights": True,
  "layers_pattern": None,
  "layers_to_transform": None,
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "modules_to_save": None,
  "peft_type": "LORA", # PEFT method type
  "r": 16,
  "revision": None,
  "target_modules": [
    "q_proj", #  #model modules to apply LoRA to (query and value projection layers)
    "v_proj"
  ],
  "task_type": "CAUSAL_LM"
}

In [6]:
promptEncoderConfig = {
  "base_model_name_or_path": "roberta-large", # base model to apply p-tuning to
  "encoder_dropout": 0.0,
  "encoder_hidden_size": 128,
  "encoder_num_layers": 2,
  "encoder_reparameterization_type": "MLP",
  "inference_mode": True,
  "num_attention_heads": 16,
  "num_layers": 24,
  "num_transformer_submodules": 1,
  "num_virtual_tokens": 20,
  "peft_type": "P_TUNING", # PEFT method type
  "task_type": "SEQ_CLS", # type of task to train model on
  "token_dim": 1024
}

In [7]:
from peft import (
    PromptEmbedding,
    PromptEncoder,
    PromptEncoderConfig,
    PromptEncoderReparameterizationType,
    PromptLearningConfig,
    PromptTuningConfig,
    TaskType
)

In [10]:
p_tuning_config = PromptEncoderConfig(
    base_model_name_or_path= "facebook/opt-350m",
    task_type = TaskType.SEQ_CLS,
    num_virtual_tokens = 20,
    num_transformer_submodules = 1,
    num_attention_heads = 16,
    num_layers = 24,
    encoder_reparameterization_type = 'MLP',
    encoder_hidden_size = 128,
)

In [11]:
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")

In [13]:
from peft import get_peft_model

lora_model = get_peft_model(model, p_tuning_config) # this way of model is loaded using config

In [14]:
lora_model.print_trainable_parameters()

trainable params: 300,288 || all params: 331,496,704 || trainable%: 0.09058551604784583


In [15]:
lora_model.save_pretrained("/home/kamal/training_files/opt-350m-lora")

In [16]:
from peft import PeftModel
lora_model_reload = PeftModel.from_pretrained(model, 
                                              "/home/kamal/training_files/opt-350m-lora/")

In [21]:
model.to('cuda')

OPTForCausalLM(
  (model): OPTModel(
    (decoder): OPTDecoder(
      (embed_tokens): Embedding(50272, 512, padding_idx=1)
      (embed_positions): OPTLearnedPositionalEmbedding(2050, 1024)
      (project_out): Linear(in_features=1024, out_features=512, bias=False)
      (project_in): Linear(in_features=512, out_features=1024, bias=False)
      (layers): ModuleList(
        (0-23): 24 x OPTDecoderLayer(
          (self_attn): OPTAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (activation_fn): ReLU()
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=409

In [20]:
lora_model_reload.to('cuda')

PeftModelForSequenceClassification(
  (base_model): OPTForCausalLM(
    (model): OPTModel(
      (decoder): OPTDecoder(
        (embed_tokens): Embedding(50272, 512, padding_idx=1)
        (embed_positions): OPTLearnedPositionalEmbedding(2050, 1024)
        (project_out): Linear(in_features=1024, out_features=512, bias=False)
        (project_in): Linear(in_features=512, out_features=1024, bias=False)
        (layers): ModuleList(
          (0-23): 24 x OPTDecoderLayer(
            (self_attn): OPTAttention(
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
            )
            (activation_fn): ReLU()
            (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(i

In [23]:
from torch import nn

class MLP(nn.Module):
    def __init__(self, num_hidden_units=2000):
        super().__init__()
        self.seq = nn.Sequential(
            nn.Linear(20, num_hidden_units),
            nn.ReLU(),
            nn.Linear(num_hidden_units, num_hidden_units),
            nn.ReLU(),
            nn.Linear(num_hidden_units, 2),
            nn.LogSoftmax(dim=-1)
        )
    def forward(self, X):
        return self.seq(X)

In [24]:
print([(n, type(m)) for n, m in MLP().named_modules()])

[('', <class '__main__.MLP'>), ('seq', <class 'torch.nn.modules.container.Sequential'>), ('seq.0', <class 'torch.nn.modules.linear.Linear'>), ('seq.1', <class 'torch.nn.modules.activation.ReLU'>), ('seq.2', <class 'torch.nn.modules.linear.Linear'>), ('seq.3', <class 'torch.nn.modules.activation.ReLU'>), ('seq.4', <class 'torch.nn.modules.linear.Linear'>), ('seq.5', <class 'torch.nn.modules.activation.LogSoftmax'>)]


In [25]:
from peft import LoraConfig

config = LoraConfig(
    target_modules=['seq.0', 'seq.2'],
    modules_to_save=['seq.4']
)

In [26]:
from peft import get_peft_model

model = MLP()
peft_model = get_peft_model(model, config)
peft_model.print_trainable_parameters()

trainable params: 56,164 || all params: 4,100,164 || trainable%: 1.369798866581922


In [27]:
peft_model.to('cuda')

PeftModel(
  (base_model): LoraModel(
    (model): MLP(
      (seq): Sequential(
        (0): Linear(
          in_features=20, out_features=2000, bias=True
          (lora_dropout): ModuleDict(
            (default): Identity()
          )
          (lora_A): ModuleDict(
            (default): Linear(in_features=20, out_features=8, bias=False)
          )
          (lora_B): ModuleDict(
            (default): Linear(in_features=8, out_features=2000, bias=False)
          )
          (lora_embedding_A): ParameterDict()
          (lora_embedding_B): ParameterDict()
        )
        (1): ReLU()
        (2): Linear(
          in_features=2000, out_features=2000, bias=True
          (lora_dropout): ModuleDict(
            (default): Identity()
          )
          (lora_A): ModuleDict(
            (default): Linear(in_features=2000, out_features=8, bias=False)
          )
          (lora_B): ModuleDict(
            (default): Linear(in_features=8, out_features=2000, bias=False)
         