In [None]:
# Run nvidia-smi in a shell to show the GPUs visible to this runtime.
!nvidia-smi

# Children will code LoRA
1. Implement it first.
2. Compare full fine-tuning vs. LoRA: number of trainable parameters (memory) and training time. (Gauge their skill: set a time limit and walk around the room.)
3. See the plug-and-play effect: swap between two LoRAs, e.g. the MBZUAI LoRA and a LoRA trained for a different task.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import math
from typing import Optional, List
from transformers import AutoModelForCausalLM, AutoTokenizer
import pandas as pd
import datasets

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"  # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-0.5B-Chat",
)
# Move the model explicitly: generation sends its inputs to `device`, so the
# model must live there even when no training step has run yet.
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")
 

In [None]:
class LoRALayer():
    def __init__(
        self,
        r: int,
        lora_alpha: int,
        lora_dropout: float,
        merge_weights: bool,
    ):
        """
        Mixin holding the LoRA hyper-parameters and merge bookkeeping.

        Args:
            r: Rank of the low-rank update.
            lora_alpha: The alpha value for the LoRA layer.
            lora_dropout: Dropout probability; a value of 0 (or less) disables
                dropout and stores an identity function instead.
            merge_weights: Whether to merge the weights of the LoRA layer.
        """
        # Nothing has been merged into the wrapped weights yet.
        self.merged = False
        self.merge_weights = merge_weights
        self.lora_alpha = lora_alpha
        self.r = r
        # Optional dropout; fall back to a pass-through callable when disabled.
        self.lora_dropout = nn.Dropout(p=lora_dropout) if lora_dropout > 0. else (lambda x: x)

In [None]:
class LinearLORA(nn.Module, LoRALayer):
    """
    LoRA wrapper around nn.Linear.

    The wrapped layer is stored in ``self.linear``. When ``r > 0`` two trainable
    matrices are added, ``lora_A`` of shape ``(in_features, r)`` and ``lora_B``
    of shape ``(r, out_features)``, and the forward pass returns
    ``linear(x) + (dropout(x) @ lora_A @ lora_B) * (lora_alpha / r)``.

    Note: ``LoRALayer.__init__`` stores the boolean flag under the attribute
    name ``merge_weights``, so on an instance that name is always the flag.
    Explicit merging is therefore exposed as ``merge_lora_weights()`` and
    ``unmerge_lora_weights()``.
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        r: int = 0,
        lora_alpha: int = 1,
        lora_dropout: float = 0.,
        fan_in_fan_out: bool = False,
        merge_weights: bool = True,
        **kwargs
    ):
        """
        Args:
            in_features: Size of each input sample.
            out_features: Size of each output sample.
            r: The number of ranks to consider for the LoRA (0 disables LoRA).
            lora_alpha: The alpha value for the LoRA layer.
            lora_dropout: Dropout probability applied to the input of the LoRA branch.
            fan_in_fan_out: Accepted but not used by this implementation.
            merge_weights: Whether train()/eval() should unmerge/merge the LoRA
                update into the wrapped weight automatically.
            kwargs: Other parameters for nn.Linear.
        """
        nn.Module.__init__(self)
        LoRALayer.__init__(self, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout,
                           merge_weights=merge_weights)
        self.linear = nn.Linear(in_features, out_features, **kwargs)
        # Actual trainable parameters
        if r > 0:
            self.lora_A = nn.Parameter(self.linear.weight.new_zeros((in_features, r)))
            self.lora_B = nn.Parameter(self.linear.weight.new_zeros((r, out_features)))
            self.scaling = self.lora_alpha / self.r
            # Freezing the pre-trained weight matrix
            self.linear.weight.requires_grad = False
        self.reset_parameters()

    def assign_object(self, obj: nn.Linear):
        """
        Use an existing nn.Linear as the wrapped layer.
        Useful to reuse the parameters of a pre-trained layer.

        Args:
            obj: The layer to wrap.
        """
        self.linear = obj
        if self.r > 0:
            # __init__ only froze the placeholder layer it created; the layer
            # that actually gets wrapped must be frozen as well.
            self.linear.weight.requires_grad = False

    def reset_parameters(self):
        """
        Re-initialise the wrapped layer and the LoRA matrices.

        Warning: this also re-initialises ``self.linear``, so calling it after
        ``assign_object`` discards the assigned weights.
        """
        self.linear.reset_parameters()
        if hasattr(self, 'lora_A'):
            # initialize A the same way as the default for nn.Linear and B to zero,
            # so the LoRA branch contributes nothing before training
            nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
            nn.init.zeros_(self.lora_B)

    def _lora_delta(self) -> torch.Tensor:
        """Return the scaled low-rank update laid out like ``self.linear.weight``."""
        with torch.no_grad():
            # lora_A @ lora_B is (in_features, out_features) while nn.Linear
            # stores its weight as (out_features, in_features): transpose.
            delta = (self.lora_A @ self.lora_B).T * self.scaling
        return delta.to(self.linear.weight.dtype)

    def train(self, mode: bool = True):
        """
        Switch between training and evaluation mode.

        When the ``merge_weights`` flag is set, entering training mode removes a
        previously merged LoRA update from the wrapped weight and entering
        evaluation mode merges it in.

        Args:
            mode: True for training mode, False for evaluation mode.

        Returns:
            self, like nn.Module.train.
        """
        # Let nn.Module update self.training and propagate the mode to the
        # children (the wrapped layer and, if present, the dropout module).
        nn.Module.train(self, mode)
        if self.merge_weights:
            if mode and self.merged:
                # Make sure that the weights are not merged
                if self.r > 0:
                    self.linear.weight.data -= self._lora_delta()
                self.merged = False
            elif not mode and not self.merged:
                # Merge the weights and mark it
                if self.r > 0:
                    self.linear.weight.data += self._lora_delta()
                self.merged = True
        return self

    def merge_lora_weights(self):
        """
        Merge the LoRA update into the wrapped weight (no-op if already merged).

        Raises:
            ValueError: If the layer was created with r == 0.
        """
        if self.r <= 0:
            raise ValueError("The rank parameter is not set.")
        if not self.merged:
            self.linear.weight.data += self._lora_delta()
            self.merged = True

    def unmerge_lora_weights(self):
        """
        Remove the LoRA update from the wrapped weight (no-op if not merged).

        Raises:
            ValueError: If the layer was created with r == 0.
        """
        if self.r <= 0:
            raise ValueError("The rank parameter is not set.")
        if self.merged:
            self.linear.weight.data -= self._lora_delta()
            self.merged = False

    # Original method names. `merge_weights` is kept only at class level: on
    # instances the name resolves to the boolean flag set by LoRALayer.__init__.
    merge_weights = merge_lora_weights
    unmerge_weights = unmerge_lora_weights

    def forward(self, x: torch.Tensor):
        """Apply the wrapped layer and, unless merged or r == 0, add the LoRA branch."""
        result = self.linear(x)
        if self.r > 0 and not self.merged:
            result = result + (self.lora_dropout(x) @ self.lora_A @ self.lora_B) * self.scaling
        return result
            

In [None]:
class EmbeddingLORA(nn.Module, LoRALayer):
    """
    LORA for nn.Embedding

    The wrapped layer is stored in ``self.embedding``. When ``r > 0`` two
    trainable matrices are added, ``lora_A`` of shape ``(num_embeddings, r)``
    and ``lora_B`` of shape ``(r, embedding_dim)``, and the forward pass returns
    ``embedding(x) + (lookup(x, lora_A) @ lora_B) * (lora_alpha / r)``.

    Note: ``LoRALayer.__init__`` stores the boolean flag under the attribute
    name ``merge_weights``, so on an instance that name is always the flag.
    Explicit merging is therefore exposed as ``merge_lora_weights()`` and
    ``unmerge_lora_weights()``.
    """

    def __init__(
        self,
        num_embeddings: int,
        embedding_dim: int,
        r: int = 0,
        lora_alpha: int = 1,
        merge_weights: bool = True,
        **kwargs
    ):
        """
        Args:
            num_embeddings: Number of embeddings.
            embedding_dim: The size of each embedding vector.
            r: The number of ranks to consider for the LoRA (0 disables LoRA).
            lora_alpha: The alpha value for the LoRA layer.
            merge_weights: Whether train()/eval() should unmerge/merge the LoRA
                update into the wrapped weight automatically.
            kwargs: Other parameters for nn.Embedding.
        """
        nn.Module.__init__(self)
        LoRALayer.__init__(self, r=r, lora_alpha=lora_alpha, lora_dropout=0,
                           merge_weights=merge_weights)
        self.embedding = nn.Embedding(num_embeddings, embedding_dim, **kwargs)
        # Actual trainable parameters
        if r > 0:
            self.lora_A = nn.Parameter(self.embedding.weight.new_zeros((num_embeddings, r)))
            self.lora_B = nn.Parameter(self.embedding.weight.new_zeros((r, embedding_dim)))
            self.scaling = self.lora_alpha / self.r
            # Freezing the pre-trained weight matrix (Embedding)
            self.embedding.weight.requires_grad = False
        self.reset_parameters()

    def assign_object(self, obj: nn.Embedding):
        """
        Use an existing nn.Embedding as the wrapped layer.
        Useful to reuse the parameters of a pre-trained layer.

        Args:
            obj: The layer to wrap.
        """
        self.embedding = obj
        if self.r > 0:
            # __init__ only froze the placeholder layer it created; the layer
            # that actually gets wrapped must be frozen as well.
            self.embedding.weight.requires_grad = False

    def reset_parameters(self):
        """
        Re-initialise the wrapped layer and the LoRA matrices.

        Warning: this also re-initialises ``self.embedding``, so calling it
        after ``assign_object`` discards the assigned weights.
        """
        self.embedding.reset_parameters()
        if hasattr(self, 'lora_A'):
            # A starts at zero and B is drawn from a normal distribution, so the
            # LoRA branch contributes nothing before training
            nn.init.zeros_(self.lora_A)
            nn.init.normal_(self.lora_B)

    def _lora_delta(self) -> torch.Tensor:
        """Return the scaled low-rank update laid out like ``self.embedding.weight``."""
        with torch.no_grad():
            # (num_embeddings, r) @ (r, embedding_dim) already matches the
            # (num_embeddings, embedding_dim) layout of the embedding weight.
            delta = (self.lora_A @ self.lora_B) * self.scaling
        return delta.to(self.embedding.weight.dtype)

    def train(self, mode: bool = True):
        """
        Switch between training and evaluation mode.

        When the ``merge_weights`` flag is set, entering training mode removes a
        previously merged LoRA update from the wrapped weight and entering
        evaluation mode merges it in.

        Args:
            mode: True for training mode, False for evaluation mode.

        Returns:
            self, like nn.Module.train.
        """
        # Let nn.Module update self.training and propagate the mode to children.
        nn.Module.train(self, mode)
        if self.merge_weights:
            if mode and self.merged:
                # Make sure that the weights are not merged
                if self.r > 0:
                    self.embedding.weight.data -= self._lora_delta()
                self.merged = False
            elif not mode and not self.merged:
                # Merge the weights and mark it
                if self.r > 0:
                    self.embedding.weight.data += self._lora_delta()
                self.merged = True
        return self

    def merge_lora_weights(self):
        """
        Merge the LoRA update into the wrapped weight (no-op if already merged).

        Raises:
            ValueError: If the layer was created with r == 0.
        """
        if self.r <= 0:
            raise ValueError("The rank parameter is not set.")
        if not self.merged:
            self.embedding.weight.data += self._lora_delta()
            self.merged = True

    def unmerge_lora_weights(self):
        """
        Remove the LoRA update from the wrapped weight (no-op if not merged).

        Raises:
            ValueError: If the layer was created with r == 0.
        """
        if self.r <= 0:
            raise ValueError("The rank parameter is not set.")
        if self.merged:
            # The update lives in the weight tensor; nn.Embedding itself has no `.data`.
            self.embedding.weight.data -= self._lora_delta()
            self.merged = False

    # Original method names. `merge_weights` is kept only at class level: on
    # instances the name resolves to the boolean flag set by LoRALayer.__init__.
    merge_weights = merge_lora_weights
    unmerge_weights = unmerge_lora_weights

    def forward(self, x: torch.Tensor):
        """Look up the wrapped embedding and, unless merged or r == 0, add the LoRA branch."""
        result = self.embedding(x)
        if self.r > 0 and not self.merged:
            # Look up the rows of lora_A with the same options as the wrapped embedding.
            after_A = F.embedding(
                x, self.lora_A, self.embedding.padding_idx, self.embedding.max_norm,
                self.embedding.norm_type, self.embedding.scale_grad_by_freq, self.embedding.sparse
            )
            result = result + (after_A @ self.lora_B) * self.scaling
        return result
            

In [None]:
# Display the model's module tree (this cell comes before apply_lora is called).
model

In [None]:
# Smoke test: 50 embeddings of dimension 100 with a rank-2 LoRA update.
embed = EmbeddingLORA(50, 100, r=2)
# A (4, 2) batch of random indices drawn from [0, 50).
test_input  = torch.randint(0, 50, (4, 2))
embed.train()
# Display the shape of the forward-pass output.
embed(test_input).shape

In [None]:
from tqdm import tqdm

In [None]:
def switch_merged(
    model,
):
    """
    Recursively flip the ``merged`` flag of every LoRA layer inside a model.

    EmbeddingLORA and LinearLORA skip their low-rank branch in forward() while
    ``merged`` is True, so flipping the flag switches the LoRA contribution off
    and on. No weights are modified, so this is only meant for layers created
    with ``merge_weights=False``: with the flag set to True, train() would
    subtract an update that was never added.

    Args:
        model: The PyTorch model whose LoRA layers are toggled.
    """
    for _, module in model.named_children():
        # Descend first so LoRA layers nested at any depth are reached,
        # not just the direct children of `model`.
        switch_merged(module)
        if isinstance(module, (EmbeddingLORA, LinearLORA)):
            module.merged = not module.merged


In [None]:
def apply_lora(
    model,
    r: int,
    lora_alpha: int,
    merge_weights: bool = False,
    lora_dropout: float = 0.0,
    replace_embedding: bool = True,
    replace_linear: bool = True
):
    """
    Recursively replaces all Embedding and Linear layers in a PyTorch model with a LORA layer.

    The model is modified in place. Each replaced layer is kept as the wrapped
    layer of its LoRA wrapper, so the existing weights are reused. Layers that
    are already LoRA wrappers are skipped.

    Args:
        model: The PyTorch model to modify.
        r: Rank of the LoRA update.
        lora_alpha: The alpha value for the LoRA layers.
        merge_weights: Passed to every LoRA layer as its ``merge_weights`` flag.
        lora_dropout: Dropout probability for the LinearLORA layers.
        replace_embedding: Whether nn.Embedding layers are replaced.
        replace_linear: Whether nn.Linear layers are replaced.
    """
    for name, module in model.named_children():
        if isinstance(module, (EmbeddingLORA, LinearLORA)):
            # Already wrapped; do not descend, or the wrapped layer would be wrapped again.
            continue
        if isinstance(module, nn.Embedding) and replace_embedding:
            # Create a new instance of EmbeddingLORA with the same configurations
            new_module = EmbeddingLORA(
                num_embeddings=module.num_embeddings,
                embedding_dim=module.embedding_dim,
                padding_idx=module.padding_idx,
                max_norm=module.max_norm,
                norm_type=module.norm_type,
                scale_grad_by_freq=module.scale_grad_by_freq,
                sparse=module.sparse,
                r=r,
                lora_alpha=lora_alpha,
                merge_weights=merge_weights,
            )
        elif isinstance(module, nn.Linear) and replace_linear:
            new_module = LinearLORA(
                in_features=module.in_features,
                out_features=module.out_features,
                # Mirror the wrapped layer's bias setting for the placeholder layer.
                bias=module.bias is not None,
                r=r,
                lora_alpha=lora_alpha,
                lora_dropout=lora_dropout,
                fan_in_fan_out=False,
                merge_weights=merge_weights,
            )
        else:
            # Recursively apply the function to submodules
            apply_lora(
                module,
                r,
                lora_alpha,
                merge_weights,
                lora_dropout=lora_dropout,
                replace_embedding=replace_embedding,
                replace_linear=replace_linear,
            )
            continue
        # Reuse the original layer (and its weights) inside the wrapper
        new_module.assign_object(module)
        # The LoRA matrices are created on the default device; move them next to
        # the wrapped weight so the wrapper also works on a model that is
        # already on the GPU.
        new_module.to(module.weight.device)
        # Replace the module in the model with the new one
        setattr(model, name, new_module)

In [None]:
# Wrap the model's nn.Embedding and nn.Linear layers in place with rank-2 LoRA layers.
apply_lora(model, r=2, lora_alpha=1)

In [None]:
# Display the module tree again now that apply_lora has been called.
model

In [None]:
# Load the fine-tuning data; the formatting step below reads its 'user' and 'assistant' columns.
df_mbzuai = pd.read_csv('mbzuai.csv')

In [None]:
def convert_to_qwen_format(data, tokenizer):
    """
    Render one user/assistant exchange as a single chat-formatted training string.

    Args:
        data: Mapping (e.g. a DataFrame row) with 'user' and 'assistant' entries.
        tokenizer: Tokenizer providing ``apply_chat_template``.

    Returns:
        The text produced by the tokenizer's chat template (not tokenized).
    """
    messages = [
        {"role": "user", "content": data['user']},
        {"role": "assistant", "content": data['assistant']}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        # The conversation already contains the assistant reply, so no generation
        # prompt is appended; it would add an empty assistant turn to every
        # training example.
        add_generation_prompt=False
    )
    return text

In [None]:
# Render every row as one chat-formatted string and store it in a new 'text' column.
df_mbzuai['text'] = df_mbzuai.apply(lambda x: convert_to_qwen_format(x, tokenizer), axis=1)

In [None]:
# Preview the first rows only instead of dumping the whole frame.
df_mbzuai.head()

In [None]:
# Convert the DataFrame into a datasets.Dataset.
train_data = datasets.Dataset.from_pandas(df_mbzuai)

In [None]:
def tokenize(x, tokenizer):
    """Call ``tokenizer`` on the 'text' entry of ``x`` and return its result unchanged."""
    texts = x['text']
    encoded = tokenizer(texts)
    return encoded

In [None]:
# List the DataFrame's column names.
df_mbzuai.columns

In [None]:
# Tokenize in batches of 16 and drop every original column so only the
# tokenizer outputs remain. The columns to drop are taken from the dataset
# itself so the list always matches the data being mapped.
train_dataset = train_data.map(
    lambda x: tokenize(x, tokenizer),
    batched=True,
    batch_size=16,
    remove_columns=train_data.column_names,
)

In [None]:
def count_parameters(model):
    """Return the total number of elements over the parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Freeze every parameter whose name does not contain "lora"; parameters with
# "lora" in their name keep their current requires_grad setting.
for name, param in model.named_parameters():
    if "lora" not in name:
        param.requires_grad = False

In [None]:
# Number of parameter elements that still require gradients after the freeze above.
count_parameters(model)

In [None]:
from transformers import DataCollatorForLanguageModeling

# Batch collator for language modelling; mlm=False turns masked-LM off.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False,
)

from transformers import Trainer, TrainingArguments


# Training configuration: 50 epochs, batch size 16 per device, learning rate
# 1e-4; output goes to ./mbzuai (overwritten) with save_total_limit=1.
training_args = TrainingArguments(
    output_dir="./mbzuai",
    overwrite_output_dir=True,
    num_train_epochs= 50,
    per_device_train_batch_size=16,
    save_total_limit=1,
    learning_rate=1e-4
)

# `model` is the LoRA-wrapped model; only parameters that still require
# gradients receive gradients during training.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
)

# Run the fine-tuning loop.
trainer.train()


In [None]:
def generate_answer(model, tokenizer, prompt, max_new_tokens=512):
    """
    Generate the assistant's reply to a single user prompt.

    Args:
        model: Causal language model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer matching the model (must provide ``apply_chat_template``).
        prompt: The user message.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded reply, without the prompt and without special tokens.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    # Here the generation prompt is wanted: the model should continue with the assistant turn.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Send the inputs to wherever the model actually lives instead of relying on a global.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    prompt_ids = model_inputs["input_ids"]

    generated_ids = model.generate(
        input_ids=prompt_ids,
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=model_inputs["attention_mask"],
        max_new_tokens=max_new_tokens
    )
    # generate() returns prompt + continuation; keep only the newly generated tokens.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(prompt_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response


In [None]:
# Ask the model a general-knowledge question.
generate_answer(model, tokenizer, "Who is Prabowo Subianto?")

In [None]:
# Ask the model a question that mentions MBZUAI.
generate_answer(model, tokenizer, "Which one is the best? MBZUAI or Universitas Indonesia? ")

## Switch LoRA off and on

In [None]:
def switch_merged(
    model,
):
    """
    Recursively flip the ``merged`` flag of every LoRA layer below ``model``.

    Every descendant module is visited; for each EmbeddingLORA or LinearLORA
    the boolean ``merged`` attribute is inverted. The weights themselves are
    not touched.

    Args:
        model: The PyTorch model whose LoRA layers are toggled.
    """
    for child in model.children():
        # Handle the child's own descendants before the child itself.
        switch_merged(child)
        if not isinstance(child, (EmbeddingLORA, LinearLORA)):
            continue
        child.merged = not child.merged


In [None]:
print("Without LORA")
print("-------")
# Flip the `merged` flag on every LoRA layer; their forward() skips the LoRA
# branch while the flag is True.
switch_merged(model)
print(generate_answer(model, tokenizer, "How to register to MBZUAI?"))
print(generate_answer(model, tokenizer, "Write a poem related to MBZUAI!"))
print("-------")

print("With LORA")
print("-------")

# Flip the flags back so the LoRA branch is used again.
switch_merged(model)
print(generate_answer(model, tokenizer, "How to register to MBZUAI?"))
print(generate_answer(model, tokenizer, "Write a poem related to MBZUAI!"))


In [None]:
# One more MBZUAI question, asked after the flags were flipped back above.
print(generate_answer(model, tokenizer, "Where is MBZUAI located?"))


## TODO

1. Tidy the notebook
2. Add save LORA weight function
3. Add attach LORA weight function
4. Add remove LORA weight function