<a href="https://colab.research.google.com/github/githubpradeep/notebooks/blob/main/20_LLM_Compression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rouge
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git
# install additional dependencies needed for training
!pip install rouge-score tensorboard py7zr
!pip install datasets

In [None]:
!pip install einops

Collecting einops
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.6.1


In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
torch.set_default_device('cuda')


In [3]:
torch.set_default_device('cuda')
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5", trust_remote_code=True, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", trust_remote_code=True, torch_dtype="auto")
inputs = tokenizer('''```python
def print_prime(n):
   """
   Print all primes between 1 and n
   """''', return_tensors="pt", return_attention_mask=False)

outputs = model.generate(**inputs, max_length=200)
text = tokenizer.batch_decode(outputs)[0]
print(text)

```python
def print_prime(n):
   """
   Print all primes between 1 and n
   """
   primes = []
   for num in range(2, n+1):
       is_prime = True
       for i in range(2, int(num**0.5)+1):
           if num % i == 0:
               is_prime = False
               break
       if is_prime:
           primes.append(num)
   print(primes)

print_prime(20)
```

## Exercises

1. Write a Python function that takes a list of numbers and returns the sum of all even numbers in the list.

```python
def sum_even(numbers):
   """
   Returns the sum of all even numbers in the list
   """
   return sum(num for num in numbers if


In [4]:
from dataclasses import dataclass

@dataclass
class LowRankConfig:
    rank:int
    target_modules: list[str]

In [None]:
model

In [5]:
#low rank decomposition of SelfAttention Key, Query and Value Matrices
config = LowRankConfig(
    rank= 384,
    target_modules=["Wqkv"]
)

In [6]:
from torch import nn
from dataclasses import dataclass
from torch.nn import functional as F
class LowRankLayer(nn.Module):
    """given a linear layer find low rank decomposition"""
    def __init__(self, rank, full_rank_layer):
        super().__init__()
        self.rank = rank

        U, S, Vh = torch.linalg.svd(full_rank_layer.weight.double())
        S_diag = torch.diag(S)
        self.U = U[:, :self.rank].half()
        self.S = S_diag[:self.rank, :self.rank].half()
        self.Vh = Vh[:self.rank, :].half()

    def forward(self, x):
        aprox_weight_matrix = self.U @ self.S @ self.Vh
        output = F.linear(x, aprox_weight_matrix)
        return output


In [7]:
#find the module that ends target suffix
def get_submodules(model, key):
    parent = model.get_submodule(".".join(key.split(".")[:-1]))
    target_name = key.split(".")[-1]
    target = model.get_submodule(key)
    return parent, target, target_name

# this function replaces a target layer with low rank layer
def recursive_setattr(obj, attr, value):
    attr = attr.split('.', 1)
    if len(attr) == 1:
        setattr(obj, attr[0], value)
    else:
        recursive_setattr(getattr(obj, attr[0]), attr[1], value)


In [8]:
import copy
model_lr = copy.deepcopy(model)


In [9]:
for key, module in model.named_modules():
    target_module_found = any(key.endswith("." + target_key) for target_key in config.target_modules)
    if target_module_found:
        low_rank_layer = LowRankLayer(config.rank, module)
        #replace target layer with low rank layer
        recursive_setattr(model_lr, key, low_rank_layer)

In [10]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [11]:
print_trainable_parameters(model)

trainable params: 1418270720 || all params: 1418270720 || trainable%: 100.0


In [12]:
print_trainable_parameters(model_lr)

trainable params: 1116133376 || all params: 1116133376 || trainable%: 100.0


In [14]:
1-1116133376/1418270720

0.2130322086886205

In [15]:
model.save_pretrained("model", from_pt=True)


In [16]:
model_lr.save_pretrained("model_lr", from_pt=True)


In [17]:
!ls -lh model/pytorch_model.bin

-rw-r--r-- 1 root root 2.7G Sep 15 17:00 model/pytorch_model.bin


In [18]:
!ls -lh model_lr/pytorch_model.bin

-rw-r--r-- 1 root root 2.1G Sep 15 17:01 model_lr/pytorch_model.bin


In [19]:
1-2.1/2.7

0.2222222222222222

In [22]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

torch.set_default_device('cuda')
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", trust_remote_code=True, torch_dtype="auto")
inputs = tokenizer('''```python
def add(x,y):
   ''', return_tensors="pt", return_attention_mask=False)

outputs = model_lr.generate(**inputs, max_length=200)
text = tokenizer.batch_decode(outputs)[0]
print(text)

```python
def add(x,y):
   return x + y

# Test
print(add(1,2)

# Solution
def add(x,y):
    return x + y

# Test
print(add(1,2)

# Solution
def add(x,y)
    return x + y

# Test
print(add(1,2)

# Solution
def add(x,y)
    return x + y

# Test
print(add(1,2)

# Solution
def add(x,y)
    return x + y

# Test
print(add(1,2)

# Solution
def add(x,y)
    return x + y

# Test
print(add(1,2)

# Solution
def add(x,y)
    return


In [24]:
inputs = tokenizer('''```python
def compare(a, b):
    Compares two strings
   ''', return_tensors="pt", return_attention_mask=False)

outputs = model_lr.generate(**inputs, max_length=200)
text = tokenizer.batch_decode(outputs)[0]
print(text)

```python
def compare(a, b):
    Compares two strings 
   
    def compare(a, b):
        if a == b:
            return True
        else:
        return False

# Test
print(compare('hello', 'hello')

# Output: True

# Exercise 2
def is_palindrome(s):
    if s.count('a') == 0:
        return True
    else:
        return False

# Test
print(is_palindrome('hello')

# Output: True

# Exercise 3
def is_palindrome(s):
    if s.count('a') == 0:
        return True
    else:
        return False

# Test
print(is_palindrome('hello')

# Exercise 3
def is_palindrome(s):
    


In [26]:
inputs = tokenizer('''```python
def sum(array):
    For loop to compute sum of numbers in an array
   ''', return_tensors="pt", return_attention_mask=False)

outputs = model_lr.generate(**inputs, max_length=200)
text = tokenizer.batch_decode(outputs)[0]
print(text)

```python
def sum(array):
    For loop to compute sum of numbers in an array
   
    def sum(array):
        sum = 0
    for i in array:
        sum += i
    return sum

# Test
print(sum([1, 2, 3, 4, 5] )

# Solution
def sum(array):
    sum = 0
    for i in array:
        sum += i
    return sum

# Test
print(sum([1, 2, 3, 4, 5] )

# Solution
def sum(array):
    sum = 0
    for i in array:
        sum += i
    return sum

# Test
print(sum([1, 2, 3, 4, 5] )

# Solution
def sum(array):
    sum = 0
    for i in array:
        sum += i
