In [None]:
import locale
def getpreferredencoding(do_setlocale = True):
    return "UTF-8"
locale.getpreferredencoding = getpreferredencoding

!pip -q install datasets
!pip -q install transformers
!pip -q install peft
!pip -q install -U bitsandbytes

In [1]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)

import torch
import os

from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
def get_model_size(model):
    torch.save(model.state_dict(), "temp_model.pth")
    size_mb = os.path.getsize("temp_model.pth") / (1024 ** 3)
    os.remove("temp_model.pth")
    return size_mb

def get_num_trainable_parameters(model):
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return num_params

def get_lora_config(target_modules):
    return LoraConfig(
        r=8,
        target_modules=target_modules,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

def compute_model_sizes(model_name, quantization_enabled=False, lora_config=None):
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

    if lora_config:
        params_before_lora = get_num_trainable_parameters(model)
        print(f"Num trainable before Lora: {params_before_lora}")

        lora_model = prepare_model_for_kbit_training(model)
        lora_model = get_peft_model(lora_model, lora_config)
        params_after_lora = get_num_trainable_parameters(lora_model)
        print(f"Num trainable after Lora: {params_after_lora}")
        print(f"% of all params: {(params_after_lora / params_before_lora):.6f}")

    if quantization_enabled:
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        quant_model = AutoModelForCausalLM.from_pretrained(
            model_name, quantization_config=quant_config, device_map={"": 0}, trust_remote_code=True
        )
        size_before_quantization = get_model_size(model)
        size_after_quantization = get_model_size(quant_model)
        print(f"Size before quantization: {size_before_quantization:.4f} GB")
        print(f"Size after quantization: {size_after_quantization:.4f} GB")
        return

    # If quantization is not enabled
    size = get_model_size(model)
    print(f"Model size without quantization: {size:.4f} GB")

    return

In [34]:
# Results for CodeParrot Small
model_name = "codeparrot/codeparrot-small"
target_modules=["c_attn", "c_proj", "c_fc"]
lora_config = get_lora_config(target_modules)

compute_model_sizes(model_name, quantization_enabled=True, lora_config=lora_config)

Num trainable before Lora: 111008256
Num trainable after Lora: 1179648
% of all params: 0.010627
Size before quantization: 0.4180 GB
Size after quantization: 0.0896 GB


In [33]:
# Results for CodeParrot Large
model_name = "codeparrot/codeparrot"
target_modules=["c_attn", "c_proj", "c_fc"]
lora_config = get_lora_config(target_modules)

compute_model_sizes(model_name, quantization_enabled=True, lora_config=lora_config)

Num trainable before Lora: 1529628800
Num trainable after Lora: 9830400
% of all params: 0.006427
Size before quantization: 5.7354 GB
Size after quantization: 0.8117 GB


In [3]:
# Results for CodeGen
model_name = "Salesforce/codegen-2B-mono"
target_modules=["qkv_proj", "out_proj", "fc_in", "fc_out"]
lora_config = get_lora_config(target_modules)

compute_model_sizes(model_name, quantization_enabled=True, lora_config=lora_config)

Num trainable before Lora: 2779356160
Num trainable after Lora: 10485760
% of all params: 0.003773
Size before quantization: 10.3932 GB
Size after quantization: 1.6990 GB


In [4]:
# Results for DeciCoder
model_name = "Deci/DeciCoder-1b"
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
lora_config = get_lora_config(target_modules)

compute_model_sizes(model_name, quantization_enabled=True, lora_config=lora_config)

Num trainable before Lora: 1113671680
Num trainable after Lora: 5857280
% of all params: 0.005259
Size before quantization: 4.1708 GB
Size after quantization: 0.8139 GB
