In [34]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)

from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from trl import SFTTrainer

In [36]:
base_model = "NousResearch/Llama-2-7b-hf"
new_model = "llama-2-7b-miniplatypus"

dataset = load_dataset("mlabonne/mini-platypus", split="train")
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "right"


In [1]:
import torch

# Example tensor x with shape (B, T, C)
x = torch.tensor([[[1.0, 2.0, 3.0, 4.0], 
                   [5.0, 6.0, 7.0, 8.0], 
                   [9.0, 10.0, 11.0, 12.0]],

                  [[-1.0, -2.0, -3.0, -4.0], 
                   [-5.0, -6.0, -7.0, -8.0], 
                   [-9.0, -10.0, -11.0, -12.0]]])

# Parameters
maxq = 15
sym = True

# Finding xmin and xmax
xmin, xmax = x.min(), x.max()

# Symmetric quantization adjustments
if sym:
    xmax = max(abs(xmin), xmax)
    xmin = -xmax

# Calculate scale and zero-point
scale = (xmax - xmin) / maxq
zero = (maxq + 1) / 2 if sym else round(-xmin / scale)

print(f"Scale: {scale}, Zero-Point: {zero}")


Scale: 1.600000023841858, Zero-Point: 8.0


In [46]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

peft_config = LoraConfig(
    lora_alpha=32,
    lora_dropout=.05,
    r=16,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["up_proj", "down_proj", 'gate_proj', 'k_proj', 'v_proj', 'q_proj', 'o_proj'],
)

import torch

if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
else:
    device = 'mps'
    print("MPS is available and will be used.")

model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=bnb_config, load_in_8bit=False, device_map=device)

model = prepare_model_for_kbit_training(
    model
)

MPS is available and will be used.


ImportError: Using `load_in_8bit=True` requires Accelerate: `pip install accelerate` and the latest version of bitsandbytes `pip install -i https://test.pypi.org/simple/ bitsandbytes` or pip install bitsandbytes` 