In [1]:
import os

import torch
import torch.nn as nn
from datasets import Dataset, DatasetDict
from datasets import load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

# Select the compute device: CUDA when PyTorch reports it as available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Show which device was selected by the setup cell (prints e.g. "cuda" or "cpu").
print(device)

cuda


In [6]:
# Load the base model quantized to 4-bit NF4 with double quantization;
# the quantized layers compute in float16.
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=compute_dtype,
            bnb_4bit_use_double_quant=True,
)

# SECURITY: an access token must never be committed inside a notebook.
# It is read from the HF_TOKEN environment variable instead; when the variable
# is unset this is None and the library falls back to the locally stored login.
# The token that used to be hardcoded in this cell has been exposed and should
# be revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    quantization_config=bnb_config,
    cache_dir="/Llama2-hf",
    device_map="auto",
    token=hf_token,
)

# The tokenizer comes from the same repository as the model, so it is given
# the same credentials and the same cache directory.
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    cache_dir="/Llama2-hf",
    token=hf_token,
)
# Pad with the unknown token, on the left side of each sequence.
# NOTE(review): left padding is what batched generation wants; some TRL
# versions warn about non-right padding during fp16 training — confirm.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "left"

Downloading shards:   0%|          | 0/2 [00:49<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Read the three CSV splits (train / validation / test) into `Dataset` objects,
# in that order.
train_dataset, validation_dataset, test_dataset = map(
    Dataset.from_csv,
    ('dataset/train.csv', 'dataset/validation.csv', 'dataset/test.csv'),
)

In [None]:
# Bundle the three splits into one DatasetDict keyed by split name
# ('train', 'validation', 'test').
dataset = DatasetDict(
    train=train_dataset,
    validation=validation_dataset,
    test=test_dataset,
)

In [None]:
# LoRA adapter configuration for causal-LM fine-tuning.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Adapters are attached only to the modules with these names.
    target_modules=["down_proj", "up_proj", "gate_proj"],
    # Adapter rank and alpha are set to the same value.
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    # No bias parameters are trained.
    bias="none",
)

In [None]:
# Hyper-parameters for the fine-tuning run, grouped by concern.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version before upgrading.
training_arguments = TrainingArguments(
    # --- where checkpoints and logs are written ---
    output_dir="working/results/",

    # --- schedule: 500 optimizer steps, 50 of them warm-up, linear schedule ---
    max_steps=500,
    warmup_steps=50,
    lr_scheduler_type="linear",
    learning_rate=1e-4,

    # --- optimizer and numeric precision ---
    optim="paged_adamw_8bit",
    fp16=True,

    # --- batching: 48 samples per device, accumulated over 2 steps ---
    per_device_train_batch_size=48,
    per_device_eval_batch_size=48,
    gradient_accumulation_steps=2,

    # --- evaluate, log and checkpoint every 100 steps ---
    do_eval=True,
    evaluation_strategy="steps",
    eval_steps=100,
    logging_steps=100,
    save_steps=100,
    log_level="debug",
)

In [None]:
# Shell escape: run `nvidia-smi` and show its output in the notebook
# (placed right before the trainer is built and training starts).
!nvidia-smi

In [None]:
# Build the supervised fine-tuning trainer from the quantized base model,
# the LoRA configuration and the train/validation splits, then run training.
trainer = SFTTrainer(
    # model, tokenizer and adapter configuration
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    # data: text is read from the "translations" field, with the sequence
    # length set to 48
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    dataset_text_field="translations",
    max_seq_length=48,
    # training hyper-parameters
    args=training_arguments,
)

# Kept as the last expression of the cell so the training result is displayed.
trainer.train()

In [None]:
# Reload a fresh copy of the quantized base model (and its tokenizer) so the
# trained LoRA adapter can be attached to it for inference.
base_model = "meta-llama/Llama-2-7b-hf"

# Same 4-bit NF4 / double-quantization / fp16-compute setup used for training.
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

# Reuse the cache directory of the training-time load so the weights are not
# fetched a second time into the default cache, and pass the access token from
# the environment (None falls back to the locally stored login).
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map={"": 0},  # place the whole model on device 0
    quantization_config=bnb_config,
    cache_dir="/Llama2-hf",
    token=os.environ.get("HF_TOKEN"),
)
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    use_fast=True,
    cache_dir="/Llama2-hf",
    token=os.environ.get("HF_TOKEN"),
)
# Mirror the tokenizer settings used during training so that padded batches
# work at inference time as well.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "left"

In [None]:
# Attach the LoRA adapter saved at step 500 of the training run to the
# reloaded base model.
# NOTE: this rebinds `model`, so re-running the cell without re-running the
# base-model cell first would wrap an already wrapped model.
model = PeftModel.from_pretrained(
    model=model,
    model_id="working/results/checkpoint-500/",
)