In [1]:
!pip install -q -U transformers peft accelerate datasets trl bitsandbytes

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 24.4.1 requires cubinlinker, which is not installed.
cudf 24.4.1 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 24.4.1 requires ptxcompiler, which is not installed.
cuml 24.4.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 24.4.1 requires cupy-cuda11x>=12.0.0, which is not installed.
apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.8 which is incompatible.
apache-beam 2.46.0 requires numpy<1.25.0,>=1.14.3, but you have numpy 1.26.4 which is incompatible.
apache-beam 2.46.0 requires pyarrow<10.0.0,>=3.0.0, but you have pyarrow 16.1.0 which is incompatible.
beatrix-jupyterlab 2023.128.151533 requires jupyterlab~=3.6.0, but you have jupyterlab 4.2.1 which is incompatible.
cudf 24.4.1 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.5.0 

In [2]:
import torch
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig
)
from trl import SFTTrainer

2024-07-01 11:43:07.652280: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-01 11:43:07.652390: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-01 11:43:07.771255: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
# Fetch the "train" split of the instruction/output dataset used for fine-tuning
data_name = "jester20/data0"
dataset = load_dataset(path=data_name, split="train")
# Bare last expression so the notebook renders the dataset summary
dataset

Downloading readme:   0%|          | 0.00/316 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.25M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Dataset({
    features: ['instruction', 'output'],
    num_rows: 1000
})

In [4]:
# Identifiers for the fine-tuned result and for the base checkpoint it starts from.
# NOTE(review): refined_model is reassigned in a later cell before it is first used —
# confirm which repo id is the intended one.
refined_model = "Megnis/saiga_llama3_8b-qdora-4bit"
model_name = "NousResearch/Llama-2-7b-chat-hf"


tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [6]:
if torch.cuda.is_bf16_supported():
  !pip install flash_attn
  compute_dtype = torch.bfloat16
  attn_implementation = 'flash_attention_2'
else:
  compute_dtype = torch.float16
  attn_implementation = 'sdpa'


# Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, add_eos_token=True, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left' #Necessary for FlashAttention compatibility
# Set Quantization config
bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=False,
)
# Load model
model = AutoModelForCausalLM.from_pretrained(
          model_name, quantization_config=bnb_config, device_map={"": 0}, torch_dtype=compute_dtype, use_cache=False, attn_implementation=attn_implementation
)
model.config.pretraining_tp = 1

model = prepare_model_for_kbit_training(model)
model.gradient_checkpointing_enable({'use_reentrant':True})

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

In [7]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# Read the Hugging Face token stored under the "huggingface" Kaggle secret
# and authenticate this session with it (the token never appears in the notebook).
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("huggingface")
login(token=secret_value_0)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [8]:
# Adapter configuration: LoRA with DoRA enabled (use_dora=True).
# Rank and target modules are left at the library defaults.
peft_config = LoraConfig(
        lora_alpha=15,
        lora_dropout=0.1,
        bias="none",
        use_dora=True,
        task_type="CAUSAL_LM",
)

# Set Training Config
training_arguments = TrainingArguments(
        output_dir="./qdora2/",
        # NOTE(review): no evaluation strategy is set here, so confirm whether
        # do_eval=True actually triggers any evaluation during training.
        do_eval=True,
        optim="paged_adamw_32bit",
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        logging_steps=25,
        learning_rate=2e-4,
        num_train_epochs=1,
        fp16=False,
        bf16=False,
        max_grad_norm=0.3,
        max_steps=-1,  # -1: run length is governed by num_train_epochs
        save_strategy='epoch',
        # Only warmup_steps is kept: an explicit warmup_steps overrides warmup_ratio,
        # so the previous warmup_ratio=0.03 had no effect.
        warmup_steps=10,
        lr_scheduler_type="linear",
)

# Load Trainer
# NOTE(review): eval_dataset is the same object as train_dataset, and
# dataset_text_field points at the "instruction" column only — confirm that the
# "output" column is not meant to be part of the training text.
trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        eval_dataset=dataset,
        dataset_text_field="instruction",
        peft_config=peft_config,
        tokenizer=tokenizer,
        args=training_arguments,
)

# Run the training loop
trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


NotImplementedError: igemmlt not available (probably built with NO_CUBLASLT)

In [None]:
# Name under which the fine-tuned model is saved locally and pushed to the Hub in the cells below
refined_model = "Megnis/qdora2"

In [None]:
# Write the trained model held by the trainer to the local directory named by `refined_model`
trainer.model.save_pretrained(save_directory=refined_model)

In [None]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# Read the Hugging Face token stored under the "huggingface" Kaggle secret
# and authenticate this session with it before uploading.
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("huggingface")
login(token=secret_value_0)

In [None]:
# Push the fine-tuned model and its tokenizer to the Hugging Face Hub.
# Trainer.push_to_hub takes the commit message as its first positional argument, so
# passing the repo id there only changed the commit message and did not select the
# target repo. The model/tokenizer push_to_hub methods take the repo id directly.
trainer.model.push_to_hub(refined_model)
tokenizer.push_to_hub(refined_model)