In [1]:
from datasets import Dataset, DatasetDict, load_dataset
import ast
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from unsloth import FastLanguageModel

In [3]:

PROMPT_TEMPLATE = "[INST] Extract span of text from the customer review associated with the topic - {}. Customer Review : '{}'[/INST] {}{}"
csv_path = "./arcelik_llm_training_set.csv"
base_model_id = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit"
max_seq_length = 130
load_in_4bit =True

In [4]:
def get_model_and_tokenizer(model_id: str, max_length, load_in_4bit,):
    model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_id, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length = max_seq_length,
    dtype = None,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
    )
    return model, tokenizer

In [5]:

model, tokenizer = get_model_and_tokenizer(base_model_id,
                                           max_seq_length,
                                           load_in_4bit)

config.json: 100%|██████████| 1.07k/1.07k [00:00<00:00, 3.33MB/s]


==((====))==  Unsloth: Fast Mistral patching release 2024.2
   \\   /|    GPU: NVIDIA RTX A4000. Max memory: 15.731 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.0+cu121. CUDA = 8.6. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.24. FA = False.
 "-____-"     Apache 2 free license: http://github.com/unslothai/unsloth


model.safetensors: 100%|██████████| 4.13G/4.13G [00:23<00:00, 177MB/s] 
generation_config.json: 100%|██████████| 116/116 [00:00<00:00, 349kB/s]
tokenizer_config.json: 100%|██████████| 1.46k/1.46k [00:00<00:00, 4.41MB/s]
tokenizer.model: 100%|██████████| 493k/493k [00:00<00:00, 2.31MB/s]
tokenizer.json: 100%|██████████| 1.80M/1.80M [00:00<00:00, 16.9MB/s]
special_tokens_map.json: 100%|██████████| 438/438 [00:00<00:00, 1.36MB/s]


In [8]:

EOS_TOKEN = tokenizer.eos_token

def format_row(row):
    indices = ast.literal_eval(row['indices'])
    span = row['sentence'][indices[-1]:indices[1]]
    try:
        assert span == row['span']
    except Exception:
        print(row['sentence'])
        print(span)
        print(row['span'])
        print(indices)
        print("next")
    formatted_string = PROMPT_TEMPLATE.format(row['topic'], row['sentence'], span, EOS_TOKEN)
    # result = tokenizer(formatted_string,truncation=True,
        # max_length=max_length,
        # padding="max_length",)
    # result["input_ids"].append(tokenizer.eos_token_id)
    # new_record ["labels"] = result["input_ids"].copy()
    return {'text' : formatted_string}

def get_data(csv_path, valid_set_ratio = 0.15):
    data = load_dataset('csv', data_files="arcelik_llm_training_set.csv")
    dataset = data.map(format_row, remove_columns=data.column_names['train'])
    # Split the dataset into training and evaluation sets
    train_test_split = dataset['train'].train_test_split(test_size=0.15)
    dataset = DatasetDict({
        'train': train_test_split['train'],
        'eval': train_test_split['test']
    })
    return dataset



In [9]:

train_dataset = get_data(csv_path)

In [10]:
train_dataset

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 22825
    })
    eval: Dataset({
        features: ['text'],
        num_rows: 4029
    })
})

In [11]:
train_dataset['train'][0]

{'text': "[INST] Extract span of text from the customer review associated with the topic - Clean. Customer Review : 'Wash well, programs are short and efficient and the noise is decent and hardly vibrates at all.'[/INST] </s>"}

In [13]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = True,
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.2 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [14]:
from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset['train'],
    eval_dataset = train_dataset['eval'],
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        learning_rate = 2e-4,
        num_train_epochs=2,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 500,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [15]:
trainer_stats = trainer.train()

Step,Training Loss
500,1.1192
1000,0.9223
1500,0.8385
2000,0.7952
2500,0.76
3000,0.6635
3500,0.5142
4000,0.5061
4500,0.4886
5000,0.4679


In [16]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

NameError: name 'start_gpu_memory' is not defined

In [17]:
def format_prompt(topic, sentence):
    return PROMPT_TEMPLATE.format(topic, sentence, "sdf","dsdf")

In [24]:
train_dataset['train'][34]

{'text': "[INST] Extract span of text from the customer review associated with the topic - Weight. Customer Review : 'Good lightweight turkish coffee machine'[/INST] </s>"}

In [None]:
format_prompt(train_dataset['eval']O)

In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Continue the fibonnaci sequence.", # instruction
        "1, 1, 2, 3, 5, 8", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)