In [1]:
import json
import os
import re

import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from transformers.trainer_utils import get_last_checkpoint
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


## Data loading

In [2]:
# Read the JSON Lines training file. The encoding is pinned to UTF-8 because the
# questions are Cyrillic text and the platform default encoding is not guaranteed
# to be UTF-8. Blank lines (e.g. a trailing newline) are skipped so they do not
# reach json.loads.
with open('gen-ai-ucu-2024-task-3/zno.train.jsonl', 'r', encoding='utf-8') as json_file:
    json_list = [line for line in json_file if line.strip()]

# Parse each record and keep only the first entry of its "correct_answers" list.
all_questions = []
for json_str in json_list:
    result = json.loads(json_str)
    result['correct_answers'] = result['correct_answers'][0]
    all_questions.append(result)

# The first 20% of the records (in file order) become the held-out test split;
# the remaining 80% are used for training.
split_index = int(len(all_questions) * 0.2)
test_set = all_questions[:split_index]
train_set = all_questions[split_index:]

In [3]:
import pandas as pd
# Tabular view of the training split; it is not referenced again in the cells shown here.
df_data = pd.DataFrame(train_set)

In [4]:
from unsloth import FastLanguageModel
# Loading options. `max_seq_length` is reused later when the trainer is built.
max_seq_length = 4096
dtype = None  # NOTE(review): presumably lets Unsloth choose the dtype itself - confirm
load_in_4bit = True

# Alternative checkpoint kept for reference: "unsloth/Llama-3.2-3B-Instruct".
base_model_kwargs = dict(
    model_name="unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Returns the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.1.5: Fast Llama patching. Transformers: 4.48.0.
   \\   /|    GPU: NVIDIA GeForce RTX 3090. Max memory: 23.576 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [5]:
# LoRA adapter configuration.
# A previous configuration (r=16, lora_alpha=16, lora_dropout=0, bias="none")
# has been replaced by the values below.
lora_rank = 32
lora_target_modules = [
    # attention projections
    "q_proj", "k_proj", "v_proj", "o_proj",
    # MLP projections
    "gate_proj", "up_proj", "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    r=lora_rank,
    target_modules=lora_target_modules,
    lora_alpha=32,  # kept equal to the rank
    # Non-zero dropout for regularization. Unsloth's log for this cell warns that
    # only dropout = 0 gets its fast LoRA patching, so this costs some speed.
    lora_dropout=0.05,
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,  # rank-stabilized LoRA is switched off for this run
)

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.1.5 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [6]:
# Full training prompt: a system message describing the field layout, one worked
# example (user + assistant turn), and a final user/assistant turn.
#
# Only `{question_input}`, `{options_input}` and `{correct_marker_input}` are
# filled in per example (by `format_prompt`); `{question}`, `{options}` and
# `{correct_marker}` inside the system message are literal text.
#
# The worked example is stored as proper Ukrainian text; the previous literal was
# mis-encoded (UTF-8 bytes rendered through a legacy code page).
#
# NOTE(review): every header is followed by a single newline and turns are
# separated by a blank line - confirm this matches the prompt layout used at
# inference time.
llama31_prompt='''<|start_header_id|>system<|end_header_id|>
Your input fields are:
1. `question` (str)
2. `options` (list[dict[str, str]])

Your output fields are:
1. `correct_marker` (str)

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## options ## ]]
{options}

[[ ## correct_marker ## ]]
{correct_marker}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
 Solve exam problem.<|eot_id|>

<|start_header_id|>user<|end_header_id|>
[[ ## question ## ]]
Що було характерним для розвитку української культури першої половини ХІХ ст.?

[[ ## options ## ]]
[{"marker": "А", "text": "поширення стилю бароко в архітектурі"}, {"marker": "Б", "text": "копіювання кращих зразків княжої доби"}, {"marker": "В", "text": "формування нової української літературної мови"}, {"marker": "Г", "text": "виникнення традиційного пересувного лялькового театру"}]

Respond with the corresponding output fields, starting with the field `[[ ## correct_marker ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.<|eot_id|>

<|start_header_id|>assistant<|end_header_id|>
[[ ## correct_marker ## ]]
В

[[ ## completed ## ]]<|eot_id|>

<|start_header_id|>user<|end_header_id|>
[[ ## question ## ]]
{question_input}

[[ ## options ## ]]
{options_input}

Respond with the corresponding output fields, starting with the field `[[ ## correct_marker ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.<|eot_id|>

<|start_header_id|>assistant<|end_header_id|>
[[ ## correct_marker ## ]]
{correct_marker_input}

[[ ## completed ## ]]<|eot_id|>'''

In [7]:
import pandas as pd
# Matches exactly the three per-example placeholders; every other brace pair in
# the template (e.g. `{question}` in the system message) is left untouched.
_PROMPT_PLACEHOLDER = re.compile(r"\{(question_input|options_input|correct_marker_input)\}")


def format_prompt(row, template=None):
    """Render one training prompt from a dataset record.

    Args:
        row: Mapping-like record (dict or DataFrame row) with the keys
            ``"question"`` (str), ``"answers"`` (JSON-serializable list of
            option dicts) and ``"correct_answers"`` (str).
        template: Prompt template containing the ``{question_input}``,
            ``{options_input}`` and ``{correct_marker_input}`` placeholders.
            Defaults to the module-level ``llama31_prompt``.

    Returns:
        The template with the three placeholders filled in.

    Raises:
        KeyError: If ``row`` lacks one of the required keys.
        TypeError: If the question or the correct marker is not a string, or
            the options are not JSON-serializable.
    """
    if template is None:
        template = llama31_prompt
    fields = {
        "question_input": row["question"],
        # Serialize the options as JSON (double quotes, Cyrillic kept readable)
        # so they use the same notation as the worked example in the template;
        # str() would emit Python repr with single quotes instead.
        "options_input": json.dumps(row["answers"], ensure_ascii=False),
        "correct_marker_input": row["correct_answers"],
    }
    # Substitute in a single pass: text inserted for one placeholder is never
    # rescanned, so a question that happens to contain "{options_input}" stays
    # as written.
    return _PROMPT_PLACEHOLDER.sub(lambda match: fields[match.group(1)], template)


def prepare_train_datav2(train_data: "list[dict]") -> "Dataset":
    """Render every training record into a prompt and wrap the result as a Dataset.

    Args:
        train_data: Records in any form accepted by ``pd.DataFrame`` (the
            notebook passes a list of dicts). Each record must carry the
            fields read by ``format_prompt``.

    Returns:
        A ``Dataset`` built from the records, with an added ``"text"`` column
        holding the rendered prompt for each record.
    """
    # One DataFrame row per record.
    data_df = pd.DataFrame(train_data)
    # axis=1 hands each row to format_prompt; the result becomes the "text" column.
    data_df["text"] = data_df.apply(format_prompt, axis=1)
    return Dataset.from_pandas(data_df)

In [8]:
# Build the training Dataset (records plus the rendered "text" column) from the training split.
dataset = prepare_train_datav2(train_set)

In [9]:
# Spot-check the fully rendered prompt of the last training example.
print(dataset[-1]['text'])

<|start_header_id|>system<|end_header_id|>
Your input fields are:
1. `question` (str)
2. `options` (list[dict[str, str]])

Your output fields are:
1. `correct_marker` (str)

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## options ## ]]
{options}

[[ ## correct_marker ## ]]
{correct_marker}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
 Solve exam problem.<|eot_id|>

<|start_header_id|>user<|end_header_id|>
[[ ## question ## ]]
Що було характерним для розвитку української культури першої половини ХІХ ст.?

[[ ## options ## ]]
[{"marker": "А", "text": "поширення стилю бароко в архітектурі"}, {"marker": "Б", "text": "копіювання кращих зразків княжої доби"}, {"marker": "В", "text": "формування нової української лі

In [10]:
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

# Header that opens an assistant turn. It occurs twice in every rendered prompt
# (the worked example and the final turn); the sanity check in the next cell
# prints which part of the sequence keeps its labels.
response_template = "<|start_header_id|>assistant<|end_header_id|>"
# Completion-only collator keyed on that header; passed to the trainer as data_collator.
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

In [11]:
# Sanity check: push one rendered example through the collator and show which
# tokens keep a label.
sample_text = dataset[-1]['text']
tokenized = tokenizer(sample_text, return_tensors="pt", padding=True)

# The tokenizer output is batched; take row 0 so the collator receives a single
# un-batched example.
tokenized_example = {
    field: tokenized[field][0] for field in ('input_ids', 'attention_mask')
}

# The collator takes a list of examples and returns a batch.
processed = collator([tokenized_example])

# Positions labelled -100 are filtered out; decoding what is left shows the text
# that is actually being trained on.
labels = processed['labels'][0]
mask = labels != -100
training_text = tokenizer.decode(labels[mask])
print("Training target:", training_text)

Training target: 
[[ ## correct_marker ## ]]
А

[[ ## completed ## ]]<|eot_id|>


In [12]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Choose the mixed-precision mode once: bf16 where supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Optimization settings. `max_steps` is left unset, so the run length is governed
# by `num_train_epochs`.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = total batch size of 8 on one GPU
    warmup_steps=10,
    num_train_epochs=5,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,  # log the loss at every optimizer step
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="5epoch_increased_not_rslora",
)

# Supervised fine-tuning on the rendered "text" column. Packing is disabled and
# the completion-only collator is supplied explicitly.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    data_collator=collator,
    args=training_args,
)

Map (num_proc=2): 100%|██████████| 2451/2451 [00:01<00:00, 1375.98 examples/s]


In [14]:
# Resume from the newest checkpoint in the trainer's output directory when one
# exists; otherwise start a fresh run. Passing `resume_from_checkpoint=True`
# unconditionally raises when no checkpoint has been written yet, which breaks a
# clean "Restart & Run All".
checkpoint_root = trainer.args.output_dir
# get_last_checkpoint lists the directory, so guard against it not existing yet.
last_checkpoint = get_last_checkpoint(checkpoint_root) if os.path.isdir(checkpoint_root) else None
trainer_stats = trainer.train(resume_from_checkpoint=last_checkpoint)

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 2,451 | Num Epochs = 5
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 1,530
 "-____-"     Number of trainable parameters = 83,886,080


Step,Training Loss
1,0.1241
2,0.092
3,0.0953
4,0.0952
5,0.0862
6,0.0881
7,0.1121
8,0.1008
9,0.0877
10,0.1099


In [15]:
# Persist the fine-tuned (PEFT-wrapped) model and the tokenizer into the same directory.
adapter_dir = "./lorai_adapter_llama8b_5epochs_nrs"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

('./lorai_adapter_llama8b_5epochs_nrs/tokenizer_config.json',
 './lorai_adapter_llama8b_5epochs_nrs/special_tokens_map.json',
 './lorai_adapter_llama8b_5epochs_nrs/tokenizer.json')