In [None]:
!pip install transformers datasets trl huggingface_hub

In [None]:

from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer, setup_chat_format
import torch
# Hub id of the base checkpoint that gets fine-tuned.
# NOTE(review): the canonical repo id is spelled "Qwen/Qwen2.5-7B-Instruct" (capital I) — confirm
# that this lowercase variant resolves to the same repository.
model_name = "Qwen/Qwen2.5-7B-instruct"

# Load the base model in bfloat16 and let the loader pick the device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # Automatic device placement (enables multi-GPU support)
    torch_dtype=torch.bfloat16,  # bfloat16 weights to reduce memory usage
)

# Tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Name of this fine-tuning run; reused as the trainer's output directory.
finetune_name = "ESI"

# Generate the train_prompt:

In [None]:
# Prompt template for a single training sample.
# The four positional `{}` slots are filled, in order, with the question, the task label,
# the reasoning text and the final answer (see formatting_prompts_func).
train_prompt_style = """
### Instruction:
You are an expert with advanced knowledge in solving temporal reasoning problems.
Before answering, classify the task given and then create a step-by-step reasoning to ensure a logical and accurate response.
Please answer the following questions.

### Question:
{}
### Task:
{}
### Reasoning:
{}
### Final Answer:
{}"""

In [None]:
# End-of-sequence marker appended to every rendered sample.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Render a batch of dataset rows into full training prompts.

    Args:
        examples: Batched mapping with the columns "question", "reasoning",
            "reasoner_answer" and "task", each holding one value per row.

    Returns:
        dict: {"text": [...]} with one prompt per row, built from
        train_prompt_style and terminated with EOS_TOKEN.
    """
    questions = examples["question"]
    reasonings = examples["reasoning"]  # entries may be None
    answers = examples["reasoner_answer"]
    task_labels = examples["task"]

    def _reasoning_to_text(reasoning):
        # Missing reasoning becomes a placeholder, a list of steps becomes
        # one step per line, anything else is stringified as-is.
        if reasoning is None:
            return "N/A"
        if isinstance(reasoning, list):
            return "\n".join(str(step) for step in reasoning)
        return str(reasoning)

    rendered = [
        train_prompt_style.format(question, task, _reasoning_to_text(reasoning), answer) + EOS_TOKEN
        for question, task, reasoning, answer in zip(questions, task_labels, reasonings, answers)
    ]
    return {"text": rendered}

# Load, shuffle and format the training data:

In [None]:
from datasets import load_dataset

# Load the training split.
# NOTE(review): trust_remote_code=True allows the dataset repository to run its own loading
# script on this machine — keep it only if the repository is trusted and actually needs it.
dataset = load_dataset("ESITime/timesi-arithmetic", split="train", trust_remote_code=True)
dataset = dataset.shuffle(seed=42)  # fixed seed keeps the row order reproducible

# No validation split is held out: the whole shuffled split is used for training.
# To hold one out, call dataset.train_test_split(test_size=0.2) and map both parts.
train_dataset = dataset.map(formatting_prompts_func, batched=True)

print("train data example:")
# Index the row first so only one example is read; train_dataset["text"][0] would
# load the entire "text" column just to show its first entry.
print(train_dataset[0]["text"])



# Setting up models


In [None]:
from peft import LoraConfig

# LoRA hyperparameters
# r: rank dimension for LoRA update matrices (smaller = fewer trainable parameters)
rank_dimension = 8
# lora_alpha: scaling factor for LoRA layers (higher = stronger adaptation); here equal to the rank
lora_alpha = 8
# lora_dropout: dropout probability for LoRA layers (helps prevent overfitting)
lora_dropout = 0.05

peft_config = LoraConfig(
    r=rank_dimension,  # Rank dimension - typically between 4-32
    lora_alpha=lora_alpha,  # LoRA scaling factor (a common heuristic is 2x rank; this run uses 1x)
    lora_dropout=lora_dropout,  # Dropout probability for LoRA layers
    bias="none",  # Bias handling: "none" means no bias terms are trained
    target_modules="all-linear",  # Which modules to apply LoRA to
    task_type="CAUSAL_LM",  # Task type for model architecture
)

In [None]:
# Hyperparameters for the supervised fine-tuning run.
args = SFTConfig(
    # Output settings
    output_dir=finetune_name,  # Directory to save model checkpoints
    # Training duration
    num_train_epochs=1,  # Number of training epochs
    dataset_text_field="text",  # Dataset column that holds the rendered prompts
    # Batch size settings
    per_device_train_batch_size=2,  # Batch size per device
    gradient_accumulation_steps=2,  # Accumulate gradients for larger effective batch
    # Memory optimization
    gradient_checkpointing=True,  # Trade compute for memory savings
    # Optimizer settings
    optim="adamw_torch_fused",  # Use fused AdamW for efficiency
    learning_rate=2e-4,  # Learning rate (QLoRA paper)
    max_grad_norm=0.3,  # Gradient clipping threshold
    # Learning rate schedule
    warmup_ratio=0.03,  # Portion of steps for warmup
    # The plain "constant" schedule ignores warmup_ratio; "constant_with_warmup"
    # ramps up during warmup and then keeps the learning rate constant.
    lr_scheduler_type="constant_with_warmup",
    # Logging and saving
    logging_steps=80,  # Log metrics every 80 steps
    save_steps=80,  # Save a checkpoint every 80 steps
    # Precision settings
    bf16=True,  # Use bfloat16 precision
    # Integration settings
    push_to_hub=False,  # Don't push to HuggingFace Hub
    report_to="none",  # Disable external logging
)

In [None]:
from transformers import TrainingArguments, EarlyStoppingCallback

# Maximum number of tokens kept per training sample.
max_seq_length = 2160
tokenizer.model_max_length = max_seq_length

# Setting tokenizer.model_max_length alone does not apply the limit: in the TRL releases that
# accept `tokenizer=` below, an unset SFTConfig.max_seq_length falls back to
# min(tokenizer.model_max_length, 1024), which silently truncates samples to 1024 tokens.
# NOTE(review): confirm the attribute name against the installed TRL version.
args.max_seq_length = max_seq_length

# Create SFTTrainer with LoRA configuration
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    # eval_dataset=val_dataset,
    peft_config=peft_config,  # LoRA configuration
    tokenizer=tokenizer,
    # callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],  # stop after 3 evaluations without improvement
)

# Train model

In [None]:
# Start training. Checkpoints are written to the configured output directory;
# nothing is uploaded to the Hub because push_to_hub=False.
trainer.train()

# Save the final model next to the checkpoints. The path is derived from finetune_name
# (the same value used for output_dir) so the two cannot drift apart.
trainer.save_model(f"{finetune_name}/final")