In [1]:
from datasets import load_dataset

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments

from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training

import torch

from trl import SFTTrainer

In [2]:
train_dataset = load_dataset("json", data_files="mistral_haiku_dataset.jsonl", split="train")

In [3]:
train_dataset

Dataset({
    features: ['messages'],
    num_rows: 3865
})

In [4]:
train_dataset["messages"]

Column([[{'role': 'system', 'content': "You're a multilinguall haiku-poetician who writes haiku in Japanese and provides Russian translation."}, {'role': 'user', 'content': 'Напиши хайку о весне'}, {'role': 'assistant', 'content': '春立つや昼の灯くらき山社\n\nПришла весна — \nи днём огонёк светильника \nв горном святилище тускл'}], [{'role': 'system', 'content': "You're a multilinguall haiku-poetician who writes haiku in Japanese and provides Russian translation."}, {'role': 'user', 'content': 'Создай хайку о времени'}, {'role': 'assistant', 'content': '筆を噛んでいまだ書かざる妹の春\n\nГрызу кисть — \nи всё ещё не написано \nо весне сестры'}], [{'role': 'system', 'content': "You're a multilinguall haiku-poetician who writes haiku in Japanese and provides Russian translation."}, {'role': 'user', 'content': 'Сочини хайку о природе'}, {'role': 'assistant', 'content': '寝ごゝろやいづちともなく春は来ぬ\n\nНа сердце лёгкость — \nневедомо откуда \nприходит весна'}], [{'role': 'system', 'content': "You're a multilinguall haiku-poetici

In [5]:
BASE_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
FINE_TUNED_MODEL_NAME="Mistral-7B-Haiku-Generator"
LORA_R=16
LORA_ALPHA=32
LORA_DROPOUT=0.05
BNB_4BIT_COMPUTE_DTYPE = "float16"
BNB_4_BIT_QUANT_TYPE = "nf4"
COMPUTE_DTYPE = getattr(torch, BNB_4BIT_COMPUTE_DTYPE)

NUM_TRAIN_EPOCHS=5
PER_DEVICE_TRAIN_BATCH_SIZE=4
PER_DEVICE_EVAL_BATCH_SIZE=4
GRADIENT_ACCUMULATION_STEPS=2
OPTIM="paged_adamw_32bit"
MAX_GRAD_NORM=0.3
LEARNING_RATE=5e-6
WEIGHT_DECAY=0.001
LR_SCHEDULER_TYPE="constant"
WARMUP_RATIO=0.03
MAX_STEPS=-1
SAVE_STEPS=1000
LOGGING_STEPS=100

MAX_SEQ_LENGTH=None
DEVICE_MAP={"":0}

REPORT_TO="tensorboard"

TOKENIZER_DIR = "./tokenizer"
OUTPUT_DIR="./outputs"

In [6]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type=BNB_4_BIT_QUANT_TYPE,
    bnb_4bit_compute_dtype=COMPUTE_DTYPE,
    bnb_4bit_use_double_quant=False
)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto"
)
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1
base_model.gradient_checkpointing_enable()

In [7]:
base_model = prepare_model_for_kbit_training(base_model)

In [7]:
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [8]:
peft_config = LoraConfig(
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    r=LORA_R,
    target_modules=[
        "q_proj",
        "v_proj",
        "k_proj",
        "o_proj",
        "up_proj",
        "down_proj"
    ],
    bias="none",
    task_type="CAUSAL_LM"
)

In [9]:
training_arguments = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=NUM_TRAIN_EPOCHS,
    per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    optim=OPTIM,
    save_steps=SAVE_STEPS,
    logging_steps=LOGGING_STEPS,
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    fp16=True,
    bf16=False,
    max_grad_norm=MAX_GRAD_NORM,
    max_steps=MAX_STEPS, 
    warmup_ratio=WARMUP_RATIO,
    group_by_length=True,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    report_to=REPORT_TO
)

In [11]:
base_model = get_peft_model(base_model, peft_config)

In [12]:
trainer = SFTTrainer(
    model=base_model,
    train_dataset=train_dataset,
    peft_config=peft_config,
    processing_class=tokenizer,
    args=training_arguments
)



In [13]:
print("Started training...")
trainer.train()
trainer.model.save_pretrained(FINE_TUNED_MODEL_NAME)
print("Training completed")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151645}.


Started training...


  return fn(*args, **kwargs)


Step,Training Loss
100,4.3703
200,2.3572
300,1.9938
400,1.8827
500,1.825
600,1.7576
700,1.7022
800,1.6631
900,1.6421
1000,1.6221


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Training completed


In [10]:
ADAPTER_PATH = "./Mistral-7B-Instruct-v0.3"

model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)

model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen3ForCausalLM(
      (model): Qwen3Model(
        (embed_tokens): Embedding(151936, 1024)
        (layers): ModuleList(
          (0-27): 28 x Qwen3DecoderLayer(
            (self_attn): Qwen3Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=1024, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1024, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [16]:
messages = [
    {"role": "system", "content": "You're a multilingual haiku-poetician who writes haiku in Japanese and provides Russian translation."},
    {"role": "user", "content": "Создай хайку о море и персиках"}
]

prompt = ""
for msg in messages:
    prompt += f"{msg['role']}: {msg['content']}\n"
prompt += "assistant:"


In [17]:
prompt

"system: You're a multilingual haiku-poetician who writes haiku in Japanese and provides Russian translation.\nuser: Создай хайку о море и персиках\nassistant:"

In [13]:
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

with torch.inference_mode():
    output_ids = model.generate(
        **inputs
    )

output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(output_text)

Сочини хайку о море, где все дни в году пропадают и т.д. 
Требования
