In [1]:
from pathlib import Path
from tqdm import tqdm
import joblib
from typing import NamedTuple, TypedDict

from rich import print
from rich.table import Table

from treelib import Tree # type: ignore

import torch

from datasets import Dataset

from transformers import (
    AutoModelForCausalLM, # type: ignore
    AutoTokenizer, # type: ignore
    TrainingArguments, # type: ignore
    Trainer, # type: ignore
    BitsAndBytesConfig, # type: ignore
    DataCollatorForLanguageModeling, # type: ignore
)

from peft import LoraConfig, get_peft_model

### Config

In [2]:
MODEL_NAME = "Qwen/Qwen3-1.7B"
DATA_DIR = Path("../dataset/dump/short-fantasy-quests")

MODEL_SAVE_DIR = Path("../models/llm")

In [3]:
assert DATA_DIR.exists() and DATA_DIR.is_dir()

if not MODEL_SAVE_DIR.exists():
    MODEL_SAVE_DIR.mkdir()

### Data

In [4]:
trees: list[Tree] = [joblib.load(f) for f in DATA_DIR.iterdir()]

In [5]:
table = Table(title="Basic data counts")
table.add_column("Name")
table.add_column("Value")

table.add_row("Number of trees", str(len(trees)))
table.add_row("Total number of nodes", str(sum(len(tree) for tree in trees)))
table.add_row("Count non empty nodes", str(sum(len(list(tree.filter_nodes(lambda n: n.data['text'] is not None and n.data['text'] != ""))) for tree in trees if len(tree) > 0)))
table.add_row("Average number of nodes per tree", str(sum(len(tree) for tree in trees) / len(trees)))

print(table)


In [6]:
class Triplet(NamedTuple):
    context: str
    statement: str
    answer: str

In [7]:
def get_triplets(tree: Tree, nid: str) -> list[Triplet]:
    current = tree.get_node(nid)
    if current is None:
        return []
    children = tree.children(nid)
    triplets: list[Triplet] = []
    for child in children:
        triplet = Triplet(
            context=current.data['text'],
            statement=child.tag,
            answer=child.data['text']
        )
        if triplet.context is not None and triplet.context != "" and triplet.statement is not None and triplet.statement != "" and triplet.answer is not None and triplet.answer != "": 
            triplets.append(triplet)
    return triplets


In [8]:
triplets = []
for tree in trees:
    for node in tree.nodes:
        triplets.extend(get_triplets(tree, node))

In [9]:
print(f"Number of Triplets: {len(triplets)}")

In [10]:
def triplet2train(triplet: Triplet) -> dict:
    return dict(prompt=f"[CONTEXT] {triplet.context} [STATEMENT] {triplet.statement}", response=triplet.answer)

dataset = Dataset.from_list([triplet2train(triplet) for triplet in triplets]).train_test_split(0.1)
print(dataset)

### Model Train

In [11]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

tokenizer_config.json:   0%|          | 0.00/9.73k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

In [12]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
)


config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.6k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/622M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [13]:
def format_and_tokenize(examples):
    texts = [
        f"[im_start]system\nYOU ARE Dungeon Master[im_end]\n"
        f"[im_start]user\n{str(prompt)}[im_end]\n"
        f"[im_start]assistant\n{str(response)}[im_end]"
        for prompt, response in zip(examples['prompt'], examples['response'])
    ]

    tokenized = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
        add_special_tokens=False
    )

    return {
        "input_ids": tokenized["input_ids"].tolist(),
        "attention_mask": tokenized["attention_mask"].tolist(),
        "labels": tokenized["input_ids"].tolist()
    }

In [14]:
dataset = dataset.map(
    format_and_tokenize,
    batched=True,
    remove_columns=["prompt", "response"],
    desc="Formatting and tokenizing"
)

Formatting and tokenizing:   0%|          | 0/303 [00:00<?, ? examples/s]

Formatting and tokenizing:   0%|          | 0/34 [00:00<?, ? examples/s]

In [15]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 1,605,632 || all params: 1,722,180,608 || trainable%: 0.0932


In [16]:
training_args = TrainingArguments(
    output_dir=MODEL_SAVE_DIR,
    per_device_train_batch_size=4,  # Увеличен размер батча (если позволяет память GPU)
    gradient_accumulation_steps=2,  # Увеличена аккумуляция градиентов
    learning_rate=2e-4,
    num_train_epochs=1,
    logging_steps=100,
    fp16=True,
    optim="adamw_bnb_8bit",  # Более эффективный оптимизатор
    save_strategy="steps",
    eval_strategy="steps",
    eval_steps=500,
    save_steps=500,
    report_to="none",
    gradient_checkpointing=False,  # Отключено для ускорения
    load_best_model_at_end=True,
    remove_unused_columns=True,
    label_names=["labels"],
    max_grad_norm=0.3,
    dataloader_num_workers=2,  # Параллельная загрузка данных
    torch_compile=False  # Компиляция графа вычислений
)

In [17]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
    pad_to_multiple_of=8
)

In [18]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    data_collator=data_collator,
    processing_class=tokenizer,
)

In [19]:
trainer.train()
model.save_pretrained(MODEL_SAVE_DIR)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Step,Training Loss,Validation Loss


In [44]:
def generate_action(context: str, action: str) -> str:
    prompt = f"""
[im_start]system
YOU ARE Dungeon Master[im_end]
[im_start]user [CONTEXT] {context} [STATEMENT] {action}[im_end]
[im_start]assistant
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.9,
        top_p=0.8,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id
    )

    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return full_response

In [46]:
print(generate_action(
    context="Earlier, the player stole a coin from a tavern",
    action="I went into the tavern"
))