In [2]:
import warnings
warnings.filterwarnings("ignore")

from unsloth import FastLanguageModel
import torch

from multiprocessing import cpu_count
num_proc = cpu_count()

import yaml

from data_processor import SplittedJsonIoDataset
from customs import customize_tokenizer

from unsloth import UnslothTrainer, UnslothTrainingArguments

from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from transformers import TrainingArguments, DataCollatorForSeq2Seq, DataCollatorForLanguageModeling
from unsloth import is_bfloat16_supported

from unsloth.chat_templates import train_on_responses_only

from unsloth import unsloth_train

from utils import save_log_history

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 07-07 11:07:36 [__init__.py:244] Automatically detected platform cuda.


In [3]:
# Clear GPU cache
torch.cuda.empty_cache()

In [4]:
with open("config.yaml", "r") as f:
    config = yaml.load(f, Loader=yaml.SafeLoader)

model, tokenizer = FastLanguageModel.from_pretrained(
    **config["model_loading_args"]
)

model, tokenizer = customize_tokenizer(model, tokenizer, config)

==((====))==  Unsloth 2025.6.8: Fast Llama patching. Transformers: 4.53.0. vLLM: 0.9.1.
   \\   /|    NVIDIA H100 PCIe. Num GPUs = 1. Max memory: 79.19 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 9.0. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Tokenizer has a built-in chat template.
Pad token is already set to: <|finetune_right_pad_id|>
Default padding side is left. It is forced to be on the right!


In [5]:
print(f"Model's context window: {model.max_seq_length}")

Model's context window: 131072


In [6]:
# Create dataset for training
dataset = SplittedJsonIoDataset(tokenizer, config).create()

In [None]:
# Add LoRA weights
model = FastLanguageModel.get_peft_model(
    model=model,
    **config["lora_parameters"]
)

In [None]:
# Select data collator
if config["fine_tuning_args"]["training_type"]=="text_completion":
    _train_on_responses_only_bool = True
    data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer)
elif config["fine_tuning_args"]["training_type"]=="continued_pre_training":
    _train_on_responses_only_bool = False
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
else:
    raise Exception("Wrong Training Type. Check config.yaml")

In [None]:
# Initiate trainer
trainer = UnslothTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset["train"],
    eval_dataset = dataset["eval"],
    data_collator = data_collator,
    dataset_text_field = "text",
    max_seq_length = config["model_loading_args"]["max_seq_length"], # Used only when packing=True for creating a ConstantLengthDataset.
    packing = config["sft_trainer_arguments"]["apply_packing"],
    dataset_num_proc = num_proc,
    args = UnslothTrainingArguments(
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        **config["training_arguments"]
    )
)

In [None]:
# Wrap trainer for apply training using only the assistant part
if _train_on_responses_only_bool:
    trainer = train_on_responses_only(
        trainer,
        instruction_part = config["instruction_part"],
        response_part = config["response_part"]
    )

In [None]:
# Start training
trainer_stats = unsloth_train(trainer)

In [None]:
save_log_history(trainer)

In [None]:
# !sudo mkdir /mnt/data/training-outputs
# !sudo mkdir /mnt/data/training-outputs/first-run
# !sudo cp -r outputs /mnt/data/training-outputs/first-run
# !sudo cp -r log_history /mnt/data/training-outputs/first-run
# !ls /mnt/data/training-outputs/first-run

In [6]:
from transformers import TextStreamer

FastLanguageModel.for_inference(model)
text_streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False, )

def format_input_prompt(system_message, user_input):
    formatted_input = [
        {"role": "assistant", "content": system_message},
        {"role": "user", "content": user_input}
    ]
    return formatted_input

def format_validation_example_for_inference(example):
    return example.split(config["instruction_part"])[1].split(config["response_part"])[0]

def inference(model, system_message, user_input, max_new_tokens=None, **kwargs):
    input_ids = tokenizer.apply_chat_template(
        format_input_prompt(system_message, user_input),
        add_generation_prompt=True,
        return_tensors = "pt").to("cuda")
    if not max_new_tokens:
        max_new_tokens = model.config.max_position_embeddings - input_ids.shape[-1]
    model.generate(input_ids, streamer = text_streamer, max_new_tokens=max_new_tokens, **kwargs)

In [None]:
tokenizer.eos_token

In [7]:
system_message = ""
user_input = "Hello! How are you?"
inference(model, system_message, user_input, max_new_tokens=100)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
LlamaForCausalLM has no `_prepare_4d_causal_attention_mask_with_cache_position` method defined in its base modeling class. Compiled forward passes will be sub-optimal. If you're writing code, see Llama for an example implementation. If you're a user, please report this issue on GitHub.


Hello! I'm doing well, thanks for asking! I'm a large language model, so I don't have feelings or emotions like humans do, but I'm always happy to chat and help with any questions or topics you'd like to discuss. How about you? How's your day going so far?<|eot_id|>


In [8]:
system_message = config["system_message"]
user_input = format_validation_example_for_inference(dataset["eval"]["text"][134])
inference(model,
          system_message, 
          user_input, 
          max_new_tokens=None,
          temperature=0.7,
          top_p=0.6,
          repetition_penalty=1.1,
          no_repeat_ngram_size=3,
          do_sample=True)

NameError: name 'dataset' is not defined

In [7]:
from transformers import TextStreamer

FastLanguageModel.for_inference(model)
text_streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

def format_input_prompt(system_message, user_input):
    formatted_input = [
        {"role": "assistant", "content": system_message},
        {"role": "user", "content": user_input}
    ]
    return formatted_input

def format_validation_example_for_inference(example):
    return example.split("<|start_header_id|>user<|end_header_id|>")[1].split("<|eot_id|><|start_header_id|>assistant<|end_header_id|>")[0]

def inference(model, system_message, user_input, max_new_tokens=None, **kwargs):
    input_ids = tokenizer.apply_chat_template(
        format_input_prompt(system_message, user_input),
        add_generation_prompt=True,
        return_tensors = "pt").to("cuda")
    if not max_new_tokens:
        max_new_tokens = model.config.max_position_embeddings - input_ids.shape[-1]
    model.generate(input_ids, streamer = text_streamer, max_new_tokens=max_new_tokens, **kwargs)

In [10]:
tokenizer.eos_token

'<|eot_id|>'

In [8]:
system_message = ""
user_input = "Hello! How are you?"
inference(model, system_message, user_input, max_new_tokens=100)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
LlamaForCausalLM has no `_prepare_4d_causal_attention_mask_with_cache_position` method defined in its base modeling class. Compiled forward passes will be sub-optimal. If you're writing code, see Llama for an example implementation. If you're a user, please report this issue on GitHub.


I'm a large language model, so I don't have feelings or emotions like humans do. However, I'm functioning properly and ready to assist you with any questions or tasks you have. How can I help you today?


In [10]:
system_message = config["system_message"]
user_input = format_validation_example_for_inference(dataset["eval"]["text"][134])
inference(model,
          system_message, 
          user_input, 
          max_new_tokens=None,
          temperature=0.7,
          top_p=0.6,
          repetition_penalty=1.1,
          no_repeat_ngram_size=3,
          do_sample=True)

Below is the STIx2.0 bundle that corresponds to the given CTI report:


```json
{
  "type": "bundle",
  "id": "strelastealar-bundle",
  "_objects": [
    {
      "type': 'indicator',
      'id': 'file--0d21058a3f7cff23e69216be3f75401fe6c89bcff20aa1fb59d74ce58f5f99a',
      "labels': ['malicious'],
      'pattern': '[{"type": "%s", "value": "%x"}]',
      'values': ['pe', '0d21f058a3g7cff33e69246be3g75441de6g89hcff30ga1gb59d94ge58g5g99a']
    },
    {
        "type":"relationship",
        "id":"relationship--indicator--file--e6991g12e86g29bg38e17gfef129gfdag1d44539iffbb2367g3f8g026gd6dg55bg9a",
        relationship_type':'indicates',
        source_ref':'file--06991b12eg66g29b83e178gef129dfdag1d444539iffbb2637g03f8cg026dgd655b9ag',
        "target_ref":"indicator--e66991b1ge866g29bh38e187gef1219dfdag145439iffbb266703f8026d6dg559bag"
    }
  ]
}
```

This bundle contains an Indicator object and a Relationship object. The Indicator object describes a malicious file hash, while The Relati

In [12]:
system_message = """
You are an AI Security Analyst in Cyberthreat Intelligence (CTI). 
Your task is transform Cyberthreat intelligence reports (CTI) into STIX2.1 bundles. 
Instead of using UUID in each id field, use the following rule for generating ids by the fields of the object:
    File ids -> type--hashes
    SDO ids -> type--name
    SCO ids -> type--value
    SRO ids -> type--source_ref--relationship_type--new_id_target_ref
You must return ONLY a STIX2.1 bundle as a json file with the appropriate keys. 
Transform the folowing CTI report into STIX2.1 bundle: """

user_input = format_validation_example_for_inference(dataset["eval"]["text"][134])
inference(model,
          system_message, 
          user_input, 
          max_new_tokens=None,
          temperature=0.7,
          top_p=0.6,
          repetition_penalty=1.1,
          no_repeat_ngram_size=3,
          do_sample=True)

Here is the STIX 2 bundle that corresponds to the given CTI Report:

```json
{
  "type": "bundle",
  "id": "bundled-report",
  "_rev": "1234567890",
  "__meta__": {
    "created_by_ref": "https://example.com/user"
  },
  "objects": [
    {
      "type":"indicator",
      "id":"indicator-1",
      "__meta__":
        {"created_at":"2024-03-22T10:37:00Z"},
        {"modified_at":"null"}
      },
      "pattern":"IPv4 193\.109\.85\.231",
      "_tags":["ipv4"]
    },
    {
     "type":
      "file",
      "$ref":
        "https:\/\/example.com\/files\/strelastealearlyversion.exe",
      __meta__":
       {"created_by":"https://cti-taxii.stix-shifter.org/stix/taxii/v2/services/example"},
       {"modified_by":"null"},
       {
        "created_at":
         "2024-
          03-23T00:00:01Z",
        "modified_at":
        null
       }
    },
     {
      "$schema":
       "https\/\/example.com/schema/stixv21.json",
      "@context":
       ["https://raw.githubusercontent.com/oasis-open/ct