In [8]:
import os
import re
import yaml
import json
import torch
import pickle
from unsloth import FastLanguageModel
from tqdm import tqdm
import pandas as pd


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 10-16 13:14:23 [__init__.py:244] Automatically detected platform cuda.


In [44]:
sft_model = "/mnt/data/training-outputs/Llama-3.1-8B-Domains-Expert/checkpoint-339"

sft_system_message = """You are an AI Security Analyst in Cyberthreat Intelligence (CTI). 
                    Your task is to identify all malicious domain names referenced in a CTI report. 
                    You MUST return a json with a field "objects" being a list of json objects 
                    that describe malicious domain names.
                    To describe a malicious domain name you should provide the fields id, type and value.
                    Instead of using UUID in the id field, use the rule type--value for generating ids.
                    If no malicious domain names are identified return a json with an empty list "objects".
                    Identify all malicious domain names in the folowing CTI report: """

In [3]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = sft_model,
    fast_inference = False,
    load_in_4bit = False,
    max_seq_length = None,
    gpu_memory_utilization = 0.8
)

==((====))==  Unsloth 2025.6.8: Fast Llama patching. Transformers: 4.53.0. vLLM: 0.9.1.
   \\   /|    NVIDIA H100 PCIe. Num GPUs = 1. Max memory: 79.179 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 9.0. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Unsloth 2025.6.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [4]:
from transformers import TextStreamer

FastLanguageModel.for_inference(model)
text_streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

def format_input_prompt(system_message, user_input):
    formatted_input = [
        {"role": "assistant", "content": system_message},
        {"role": "user", "content": user_input}
    ]
    return formatted_input

def format_validation_example_for_inference(example):
    return example.split("<|start_header_id|>user<|end_header_id|>")[1].split("<|eot_id|><|start_header_id|>assistant<|end_header_id|>")[0]

def inference(model, system_message, user_input, max_new_tokens=None, **kwargs):
    input_ids = tokenizer.apply_chat_template(
        format_input_prompt(system_message, user_input),
        add_generation_prompt=True,
        return_tensors = "pt").to("cuda")
    if not max_new_tokens:
        max_new_tokens = model.config.max_position_embeddings - input_ids.shape[-1]
    model.generate(input_ids, streamer = text_streamer, max_new_tokens=max_new_tokens, **kwargs)

def predict(model, system_message, user_input, max_new_tokens=None, **kwargs):
    input_ids = tokenizer.apply_chat_template(
        format_input_prompt(system_message, user_input),
        add_generation_prompt=True,
        return_tensors = "pt").to("cuda")
    if not max_new_tokens:
        max_new_tokens = model.config.max_position_embeddings - input_ids.shape[-1]
    
    output_ids = model.generate(input_ids, max_new_tokens=max_new_tokens, **kwargs)
    result = tokenizer.batch_decode(output_ids)
    processed_result = result[0].split("<|start_header_id|>assistant<|end_header_id|>\n\n")[-1].split("<|eot_id|>")[0]
    return processed_result

In [6]:
def load_json(path:str, filename:str):
    with open(os.path.join(path, filename), mode="r", encoding="utf-8") as f:
        return json.load(f)
    
def format_example(example:dict, system_message):
        formatted_example = [
            {"role": "assistant", "content": system_message},
            {"role": "user", "content": example["input"]},
            {"role": "assistant", "content": json.dumps(example["output"])}
        ]
        return formatted_example

In [9]:
test_path = "/mnt/data/openCTI/splitted-io-pairs/test"
inputs = []
outputs = []
include_cti_type = ["domain-name"]

for file in os.listdir(test_path):
    cti_type = file.split("--")[0]
    if cti_type not in include_cti_type:
        continue
    example = load_json(test_path, file)
    inputs.append(example["input"])
    outputs.append(example["output"])

In [10]:
print(outputs[0])

{'objects': [{'id': 'domain-name--teamsbusiness.org', 'type': 'domain-name', 'value': 'teamsbusiness.org'}, {'id': 'domain-name--voipfaqs.com', 'type': 'domain-name', 'value': 'voipfaqs.com'}, {'id': 'domain-name--locallyhyped.com', 'type': 'domain-name', 'value': 'locallyhyped.com'}]}


In [12]:
system_message = sft_system_message
user_input = inputs[0]
inference(model,
          system_message, 
          user_input, 
          max_new_tokens=500,
          temperature=0.6,
          top_p=0.2,
          repetition_penalty=1.1,
          no_repeat_ngram_size=3,
          do_sample=True)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
LlamaForCausalLM has no `_prepare_4d_causal_attention_mask_with_cache_position` method defined in its base modeling class. Compiled forward passes will be sub-optimal. If you're writing code, see Llama for an example implementation. If you're a user, please report this issue on GitHub.


{"objects": [{"id": "domain-name--voipfaqsc.om", "type": "DomainName", "value": "voipfqsc.om"}, {"id": "", "type"": "Domainname", "Value": "teamsbusiness.org"}, {"Id": "domian-name--locallyhyped.com", "Type": "Domian-name", "Vale": "locallyhypped.com"}]}


In [13]:
system_message = sft_system_message
inputs = inputs
outputs = outputs

preds = [predict(model,
                 system_message,
                 user_input,
                 max_new_tokens=500,
                 temperature=0.6,
                 top_p=0.2,
                 repetition_penalty=1.1,
                 no_repeat_ngram_size=3,
                 do_sample=True) for user_input in tqdm(inputs)]

100%|██████████| 298/298 [17:52<00:00,  3.60s/it]


In [34]:
preds4eval = []
failed_preds = []
pattern = r'\{\s*"id"\s*:\s*"[^"]*"\s*,\s*"type"\s*:\s*"[^"]*"\s*,\s*"value"\s*:\s*"[^"]*"\s*\}'

for p in preds:
    p = p.lower().replace("\t", "")
    try:
        preds4eval.append(
            {
                "objects":json.loads(p)["objects"]
            }
        )
    except:
        objects = re.findall(pattern, p)
        if not objects:
            failed_preds.append(p)
        else:
            try:
                valid_objs = [json.loads(obj) for obj in objects]
            except:
                print(p)
                print(objects)
                valid_objs = []
        preds4eval.append(
            {
                "objects":valid_objs
                }
        )

In [35]:
print(f"Percenrage of failed json outputs: {'{:.1f}'.format(100 * len(failed_preds) / len(inputs))}%")

Percenrage of failed json outputs: 1.3%


In [46]:
# Post processing
processed_preds4eval = []

def fix_domain_name_id(wrong_id: str) -> str:
    """
    Fixes malformed domain-name IDs according to the observed patterns.
    """

    # Trim spaces
    s = wrong_id.strip().lower()

    # Remove leading underscores or hyphens
    s = re.sub(r'^[-_]+', '', s)

    # Remove redundant 'malware' if it’s at the start but malformed
    s = re.sub(r'^(domain-name[-_]+)', '', s)

    # Handle duplicated name parts (e.g., 'fatboy--fatboy')
    parts = re.split(r'--+', s)
    if len(parts) == 2 and parts[0] == parts[1]:
        s = parts[0]

    # Prepend 'malware--'
    corrected = f"domain-name--{s}"

    # Ensure only one double dash after 'malware'
    corrected = re.sub(r'^domain-name-+', 'domain-name--', corrected)

    return corrected

# Step 1
for p in preds4eval:
    objects = []
    for obj in p["objects"]:
        # Step 1
        if "id" in obj.keys():
            ID = fix_domain_name_id(obj["id"].strip())

        if "value" in obj.keys():
            VALUE = obj["value"]
        else:
            VALUE = ID.split("domain-name--")[-1]

        objects.append(
            {
                "id":ID,
                "type":"domain-name",
                "value":VALUE,
            }
        )

    processed_preds4eval.append(
                {
                    "objects":objects
                }
            )

In [47]:
for i in range(len(outputs)):
    print(f"Actual: {outputs[i]}")
    print(f"Predicted: {processed_preds4eval[i]}")
    print("\n\n")

Actual: {'objects': [{'id': 'domain-name--teamsbusiness.org', 'type': 'domain-name', 'value': 'teamsbusiness.org'}, {'id': 'domain-name--voipfaqs.com', 'type': 'domain-name', 'value': 'voipfaqs.com'}, {'id': 'domain-name--locallyhyped.com', 'type': 'domain-name', 'value': 'locallyhyped.com'}]}
Predicted: {'objects': [{'id': 'domain-name--voipfaqsc.om', 'type': 'domain-name', 'value': 'voipfqsc.om'}]}



Actual: {'objects': []}
Predicted: {'objects': []}



Actual: {'objects': []}
Predicted: {'objects': []}



Actual: {'objects': [{'id': 'domain-name--siamaster.com.mx', 'type': 'domain-name', 'value': 'siamaster.com.mx'}, {'id': 'domain-name--cv-builder.site', 'type': 'domain-name', 'value': 'cv-builder.site'}, {'id': 'domain-name--chatgptex.us', 'type': 'domain-name', 'value': 'chatgptex.us'}, {'id': 'domain-name--allfreesoftware.online', 'type': 'domain-name', 'value': 'allfreesoftware.online'}, {'id': 'domain-name--all-free-software.online', 'type': 'domain-name', 'value': 'all-free-

In [48]:
# for i in range(len(outputs)):
#     for p in processed_preds4eval[i]["objects"]:
#         print(p)
#         print("\n")

In [53]:
from evaluation.stix_evaluator import STIXEvaluator

evaluator = STIXEvaluator(comparison_values=["id"], cti_object_types=["domain-name"])

In [54]:
p, r, f1, full_res = evaluator._evaluate_(predicted=processed_preds4eval, actual=outputs)
print(f"Precison: {p}\nRecall: {r}\nF1-Score: {f1}")

Precison: 0.21023
Recall: 0.02668
F1-Score: 0.04734


In [45]:
from langchain_ollama.llms import OllamaLLM

model = OllamaLLM(model="gpt-oss:20b")

In [46]:
preds = [model.invoke(sft_system_message + user_input) for user_input in tqdm(inputs)]

100%|██████████| 298/298 [1:15:53<00:00, 15.28s/it]


In [130]:
preds4eval = []
failed_preds = []
pattern = r'\{\s*"id"\s*:\s*"[^"]*"\s*,\s*"type"\s*:\s*"[^"]*"\s*,\s*"value"\s*:\s*"[^"]*"\s*\}'

for p in preds:

    p = p.lower().replace("\t", "")

    try:
        objects = json.loads(p)["objects"]
    except:
        objects = re.findall(pattern, p)
        if not objects and '"objects": []' not in p:
            failed_preds.append(p)
        elif objects:
            objects = [json.loads(obj) for obj in objects]
    
    preds4eval.append(
            {
                "objects":objects
                }
        )

In [131]:
print(f"Percenrage of failed json outputs: {'{:.1f}'.format(100 * len(failed_preds) / len(inputs))}%")

Percenrage of failed json outputs: 44.0%


In [132]:
# Post processing
processed_preds4eval = []

def fix_domain_name_id(wrong_id: str) -> str:
    """
    Fixes malformed domain-name IDs according to the observed patterns.
    """

    # Trim spaces
    s = wrong_id.strip().lower()

    # Remove leading underscores or hyphens
    s = re.sub(r'^[-_]+', '', s)

    # Remove redundant 'malware' if it’s at the start but malformed
    s = re.sub(r'^(domain-name[-_]+)', '', s)

    # Handle duplicated name parts (e.g., 'fatboy--fatboy')
    parts = re.split(r'--+', s)
    if len(parts) == 2 and parts[0] == parts[1]:
        s = parts[0]

    # Prepend 'malware--'
    corrected = f"domain-name--{s}"

    # Ensure only one double dash after 'malware'
    corrected = re.sub(r'^domain-name-+', 'domain-name--', corrected)

    return corrected

# Step 1
for p in preds4eval:
    objects = []
    for obj in p["objects"]:
        # Step 1
        if "id" in obj.keys():
            ID = fix_domain_name_id(obj["id"].strip())

        if "value" in obj.keys():
            VALUE = obj["value"]
        else:
            VALUE = ID.split("domain-name--")[-1]

        objects.append(
            {
                "id":ID,
                "type":"domain-name",
                "value":VALUE,
            }
        )

    processed_preds4eval.append(
                {
                    "objects":objects
                }
            )

In [147]:
from evaluation.stix_evaluator import STIXEvaluator
import warnings

warnings.filterwarnings(action="ignore")

evaluator = STIXEvaluator(comparison_values=["type", "name"], cti_object_types=["domain-name"])

In [148]:
p, r, f1, full_res = evaluator._evaluate_(predicted=processed_preds4eval, actual=outputs)
print(f"Precison: {p}\nRecall: {r}\nF1-Score: {f1}")

Precison: 0.46285
Recall: 0.32283
F1-Score: 0.38036


In [95]:
processed_preds4eval[3]

{'objects': [{'id': 'domain-name--domain--cv-builder.site',
   'type': 'domain-name',
   'value': 'cv-builder.site'},
  {'id': 'domain-name--domain--siamaster.com.mx',
   'type': 'domain-name',
   'value': 'siamaster.com.mx'},
  {'id': 'domain-name--domain--chatgptex.us',
   'type': 'domain-name',
   'value': 'chatgptex.us'},
  {'id': 'domain-name--domain--allfreesoftware.online',
   'type': 'domain-name',
   'value': 'allfreesoftware.online'},
  {'id': 'domain-name--domain--all-free-software.online',
   'type': 'domain-name',
   'value': 'all-free-software.online'}]}

In [96]:
outputs[3]

{'objects': [{'id': 'domain-name--siamaster.com.mx',
   'type': 'domain-name',
   'value': 'siamaster.com.mx'},
  {'id': 'domain-name--cv-builder.site',
   'type': 'domain-name',
   'value': 'cv-builder.site'},
  {'id': 'domain-name--chatgptex.us',
   'type': 'domain-name',
   'value': 'chatgptex.us'},
  {'id': 'domain-name--allfreesoftware.online',
   'type': 'domain-name',
   'value': 'allfreesoftware.online'},
  {'id': 'domain-name--all-free-software.online',
   'type': 'domain-name',
   'value': 'all-free-software.online'}]}

In [97]:
evaluator.evaluate_single(predicted=processed_preds4eval[3], actual=outputs[3])

(1.0,
 1.0,
 1.0,
 {'domain-name': {'precision': 1.0,
   'recall': 1.0,
   'f1': 1.0,
   'pred_count': 5,
   'actual_count': 5,
   'true_positives': 5,
   'false_positives': 0,
   'false_negatives': 0,
   'weight': 5}})

In [63]:
for i, file in enumerate(os.listdir(test_path)):
    cti_type = file.split("--")[0]
    if cti_type not in include_cti_type:
        continue
    print(i, file)

1 domain-name--ebdd1253-5d27-4c84-a9eb-09d534408d90.json
17 domain-name--28faa33a-990c-4a54-8750-fc09a515a236.json
36 domain-name--facefbb1-fe87-477f-aea8-2b3d856d49de.json
50 domain-name--f9583e0d-f3ac-4d4a-b20a-754b26bc7ecb.json
68 domain-name--76dae889-6ef3-4697-83c0-d47fa8c9dd71.json
95 domain-name--d7f6ec01-9499-4128-b010-32cba147d534.json
101 domain-name--979aad5d-b8d9-4368-acff-e2049ee5faca.json
133 domain-name--9eed6e0c-62b6-4795-b0d0-5571d63557a8.json
150 domain-name--06317c2b-ce2f-4a57-9bdd-2a5270447de8.json
169 domain-name--c9e742f1-4ea1-4e3e-8af5-2816052f8444.json
206 domain-name--047622b2-335f-4db6-ab5e-a4444016ba8e.json
212 domain-name--9837fa37-56cd-4d83-9292-b19be681453e.json
219 domain-name--49608eb2-8d5c-4840-a20c-842d02b63a85.json
262 domain-name--51858252-43d2-44e5-ac0c-b4da95a3afac.json
272 domain-name--e95ae185-32e4-41b2-832b-5de15d602ada.json
286 domain-name--ae4a2d1f-3859-492e-87e0-360d511b70b3.json
332 domain-name--fdca0b56-22a5-455a-89b2-6ec5f5e8a599.json
335 

In [98]:
for i in range(len(processed_preds4eval)):
    print(f"Predicted: {processed_preds4eval[i]}")
    print(f"Actual: {outputs[i]}")
    print("\n\n")

Predicted: {'objects': [{'id': 'domain-name--domain-voipfaqs.com', 'type': 'domain-name', 'value': 'voipfaqs.com'}, {'id': 'domain-name--domain-teamsbusiness.org', 'type': 'domain-name', 'value': 'teamsbusiness.org'}, {'id': 'domain-name--domain-locallyhyped.com', 'type': 'domain-name', 'value': 'locallyhyped.com'}]}
Actual: {'objects': [{'id': 'domain-name--teamsbusiness.org', 'type': 'domain-name', 'value': 'teamsbusiness.org'}, {'id': 'domain-name--voipfaqs.com', 'type': 'domain-name', 'value': 'voipfaqs.com'}, {'id': 'domain-name--locallyhyped.com', 'type': 'domain-name', 'value': 'locallyhyped.com'}]}



Predicted: {'objects': [{'id': 'domain-name--domain-voipfaqs.com', 'type': 'domain-name', 'value': 'voipfaqs.com'}, {'id': 'domain-name--domain-teamsbusiness.org', 'type': 'domain-name', 'value': 'teamsbusiness.org'}, {'id': 'domain-name--domain-locallyhyped.com', 'type': 'domain-name', 'value': 'locallyhyped.com'}]}
Actual: {'objects': []}



Predicted: {'objects': [{'id': 'domai

In [102]:
for i in range(len(preds)-1):
    if preds[i]==preds[i+1]:
        print(i)

In [104]:
print(preds[0])

```json
{
  "objects": [
    {
      "id": "domain-voipfaqs.com",
      "type": "domain",
      "value": "voipfaqs.com"
    },
    {
      "id": "domain-teamsbusiness.org",
      "type": "domain",
      "value": "teamsbusiness.org"
    },
    {
      "id": "domain-locallyhyped.com",
      "type": "domain",
      "value": "locallyhyped.com"
    }
  ]
}
```



In [105]:
print(preds[1])

The Play ransomware team that stole data from the IACS environment used the following TTPs (ATT&CK® techniques) in their attack chain:

| ATT&CK ID | Technique | How it was used |
|-----------|-----------|-----------------|
| **T1568.002** | **Domain Generation Algorithms** | The Play campaign used a hidden link‑shortener service (Prolific Puma) that generated dozens of short domains to host the download payload and C2 URLs. |
| **T1489** | **Service Stop** | The Linux variant shuts down ESXi services (via `esxcli`) and stops all VMs before encrypting. |
| **T1083** | **File & Directory Discovery** | The malware enumerates VM files and other targets, collecting a list of items to encrypt. |
| **T1491.001** | **Internal Defacement** | The ransom note (PLAY_Readme.txt) is written to the ESXi host client and console, and a Tor‑link is supplied to the victim. |
| **T1041** | **Exfiltration Over C2 Channel** | The Coroxy backdoor (and the Play Linux binary) communicates with the remote C2 s

In [118]:
preds4eval = []
failed_preds = []
pattern = r'\{\s*"id"\s*:\s*"[^"]*"\s*,\s*"type"\s*:\s*"[^"]*"\s*,\s*"value"\s*:\s*"[^"]*"\s*\}'

for p in preds[:2]:

    p = p.lower().replace("\t", "")

    try:
        objects = json.loads(p)["objects"]
    except:
        objects = re.findall(pattern, p)
        if not objects and '"objects": []' not in p:
            failed_preds.append(p)
        elif objects:
            objects = [json.loads(obj) for obj in objects]
    
    preds4eval.append(
            {
                "objects":objects
                }
        )

In [119]:
len(preds4eval)

2

In [120]:
preds4eval[0]

{'objects': [{'id': 'domain-voipfaqs.com',
   'type': 'domain',
   'value': 'voipfaqs.com'},
  {'id': 'domain-teamsbusiness.org',
   'type': 'domain',
   'value': 'teamsbusiness.org'},
  {'id': 'domain-locallyhyped.com',
   'type': 'domain',
   'value': 'locallyhyped.com'}]}

In [121]:
preds4eval[1]

{'objects': []}