In [None]:
import os
import re
import yaml
import json
import torch
import pickle
from unsloth import FastLanguageModel
from tqdm import tqdm
import pandas as pd

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 07-15 08:26:10 [__init__.py:244] Automatically detected platform cuda.


In [2]:
base_model = "meta-llama/Llama-3.1-8B-Instruct"
sft_model = "/mnt/data/training-outputs/Llama/Llama-3.1-8B-Instruct-Not-Quantized/checkpoint-190"
grpo_model = "/mnt/data/training-outputs/LlamaGRPO/grpo_outputs/checkpoint-442"

In [3]:
with open("grpo_config.yaml", "r") as f:
    grpo_config = yaml.load(f, Loader=yaml.SafeLoader)

with open("config.yaml", "r") as f:
    sft_config = yaml.load(f, Loader=yaml.SafeLoader)

grpo_system_message = grpo_config["system_message"]
sft_system_message = sft_config["system_message"]
base_system_message = sft_system_message

In [4]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = sft_model,
    fast_inference = False,
    load_in_4bit = False,
    max_seq_length = None,
    gpu_memory_utilization = 0.7
)

model = model.for_inference()

==((====))==  Unsloth 2025.6.8: Fast Llama patching. Transformers: 4.53.0. vLLM: 0.9.1.
   \\   /|    NVIDIA H100 PCIe. Num GPUs = 1. Max memory: 79.19 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 9.0. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Unsloth 2025.6.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [5]:
def format_input_prompt(system_message, user_input):
    formatted_input = [
        {"role": "assistant", "content": system_message},
        {"role": "user", "content": user_input}
    ]
    return formatted_input

def sft_post_process(text):
    text = text.split(sft_config["response_part"])[-1]
    text = text[2:] if text[:2]=="\n\n" else text
    text = re.sub(r'<\|eot_id\|>', '', text)
    return text

def inference(model, system_message, user_input, max_new_tokens=None, **kwargs):
    input_ids = tokenizer.apply_chat_template(
        format_input_prompt(system_message, user_input),
        add_generation_prompt=True,
        return_tensors = "pt").to("cuda")
    if not max_new_tokens:
        max_new_tokens = model.config.max_position_embeddings - input_ids.shape[-1]
    output_ids = model.generate(input_ids, max_new_tokens=max_new_tokens, **kwargs)
    output_text = tokenizer.batch_decode(output_ids)[0]
    return sft_post_process(output_text)

In [6]:
user_input = "How are you?"
result = inference(model,
                   sft_system_message, 
                   user_input, 
                   max_new_tokens=None,
                   temperature=0.7,
                   top_p=0.6,
                   repetition_penalty=1.1,
                   no_repeat_ngram_size=3,
                   do_sample=True)
result

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
LlamaForCausalLM has no `_prepare_4d_causal_attention_mask_with_cache_position` method defined in its base modeling class. Compiled forward passes will be sub-optimal. If you're writing code, see Llama for an example implementation. If you're a user, please report this issue on GitHub.


"I'm functioning properly. What's your request? You want me to transform a Cyberthreat intel report into a STIK2.0 bundle?"

In [5]:
def load_single_example(path:str, filename:str):
    with open(os.path.join(path, filename), mode="r", encoding="utf-8") as f:
        return json.load(f)

In [6]:
test_data_path = "/mnt/data/openCTI/io-pairs/test/"
test_data = [load_single_example(test_data_path, filename) for filename in os.listdir(test_data_path)]
test_inputs = [example["input"] for example in test_data]
test_outputs = [example["output"] for example in test_data]

In [None]:
# preds = [inference(model,
#                    sft_system_message, 
#                    example, 
#                    max_new_tokens=32768,
#                    temperature=0.7,
#                    top_p=0.6,
#                    repetition_penalty=1.1,
#                    no_repeat_ngram_size=3,
#                    do_sample=True) for example in tqdm(test_inputs)]

# with open('sft_32768_token_limit_preds.pkl', 'wb') as file:
#     pickle.dump(preds, file)

 63%|██████▎   | 135/214 [9:28:22<1:39:10, 75.32s/it]  

In [9]:
with open("sft_16384_token_limit_preds.pkl", "rb") as file:
    preds = pickle.load(file)

In [14]:
size = [len(p) for p in preds]
pd.Series(size).describe()

count      214.000000
mean      6846.971963
std       9961.374318
min        306.000000
25%       1784.250000
50%       4169.500000
75%       7247.250000
max      84231.000000
dtype: float64

In [18]:
json.loads(
    preds[0].replace("\'", '"')
)

JSONDecodeError: Expecting ',' delimiter: line 1 column 1288 (char 1287)

In [22]:
print(preds[0].replace("\'", '"'))

{"id": "", "type": "bundle", "objects": [{"id": "report--New Malvolent PyPI packages used by Lazurus", "type": "report", "name": "New Malovolent PyPI packets used by Lazorus", "description": "PyPI packages released to PyPi by Lzorus", "labels": ["python", "lazurus", "pypiconf","swampool", "quasarlub", "pycryptovn", "typosquatling"], "report_types": ["threat-report"], "created": "2024-02-29 18:22", "object_refs": ["report--Lazurus", "indicator--https://blockchan-newtech.con/download/dowload.asap", "malware--comebacker", "attack-pattern--T1573", "intrusion-set--Lazarus", "vulnerability--CVE-2023-27362", "location--Europe", "identity--Python", "file--e88528ace23092bas628523564ad8abc", "domain-name--chaingrowen.com", "url--http://91,206,178,125/upload/uplod/asap", "domain-name-->blockchain.newtech.com", "url-->https://fastte.com/user/agencys.ap", "attack-pattern-->T1064","malware-->pycryptocon", "location-->Asia", "vulnerabilty-->CVE-2018-1333", "identity-->QuasarLib", "file-->b4a048450bb7

In [23]:
import ast
d = ast.literal_eval(preds[0].replace("\'", '"'))

SyntaxError: closing parenthesis '}' does not match opening parenthesis '[' (<unknown>, line 1)

In [26]:
import evaluate

In [29]:
rouge = evaluate.load('rouge')
results = rouge.compute(predictions=preds, references=test_outputs)

In [30]:
results

{'rouge1': np.float64(0.007351425341432802),
 'rouge2': np.float64(0.0),
 'rougeL': np.float64(0.007384059934463555),
 'rougeLsum': np.float64(0.007402110976928185)}