In [1]:
import dspy
import utils
import typing
import os 
import json
from dspy.teleprompt import MIPROv2

from BasicHtmlToTextParser import BasicHtmlToTextParser
from metrics import stixnet_f1, semantic_match_hungarian
from sentence_transformers import SentenceTransformer

from collections import defaultdict

from pydantic import BaseModel, Field
from pydantic_core import ValidationError

from dotenv import load_dotenv
_ = load_dotenv("../.env")

In [2]:
# All DeepInfra-hosted models are reached through the same OpenAI-compatible endpoint.
DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"


def _deepinfra_lm_pair(model_name):
    """Create the two LM handles used for one DeepInfra model.

    Returns a tuple ``(cached_lm, uncached_lm)``:
      * ``cached_lm``   - temperature 0.1, dspy's default caching behaviour.
      * ``uncached_lm`` - temperature 0.3 with ``cache=False`` and
        ``cache_in_memory=False``; used for retries so that a failed answer is
        not simply replayed.

    The API key is read from the ``DEEPINFRA_API_KEY`` environment variable
    (``None`` when the variable is not set).
    """
    shared_kwargs = dict(
        model=model_name,
        api_key=os.environ.get("DEEPINFRA_API_KEY"),
        base_url=DEEPINFRA_BASE_URL,
        max_tokens=1024,
    )
    cached_lm = dspy.LM(temperature=0.1, **shared_kwargs)
    uncached_lm = dspy.LM(temperature=0.3, cache=False, cache_in_memory=False, **shared_kwargs)
    return cached_lm, uncached_lm


# llama
(
    llama_3_1_8b_instruct_deepinfra,
    llama_3_1_8b_instruct_no_cache_deepinfra,
) = _deepinfra_lm_pair("openai/meta-llama/Meta-Llama-3.1-8B-Instruct")

(
    llama_3_1_70b_instruct_turbo_deepinfra,
    llama_3_1_70b_instruct_turbo_no_cache_deepinfra,
) = _deepinfra_lm_pair("openai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")

(
    llama_3_2_3b_instruct_deepinfra,
    llama_3_2_3b_instruct_no_cache_deepinfra,
) = _deepinfra_lm_pair("openai/meta-llama/Llama-3.2-3B-Instruct")

(
    llama_3_2_1b_instruct_deepinfra,
    llama_3_2_1b_instruct_no_cache_deepinfra,
) = _deepinfra_lm_pair("openai/meta-llama/Llama-3.2-1B-Instruct")

# qwen
(
    qwen_2_5_7b_instruct_deepinfra,
    qwen_2_5_7b_instruct_no_cache_deepinfra,
) = _deepinfra_lm_pair("openai/Qwen/Qwen2.5-7B-Instruct")

(
    qwen_2_5_72b_instruct_deepinfra,
    qwen_2_5_72b_instruct_no_cache_deepinfra,
) = _deepinfra_lm_pair("openai/Qwen/Qwen2.5-72B-Instruct")

In [3]:
# Load the LADDER reports as dspy examples (HTML converted to text without images),
# shuffled with a fixed seed.
dataset = utils.get_dspy_examples_randomized(
    "../LADDER-Dataset/",
    BasicHtmlToTextParser(include_images=False),
    random_seed=1337,
)

# Split at index 47, then run both parts through the attack-pattern dataset generator.
trainset, devset = utils.split_dataset(split_at=47, dataset=dataset)
trainset = utils.generate_all_attack_patterns_dataset(trainset)
devset = utils.generate_all_attack_patterns_dataset(devset)

# No example id may appear in both splits.
trainset_ids = [example.id for example in trainset]
devset_ids = [example.id for example in devset]
assert set(trainset_ids).isdisjoint(devset_ids)

# Sizes of both splits followed by their share of the whole dataset.
print(
    len(trainset),
    len(devset),
    len(trainset)/(len(trainset)+len(devset)),
    len(devset)/(len(trainset)+len(devset)),
)

47 100 total: 147 train-ratio: 0.3197278911564626 dev-ratio: 0.6802721088435374
47 100 0.3197278911564626 0.6802721088435374


In [4]:
# Sentence-embedding model handed to semantic_match_hungarian() inside metric().
# The Hugging Face token is read from HF_API_KEY (None when the variable is unset).
model = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    token=os.environ.get("HF_API_KEY"),
)

In [5]:
# Pydantic schema for one extracted relation "<source, relationship, attack_pattern>".
# It is the element type of the `attack_pattern_triples` output field of
# AttackPatternsExtractionSignature.
# NOTE(review): documented with `#` comments only; a class docstring would presumably
# become part of the generated schema that is shown to the LLM - verify before adding one.
class triple(BaseModel):
    # Head of the triple: name and type of the source object.
    source: str = Field(description="name of the STIX Domain Object (SDO)")
    source_type: typing.Literal["malware", "threat_actor", "campaign", "course_of_action", "indicator", "intrusion_set"] = Field(description="type of the SDO")

    # Edge between the source object and the attack pattern.
    relationship: typing.Literal["uses", "mitigates", "indicates"] = Field(description="STIX Relationship Object (SRO) that connects the source object with the attack_pattern")

    # Tail of the triple: the attack pattern text; its type is fixed to "attack_pattern".
    target_attack_pattern: str = Field(description="The attack-pattern exactly as written in the threat report (like a directly cited quote)")
    target_type: typing.Literal["attack_pattern"] = Field(description="The tail of the triple is always of type attack_pattern")




def enforce_stix(attack_pattern_triples: typing.List[triple]):
    """Return only the triples that satisfy the currently implemented STIX rule.

    At the moment a single rule exists: a triple is kept when its source is a
    ``malware`` object connected through the ``uses`` relationship. Everything
    else is dropped. The input list is not modified; a new list is returned.
    """
    # add more rules to ensure correct relationships between objects
    # for example:
    #   source_type == "indicator" together with relationship == "indicates"
    return [
        candidate
        for candidate in attack_pattern_triples
        if candidate.source_type == "malware" and candidate.relationship == "uses"
    ]

In [7]:
# Per-example scores collected as a side effect of metric(); save_and_evaluate() rebinds
# these names to fresh lists before each run.
PRECISION_STORE, RECALL_STORE, F1_STORE = [], [], []

def metric(example, pred, trace=None):
    """Score predicted attack-pattern triples against the gold triples of one example.

    Triples are grouped by their lower-cased ``source``. For every source that
    occurs in both gold and prediction, the lower-cased attack patterns are
    compared with ``semantic_match_hungarian`` (threshold 0.8, using the
    module-level ``model``). Patterns of sources that occur only in the gold
    data count as false negatives, patterns of sources that occur only in the
    prediction count as false positives.

    Args:
        example: object exposing the gold ``attack_pattern_triples``.
        pred: object exposing the predicted ``attack_pattern_triples``.
        trace: accepted but not used by this function.

    Returns:
        The F1 value computed by ``stixnet_f1`` from the summed counts.

    Side effects:
        Appends precision, recall and F1 to ``PRECISION_STORE``,
        ``RECALL_STORE`` and ``F1_STORE``.
    """
    def group_by_source(triples):
        # lower-cased source name -> set of lower-cased attack patterns
        grouped = defaultdict(set)
        for item in triples:
            grouped[item.source.lower()].add(item.target_attack_pattern.lower())
        return grouped

    gold_by_source = group_by_source(example.attack_pattern_triples)
    pred_by_source = group_by_source(pred.attack_pattern_triples)

    gold_sources = set(gold_by_source)
    pred_sources = set(pred_by_source)

    total_tp, total_fp, total_fn = 0, 0, 0

    # Sources found on both sides: match their attack patterns semantically.
    for shared_source in gold_sources & pred_sources:
        tp, fp, fn = semantic_match_hungarian(
            pred_by_source[shared_source],
            gold_by_source[shared_source],
            model=model,
            threshold=0.8,
        )
        total_tp += tp
        total_fp += fp
        total_fn += fn

    # Gold sources that were never predicted: all of their patterns are missed.
    for missed_source in gold_sources - pred_sources:
        total_fn += len(gold_by_source[missed_source])

    # Predicted sources without gold counterpart: all of their patterns are spurious.
    for spurious_source in pred_sources - gold_sources:
        total_fp += len(pred_by_source[spurious_source])

    precision, recall, f1 = stixnet_f1(total_tp, total_fp, total_fn)

    PRECISION_STORE.append(precision)
    RECALL_STORE.append(recall)
    F1_STORE.append(f1)

    return f1



# Sanity check: scoring every example against its own gold triples must give exactly 1.0.
# Side effect: every metric() call also appends to PRECISION_STORE / RECALL_STORE / F1_STORE.
for example in devset + trainset:
    assert metric(example, example) == 1.0


def save_and_evaluate(program: dspy.Predict, llm: dspy.LM, llm_no_cache: dspy.LM, llm_id: str, base_path: str, valset: typing.List[dspy.Example]):
    """Run ``program`` on every example of ``valset``, score it and persist the scores.

    For each example the program is called with the example's inputs. The first
    attempt uses ``llm``; every further attempt uses ``llm_no_cache``. An attempt
    counts as failed when the program call or ``metric`` raises any exception
    (this includes pydantic ``ValidationError``). After ``max_retries`` (5)
    failed attempts the example is recorded with precision, recall and F1 of 0.

    Args:
        program: callable dspy program returning an object with ``attack_pattern_triples``.
        llm: LM used for the first attempt of every example.
        llm_no_cache: LM used for all retries.
        llm_id: prefix of the written file names.
        base_path: directory the result files are written to; created if missing.
        valset: examples to evaluate.

    Returns:
        A list with one dict per example: ``{"finished": bool, "retries": int}``.

    Side effects:
        Rebinds the module-level ``PRECISION_STORE``, ``RECALL_STORE`` and
        ``F1_STORE`` to fresh lists and writes ``{llm_id}_precision.json``,
        ``{llm_id}_recall.json``, ``{llm_id}_f1.json`` and
        ``{llm_id}_retry_stats.json`` into ``base_path``.
    """
    global PRECISION_STORE
    global RECALL_STORE
    global F1_STORE

    # metric() appends to these module-level lists; start from empty ones so that
    # only the scores of this run end up in the result files.
    PRECISION_STORE, RECALL_STORE, F1_STORE = [], [], []

    # Create the output directory before any LM call, so a missing directory
    # cannot discard a finished run when the results are written.
    os.makedirs(base_path, exist_ok=True)

    max_retries = 5
    retry_stats = []
    for i, obj in enumerate(valset):

        print(f"{i+1}/{len(valset)}")

        retries = 0
        while True:
            try:
                print(f"Retry {retries}/{max_retries}")
                # First attempt may be served from cache; retries use the uncached LM.
                active_lm = llm_no_cache if retries > 0 else llm
                with dspy.settings.context(lm=active_lm):
                    attack_pattern_triples = program(**obj.inputs()).attack_pattern_triples

                # enforce_stix(attack_pattern_triples) # it does not care about this function call!
                pred = dspy.Prediction(attack_pattern_triples=attack_pattern_triples)

                # Called for its side effect: appends precision / recall / F1 to the stores.
                metric(obj, pred)
                retry_stats.append({"finished": True, "retries": retries})
                print(f"✔️ done with {retries} retries")
                break

            except Exception as e:
                # ValidationError and every other error are handled the same way:
                # report it, then retry until the attempt budget is used up.
                retries += 1
                print(f"⚠️ attempt failed with {type(e).__name__}: {e}")
                if retries == max_retries:
                    retry_stats.append({"finished": False, "retries": retries})
                    RECALL_STORE.append(0)
                    PRECISION_STORE.append(0)
                    F1_STORE.append(0)
                    print(f"❌ Failed after {retries} retries")
                    break

    # store result
    results = (
        ("precision", PRECISION_STORE),
        ("recall", RECALL_STORE),
        ("f1", F1_STORE),
        ("retry_stats", retry_stats),
    )
    for suffix, payload in results:
        with open(f"{base_path}/{llm_id}_{suffix}.json", "w") as fp:
            json.dump(payload, fp)

    return retry_stats

In [8]:
# dspy signature of the extraction task: threat report plus entity hints in, list of `triple` out.
# NOTE(review): the class docstring below is runtime text (presumably used by dspy as the task
# instructions sent to the LM), so its wording - including the typos - is left untouched;
# changing it would change the prompt the recorded experiments were run with.
class AttackPatternsExtractionSignature(dspy.Signature):
    """
    You are a cyber threat intelligence expert and your task is to analyse and extract all meaningful and relevant Attack Patterns from the provided threat report. Your sepcific task is to extract triple (simplified structure: "<source, relationship, attack_pattern/>"). The extracted CTI will be used to populate a STIX-Bundle that is generated for this specific threat report. This means the source of every triple you extract need to be a STIX Domain Object (SDO), the relationship needs to be a STIX Relationship Object (SRO) and the tail is always needs to be an Attack-Pattern! 
    
    The following triples are considered valid and conform to the STIX 2.1 specification. Only extract triples that match these rules: 
        1. <campaign, uses, attack_pattern/>     (additional context: This Relationship describes that attacks carried out as part of the Campaign typically use the related Attack Pattern.)
        2. <course_of_action, mitigates, attack_pattern/>    (additional context: This Relationship describes that the Course of Action can mitigate (e.g. respond to a threat) the related Attack Pattern.)
        3. <indicator, indicates, attack_pattern/>   (additional context: This Relationship describes that the Indicator can detect evidence of the related Attack Pattern.)
        4. <intrusion_set, uses, attack_pattern/>    (additional context: This Relationship describes that attacks carried out as part of the Intrusion Set typically use the related Attack Pattern.)
        5. <threat_actor, uses, attack_pattern/>     (additional context: This Relationship describes that attacks carried out as part of the Threat Actor typically use the related Attack Pattern.)
        6. <malware, uses, attack_pattern/>  (additional context: This Relationship documents that this malware instance or family uses the attack pattern.)

    *Example for "<malware, uses, attack_pattern>" based on one sentence*
        1. The provided threat report contains the sentence: "Nexus malware is an Android banking trojan promoted as a malware-as-a-service (MaaS) offering that can be used for account takeover (ATO) attacks." 
        2. You need to recognize the CTI that is contained in the sentence. In this specific example: The malware "Nexus" is using the Attack-Pattern "account takeover (ATO)".
        3. Do this for the whole threat report and extract all triples to represent the contained CTI. 
        4. Follow the provided format_instructions to generate a valid and machine readable output.
        
    *Final hints*
        - Make sure to stricly cite the Attack-Pattern word by word
        - Consider mentioned_malwares, mentioned_threat_actors as hints for the source of the triples
    """
    # Inputs: the report text and two lists of entity names mentioned in it.
    threat_report: str = dspy.InputField()
    mentioned_malwares: typing.List[str] = dspy.InputField(desc="A list of malware names that are mentioned in the threat report")
    mentioned_threat_actors: typing.List[str] = dspy.InputField(desc="A list of threat actors that are mentioned in the threat report")
    # Output: the extracted triples, each one validated against the `triple` schema.
    attack_pattern_triples: typing.List[triple] = dspy.OutputField()

In [None]:
# Baseline: evaluate the un-optimized predictor with every DeepInfra model on devset.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "ALL DONE"

BASE_PATH = "./AttackPatternExtractor/Baseline"
# NOTE(review): `mlflow` is not imported in the notebook's import cell; import it before
# re-enabling this cell, otherwise the next line raises NameError.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (file prefix, cached LM for the first attempt, uncached LM for retries)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra, llama_3_2_1b_instruct_no_cache_deepinfra), # done
    # The marker after the next entry used to be a bare `done` (missing `#`). Inside the list it
    # was joined with the following line and parsed as a call `done((...))`, which would raise
    # NameError and drop the llama_3_1_8b entry.
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra, llama_3_2_3b_instruct_no_cache_deepinfra), # done
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra, llama_3_1_8b_instruct_no_cache_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra, llama_3_1_70b_instruct_turbo_no_cache_deepinfra),
    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra, qwen_2_5_7b_instruct_no_cache_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra, qwen_2_5_72b_instruct_no_cache_deepinfra),
]


for llm_id, llm, llm_no_cache in to_do:
    # A fresh, un-optimized predictor per model.
    program = dspy.Predict(AttackPatternsExtractionSignature)

    retry_stats = save_and_evaluate(program=program, base_path=BASE_PATH, llm_id=llm_id, llm=llm, llm_no_cache=llm_no_cache, valset=devset)

In [9]:
# Self-hosted models behind an OpenAI-compatible endpoint,
# use for example vllm  https://github.com/vllm-project/vllm
# Both values are placeholders and have to be filled in before these LMs can be used.
API_KEY = None
BASE_URL = None

# Same convention as for the hosted models: a default-cache handle at temperature 0.1
# and an uncached handle at temperature 0.3 for retries.
qwen_2_5_1p5b_instruct_vllm = dspy.LM(
    "openai/Qwen/Qwen2.5-1.5B-Instruct",
    api_key=API_KEY,
    base_url=BASE_URL,
    temperature=0.1,
    max_tokens=1024,
)
qwen_2_5_1p5b_instruct_no_cache_vllm = dspy.LM(
    "openai/Qwen/Qwen2.5-1.5B-Instruct",
    api_key=API_KEY,
    base_url=BASE_URL,
    temperature=0.3,
    max_tokens=1024,
    cache=False,
    cache_in_memory=False,
)

qwen_2_5_3b_instruct_vllm = dspy.LM(
    model="openai/Qwen/Qwen2.5-3B-Instruct",
    api_key=API_KEY,
    base_url=BASE_URL,
    temperature=0.1,
    max_tokens=1024,
)
qwen_2_5_3b_instruct_no_cache_vllm = dspy.LM(
    model="openai/Qwen/Qwen2.5-3B-Instruct",
    api_key=API_KEY,
    base_url=BASE_URL,
    temperature=0.3,
    max_tokens=1024,
    cache=False,
    cache_in_memory=False,
)

In [None]:
# Baseline evaluation of the un-optimized predictor with the self-hosted Qwen2.5-1.5B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "DONE"

# (file prefix, cached LM for the first attempt, uncached LM for retries)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm, qwen_2_5_1p5b_instruct_no_cache_vllm)
]

BASE_PATH = "./AttackPatternExtractor/Baseline"

# NOTE(review): `mlflow` is not imported in the notebook's import cell; import it before
# re-enabling this cell.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

for llm_id, llm, llm_no_cache in to_do: 
    program = dspy.Predict(AttackPatternsExtractionSignature)

    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Baseline evaluation of the un-optimized predictor with the self-hosted Qwen2.5-3B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "DONE"

# (file prefix, cached LM for the first attempt, uncached LM for retries)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm, qwen_2_5_3b_instruct_no_cache_vllm)
]

BASE_PATH = "./AttackPatternExtractor/Baseline"

# NOTE(review): `mlflow` is not imported in the notebook's import cell; import it before
# re-enabling this cell.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

for llm_id, llm, llm_no_cache in to_do: 
    program = dspy.Predict(AttackPatternsExtractionSignature)
    
    save_and_evaluate(program=program, base_path=BASE_PATH, llm_id=llm_id, llm=llm, llm_no_cache=llm_no_cache, valset=devset)

# ZERO SHOT OPTIMIZATION

In [None]:
# Zero-shot "O1" configuration: no bootstrapped and no labeled demos.
# NOTE: the names o1_optimizer_settings / o1_compiler_settings are assigned again by the
# few-shot O1 settings cell further down; the optimization cells use whichever ran last.

# Keyword arguments unpacked into MIPROv2(...).
o1_optimizer_settings = {
    "num_candidates": 10,
    "max_bootstrapped_demos": 0,
    "max_labeled_demos": 0,
    "metric_threshold": None,
    "init_temperature": 0.5,
    "task_model": None,
    "num_threads": 16,
    "max_errors": 30,
    "prompt_model": llama_3_1_70b_instruct_turbo_deepinfra,
    "teacher_settings": {"lm": llama_3_1_70b_instruct_turbo_deepinfra},
    "auto": None,
}

# Keyword arguments unpacked into optimizer.compile(...).
o1_compiler_settings = {
    "num_trials": 30,
    "minibatch": True,
    "minibatch_size": 25,
    "minibatch_full_eval_steps": 10,
    "program_aware_proposer": True,
    "data_aware_proposer": True,
    "view_data_batch_size": 10,
    "tip_aware_proposer": True,
    "fewshot_aware_proposer": True,
    "requires_permission_to_run": False,
}

In [None]:
# Zero-shot O1 optimization for the DeepInfra models.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"
# Directory the compiled programs are saved to (one sub-path per model id).
BASE_PATH = "./AttackPatternExtractor/ZERO-O1"
# mlflow.autolog()
# mlflow.set_experiment(BASE_PATH)


# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra, llama_3_2_1b_instruct_no_cache_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra, llama_3_2_3b_instruct_no_cache_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra, llama_3_1_8b_instruct_no_cache_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra, llama_3_1_70b_instruct_turbo_no_cache_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra, qwen_2_5_7b_instruct_no_cache_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra, qwen_2_5_72b_instruct_no_cache_deepinfra), # tbd
]


for llm_id, llm, llm_no_cache in to_do: 
    # Every model starts from the same un-optimized predictor.
    default_program = dspy.Predict(AttackPatternsExtractionSignature)

    # NOTE(review): o1_optimizer_settings / o1_compiler_settings are assigned in two cells of this
    # notebook (zero-shot and few-shot values); make sure the zero-shot cell was the last one run.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # `llm` is the LM active during compilation.
    # NOTE(review): devset is passed as valset here and is also the set the evaluation cells
    # score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Zero-shot O1 optimization for the self-hosted Qwen2.5-1.5B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"


# Directory the compiled program is saved to.
BASE_PATH = "./AttackPatternExtractor/ZERO-O1"
# mlflow.autolog()
# mlflow.set_experiment(BASE_PATH)


# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm, qwen_2_5_1p5b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do: 
    default_program = dspy.Predict(AttackPatternsExtractionSignature)
    
    # NOTE(review): o1_* settings are assigned in two cells of this notebook (zero-shot and
    # few-shot values); make sure the zero-shot cell was the last one run.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # `llm` is the LM active during compilation; devset doubles as the optimizer's valset.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Zero-shot O1 optimization for the self-hosted Qwen2.5-3B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"

# Directory the compiled program is saved to.
BASE_PATH = "./AttackPatternExtractor/ZERO-O1"


# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm, qwen_2_5_3b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do: 
    default_program = dspy.Predict(AttackPatternsExtractionSignature)
    
    # NOTE(review): o1_* settings are assigned in two cells of this notebook (zero-shot and
    # few-shot values); make sure the zero-shot cell was the last one run.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # `llm` is the LM active during compilation; devset doubles as the optimizer's valset.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Zero-shot "O2" configuration: no bootstrapped and no labeled demos, with more candidates
# and more trials than the O1 configuration.
# NOTE: the names o2_optimizer_settings / o2_compiler_settings are assigned again by the
# few-shot O2 settings cell further down; the optimization cells use whichever ran last.

# Keyword arguments unpacked into MIPROv2(...).
o2_optimizer_settings = {
    "num_candidates": 25,
    "max_bootstrapped_demos": 0,
    "max_labeled_demos": 0,
    "metric_threshold": None,
    "init_temperature": 0.5,
    "task_model": None,
    "num_threads": 16,
    "max_errors": 30,
    "prompt_model": llama_3_1_70b_instruct_turbo_deepinfra,
    "teacher_settings": {"lm": llama_3_1_70b_instruct_turbo_deepinfra},
    "auto": None,
}

# Keyword arguments unpacked into optimizer.compile(...).
o2_compiler_settings = {
    "num_trials": 50,
    "minibatch": True,
    "minibatch_size": 25,
    "minibatch_full_eval_steps": 10,
    "program_aware_proposer": True,
    "data_aware_proposer": True,
    "view_data_batch_size": 10,
    "tip_aware_proposer": True,
    "fewshot_aware_proposer": True,
    "requires_permission_to_run": False,
}

In [None]:
# Zero-shot O2 optimization for the DeepInfra models.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"

# Directory the compiled programs are saved to (one sub-path per model id).
BASE_PATH = "./AttackPatternExtractor/ZERO-O2"

# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra, llama_3_2_1b_instruct_no_cache_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra, llama_3_2_3b_instruct_no_cache_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra, llama_3_1_8b_instruct_no_cache_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra, llama_3_1_70b_instruct_turbo_no_cache_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra, qwen_2_5_7b_instruct_no_cache_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra, qwen_2_5_72b_instruct_no_cache_deepinfra),
]


for llm_id, llm, llm_no_cache in to_do: 
    # Every model starts from the same un-optimized predictor.
    default_program = dspy.Predict(AttackPatternsExtractionSignature)

    # NOTE(review): o2_optimizer_settings / o2_compiler_settings are assigned in two cells of this
    # notebook (zero-shot and few-shot values); make sure the zero-shot cell was the last one run.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # `llm` is the LM active during compilation.
    # NOTE(review): devset is passed as valset here and is also the set the evaluation cells
    # score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Zero-shot O2 optimization for the self-hosted Qwen2.5-1.5B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"


# Directory the compiled program is saved to.
BASE_PATH = "./AttackPatternExtractor/ZERO-O2"


# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm, qwen_2_5_1p5b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do: 
    # default_program = AttackPatternExtractionModule(llm=llm, llm_no_cache=llm_no_cache, attack_pattern_extractor_program=dspy.Predict(AttackPatternsExtractionSignature), retry_stats=[])
    default_program = dspy.Predict(AttackPatternsExtractionSignature)
    
    # NOTE(review): o2_* settings are assigned in two cells of this notebook (zero-shot and
    # few-shot values); make sure the zero-shot cell was the last one run.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # `llm` is the LM active during compilation; devset doubles as the optimizer's valset.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Zero-shot O2 optimization for the self-hosted Qwen2.5-3B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"
# Directory the compiled program is saved to.
BASE_PATH = "./AttackPatternExtractor/ZERO-O2"


# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm, qwen_2_5_3b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do: 
    default_program = dspy.Predict(AttackPatternsExtractionSignature)
    
    # NOTE(review): o2_* settings are assigned in two cells of this notebook (zero-shot and
    # few-shot values); make sure the zero-shot cell was the last one run.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # `llm` is the LM active during compilation; devset doubles as the optimizer's valset.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

# FEW SHOT OPTIMIZATION O1

In [None]:
# Few-shot "O1" configuration: up to 1 bootstrapped and 2 labeled demos.
# NOTE: this rebinds o1_optimizer_settings / o1_compiler_settings, which the zero-shot O1
# settings cell earlier in the notebook also assigns; the optimization cells use whichever ran last.

# Keyword arguments unpacked into MIPROv2(...).
o1_optimizer_settings = {
    "num_candidates": 10,
    "max_bootstrapped_demos": 1,
    "max_labeled_demos": 2,
    "metric_threshold": None,
    "init_temperature": 0.5,
    "task_model": None,
    "num_threads": 42,
    "max_errors": 100,
    "prompt_model": llama_3_1_70b_instruct_turbo_deepinfra,
    "teacher_settings": {"lm": llama_3_1_70b_instruct_turbo_deepinfra},
    "auto": None,
}

# Keyword arguments unpacked into optimizer.compile(...).
o1_compiler_settings = {
    "num_trials": 30,
    "minibatch": True,
    "minibatch_size": 25,
    "minibatch_full_eval_steps": 10,
    "program_aware_proposer": True,
    "data_aware_proposer": True,
    "view_data_batch_size": 10,
    "tip_aware_proposer": True,
    "fewshot_aware_proposer": True,
    "requires_permission_to_run": False,
}

In [None]:
# Few-shot O1 optimization for the DeepInfra models.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"
# Directory the compiled programs are saved to (one sub-path per model id).
BASE_PATH = "./AttackPatternExtractor/FS-O1"


# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra, llama_3_2_1b_instruct_no_cache_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra, llama_3_2_3b_instruct_no_cache_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra, llama_3_1_8b_instruct_no_cache_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra, llama_3_1_70b_instruct_turbo_no_cache_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra, qwen_2_5_7b_instruct_no_cache_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra, qwen_2_5_72b_instruct_no_cache_deepinfra),
]


for llm_id, llm, llm_no_cache in to_do: 
    # Every model starts from the same un-optimized predictor.
    default_program = dspy.Predict(AttackPatternsExtractionSignature)

    # NOTE(review): o1_optimizer_settings / o1_compiler_settings are assigned in two cells of this
    # notebook (zero-shot and few-shot values); make sure the few-shot cell was the last one run.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # `llm` is the LM active during compilation.
    # NOTE(review): devset is passed as valset here and is also the set the evaluation cells
    # score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Few-shot O1 optimization for the self-hosted Qwen2.5-1.5B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"


# Directory the compiled program is saved to.
BASE_PATH = "./AttackPatternExtractor/FS-O1"


# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm, qwen_2_5_1p5b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do: 
    default_program = dspy.Predict(AttackPatternsExtractionSignature)
    
    # NOTE(review): o1_* settings are assigned in two cells of this notebook (zero-shot and
    # few-shot values); make sure the few-shot cell was the last one run.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # `llm` is the LM active during compilation; devset doubles as the optimizer's valset.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Few-shot O1 optimization for the self-hosted Qwen2.5-3B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"

# Directory the compiled program is saved to.
BASE_PATH = "./AttackPatternExtractor/FS-O1"

# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm, qwen_2_5_3b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do: 
    default_program = dspy.Predict(AttackPatternsExtractionSignature)
    
    # NOTE(review): o1_* settings are assigned in two cells of this notebook (zero-shot and
    # few-shot values); make sure the few-shot cell was the last one run.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # `llm` is the LM active during compilation; devset doubles as the optimizer's valset.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

# FEW SHOT OPTIMIZATION O2

In [None]:
# Few-shot "O2" configuration: up to 1 bootstrapped and 2 labeled demos, with more candidates
# and more trials than the few-shot O1 configuration.
# NOTE: this rebinds o2_optimizer_settings / o2_compiler_settings, which the zero-shot O2
# settings cell earlier in the notebook also assigns; the optimization cells use whichever ran last.

# Keyword arguments unpacked into MIPROv2(...).
o2_optimizer_settings = {
    "num_candidates": 25,
    "max_bootstrapped_demos": 1,
    "max_labeled_demos": 2,
    "metric_threshold": None,
    "init_temperature": 0.5,
    "task_model": None,
    "num_threads": 16,
    "max_errors": 100,
    "prompt_model": llama_3_1_70b_instruct_turbo_deepinfra,
    "teacher_settings": {"lm": llama_3_1_70b_instruct_turbo_deepinfra},
    "auto": None,
}

# Keyword arguments unpacked into optimizer.compile(...).
o2_compiler_settings = {
    "num_trials": 50,
    "minibatch": True,
    "minibatch_size": 25,
    "minibatch_full_eval_steps": 10,
    "program_aware_proposer": True,
    "data_aware_proposer": True,
    "view_data_batch_size": 10,
    "tip_aware_proposer": True,
    "fewshot_aware_proposer": True,
    "requires_permission_to_run": False,
}

In [None]:
# Few-shot O2 optimization for the DeepInfra models.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"
# Directory the compiled programs are saved to (one sub-path per model id).
BASE_PATH = "./AttackPatternExtractor/FS-O2"

# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra, llama_3_2_1b_instruct_no_cache_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra, llama_3_2_3b_instruct_no_cache_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra, llama_3_1_8b_instruct_no_cache_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra, llama_3_1_70b_instruct_turbo_no_cache_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra, qwen_2_5_7b_instruct_no_cache_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra, qwen_2_5_72b_instruct_no_cache_deepinfra),
]


for llm_id, llm, llm_no_cache in to_do: 
    # Every model starts from the same un-optimized predictor.
    default_program = dspy.Predict(AttackPatternsExtractionSignature)

    # NOTE(review): o2_optimizer_settings / o2_compiler_settings are assigned in two cells of this
    # notebook (zero-shot and few-shot values); make sure the few-shot cell was the last one run.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # `llm` is the LM active during compilation.
    # NOTE(review): devset is passed as valset here and is also the set the evaluation cells
    # score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Few-shot O2 optimization for the self-hosted Qwen2.5-1.5B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"


# Directory the compiled program is saved to.
BASE_PATH = "./AttackPatternExtractor/FS-O2"


# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm, qwen_2_5_1p5b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do: 
    default_program = dspy.Predict(AttackPatternsExtractionSignature)
    
    # NOTE(review): o2_* settings are assigned in two cells of this notebook (zero-shot and
    # few-shot values); make sure the few-shot cell was the last one run.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # `llm` is the LM active during compilation; devset doubles as the optimizer's valset.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Few-shot O2 optimization for the self-hosted Qwen2.5-3B model.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"

# Directory the compiled program is saved to.
BASE_PATH = "./AttackPatternExtractor/FS-O2"


# (model id, cached LM, uncached LM); the uncached LM is unpacked but not used in the loop below.
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm, qwen_2_5_3b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do: 
    default_program = dspy.Predict(AttackPatternsExtractionSignature)
    
    # NOTE(review): o2_* settings are assigned in two cells of this notebook (zero-shot and
    # few-shot values); make sure the few-shot cell was the last one run.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # `llm` is the LM active during compilation; devset doubles as the optimizer's valset.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

# EVALUATION OF THE OPTIMIZED PROGRAMS

In [None]:
# Evaluation of the zero-shot O1 programs of the DeepInfra models on devset.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"
# Directory the saved programs are loaded from and the score files are written to.
BASE_PATH = "./AttackPatternExtractor/ZERO-O1"


# (model id, cached LM for the first attempt, uncached LM for retries)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra, llama_3_2_1b_instruct_no_cache_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra, llama_3_2_3b_instruct_no_cache_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra, llama_3_1_8b_instruct_no_cache_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra, llama_3_1_70b_instruct_turbo_no_cache_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra, qwen_2_5_7b_instruct_no_cache_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra, qwen_2_5_72b_instruct_no_cache_deepinfra),
]


for llm_id, llm, llm_no_cache in to_do: 
    # Load the program stored under this model id and score it with the same model.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Evaluation of the zero-shot O1 program of the self-hosted Qwen2.5-1.5B model on devset.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"

# Directory the saved program is loaded from and the score files are written to.
BASE_PATH = "./AttackPatternExtractor/ZERO-O1"
# NOTE(review): `mlflow` is not imported in the notebook's import cell; import it before
# re-enabling this cell.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id, cached LM for the first attempt, uncached LM for retries)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm, qwen_2_5_1p5b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do:
    # Load the program stored under this model id and score it with the same model.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Evaluation of the zero-shot O1 program of the self-hosted Qwen2.5-3B model on devset.
# Disabled on purpose: the assert raises immediately, so nothing below runs until it is removed.
assert False, "already finished and optimizer steps are not cached!"
# Directory the saved program is loaded from and the score files are written to.
BASE_PATH = "./AttackPatternExtractor/ZERO-O1"


# (model id, cached LM for the first attempt, uncached LM for retries)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm, qwen_2_5_3b_instruct_no_cache_vllm)
]


for llm_id, llm, llm_no_cache in to_do: 
    # Load the program stored under this model id and score it with the same model.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Guard against accidental re-execution: per the message below, this evaluation
# has already been run and the optimizer steps are not cached. An explicit
# raise replaces `assert False` because asserts are stripped under `python -O`
# / PYTHONOPTIMIZE, which would silently disable the guard. Exception type and
# message are unchanged. Remove the raise to re-run deliberately.
raise AssertionError("already finished and optimizer steps are not cached!")

# Directory the saved programs are loaded from; also forwarded to
# save_and_evaluate as `base_path`.
BASE_PATH = "./AttackPatternExtractor/ZERO-O2"

# (llm_id, llm, llm_no_cache) triples. `llm_id` doubles as the sub-directory
# name under BASE_PATH that holds the saved program for that model.
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra, llama_3_2_1b_instruct_no_cache_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra, llama_3_2_3b_instruct_no_cache_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra, llama_3_1_8b_instruct_no_cache_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra, llama_3_1_70b_instruct_turbo_no_cache_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra, qwen_2_5_7b_instruct_no_cache_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra, qwen_2_5_72b_instruct_no_cache_deepinfra),
]

for llm_id, llm, llm_no_cache in to_do:
    # dspy.load restores a whole saved program; only point it at directories
    # you trust (presumably the ones written by this notebook - confirm).
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    # Pass the loaded program plus both LM handles on, with devset as valset.
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Guard against accidental re-execution: per the message below, this evaluation
# has already been run and the optimizer steps are not cached. An explicit
# raise replaces `assert False` because asserts are stripped under `python -O`
# / PYTHONOPTIMIZE, which would silently disable the guard. Exception type and
# message are unchanged. Remove the raise to re-run deliberately.
raise AssertionError("already finished and optimizer steps are not cached!")

# Directory the saved program is loaded from; also forwarded to
# save_and_evaluate as `base_path`.
BASE_PATH = "./AttackPatternExtractor/ZERO-O2"

# (llm_id, llm, llm_no_cache) triples. `llm_id` doubles as the sub-directory
# name under BASE_PATH that holds the saved program for that model.
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm, qwen_2_5_1p5b_instruct_no_cache_vllm)
]

for llm_id, llm, llm_no_cache in to_do:
    # dspy.load restores a whole saved program; only point it at directories
    # you trust (presumably the ones written by this notebook - confirm).
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    # Pass the loaded program plus both LM handles on, with devset as valset.
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Guard against accidental re-execution: per the message below, this evaluation
# has already been run and the optimizer steps are not cached. An explicit
# raise replaces `assert False` because asserts are stripped under `python -O`
# / PYTHONOPTIMIZE, which would silently disable the guard. Exception type and
# message are unchanged. Remove the raise to re-run deliberately.
raise AssertionError("already finished and optimizer steps are not cached!")

# Directory the saved program is loaded from; also forwarded to
# save_and_evaluate as `base_path`.
BASE_PATH = "./AttackPatternExtractor/ZERO-O2"

# (llm_id, llm, llm_no_cache) triples. `llm_id` doubles as the sub-directory
# name under BASE_PATH that holds the saved program for that model.
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm, qwen_2_5_3b_instruct_no_cache_vllm)
]

for llm_id, llm, llm_no_cache in to_do:
    # dspy.load restores a whole saved program; only point it at directories
    # you trust (presumably the ones written by this notebook - confirm).
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    # Pass the loaded program plus both LM handles on, with devset as valset.
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

## Few-Shot Evaluation

In [None]:
# Guard against accidental re-execution: per the message below, this evaluation
# has already been run and the optimizer steps are not cached. An explicit
# raise replaces `assert False` because asserts are stripped under `python -O`
# / PYTHONOPTIMIZE, which would silently disable the guard. Exception type and
# message are unchanged. Remove the raise to re-run deliberately.
raise AssertionError("already finished and optimizer steps are not cached!")

# Directory the saved programs are loaded from; also forwarded to
# save_and_evaluate as `base_path`.
BASE_PATH = "./AttackPatternExtractor/FS-O1"

# (llm_id, llm, llm_no_cache) triples. `llm_id` doubles as the sub-directory
# name under BASE_PATH that holds the saved program for that model.
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra, llama_3_2_1b_instruct_no_cache_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra, llama_3_2_3b_instruct_no_cache_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra, llama_3_1_8b_instruct_no_cache_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra, llama_3_1_70b_instruct_turbo_no_cache_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra, qwen_2_5_7b_instruct_no_cache_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra, qwen_2_5_72b_instruct_no_cache_deepinfra),
]

for llm_id, llm, llm_no_cache in to_do:
    # dspy.load restores a whole saved program; only point it at directories
    # you trust (presumably the ones written by this notebook - confirm).
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    # Pass the loaded program plus both LM handles on, with devset as valset.
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Guard against accidental re-execution: per the message below, this evaluation
# has already been run and the optimizer steps are not cached. An explicit
# raise replaces `assert False` because asserts are stripped under `python -O`
# / PYTHONOPTIMIZE, which would silently disable the guard. Exception type and
# message are unchanged. Remove the raise to re-run deliberately.
raise AssertionError("already finished and optimizer steps are not cached!")

# Directory the saved program is loaded from; also forwarded to
# save_and_evaluate as `base_path`.
BASE_PATH = "./AttackPatternExtractor/FS-O1"

# (llm_id, llm, llm_no_cache) triples. `llm_id` doubles as the sub-directory
# name under BASE_PATH that holds the saved program for that model.
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm, qwen_2_5_1p5b_instruct_no_cache_vllm)
]

for llm_id, llm, llm_no_cache in to_do:
    # dspy.load restores a whole saved program; only point it at directories
    # you trust (presumably the ones written by this notebook - confirm).
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    # Pass the loaded program plus both LM handles on, with devset as valset.
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Guard against accidental re-execution: per the message below, this evaluation
# has already been run and the optimizer steps are not cached. An explicit
# raise replaces `assert False` because asserts are stripped under `python -O`
# / PYTHONOPTIMIZE, which would silently disable the guard. Exception type and
# message are unchanged. Remove the raise to re-run deliberately.
raise AssertionError("already finished and optimizer steps are not cached!")

# Directory the saved program is loaded from; also forwarded to
# save_and_evaluate as `base_path`.
BASE_PATH = "./AttackPatternExtractor/FS-O1"

# (llm_id, llm, llm_no_cache) triples. `llm_id` doubles as the sub-directory
# name under BASE_PATH that holds the saved program for that model.
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm, qwen_2_5_3b_instruct_no_cache_vllm)
]

for llm_id, llm, llm_no_cache in to_do:
    # dspy.load restores a whole saved program; only point it at directories
    # you trust (presumably the ones written by this notebook - confirm).
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    # Pass the loaded program plus both LM handles on, with devset as valset.
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Guard against accidental re-execution: per the message below, this evaluation
# has already been run and the optimizer steps are not cached. An explicit
# raise replaces `assert False` because asserts are stripped under `python -O`
# / PYTHONOPTIMIZE, which would silently disable the guard. Exception type and
# message are unchanged. Remove the raise to re-run deliberately.
raise AssertionError("already finished and optimizer steps are not cached!")

# Directory the saved programs are loaded from; also forwarded to
# save_and_evaluate as `base_path`.
BASE_PATH = "./AttackPatternExtractor/FS-O2"

# (llm_id, llm, llm_no_cache) triples. `llm_id` doubles as the sub-directory
# name under BASE_PATH that holds the saved program for that model.
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra, llama_3_2_1b_instruct_no_cache_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra, llama_3_2_3b_instruct_no_cache_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra, llama_3_1_8b_instruct_no_cache_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra, llama_3_1_70b_instruct_turbo_no_cache_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra, qwen_2_5_7b_instruct_no_cache_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra, qwen_2_5_72b_instruct_no_cache_deepinfra),
]

for llm_id, llm, llm_no_cache in to_do:
    # dspy.load restores a whole saved program; only point it at directories
    # you trust (presumably the ones written by this notebook - confirm).
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    # Pass the loaded program plus both LM handles on, with devset as valset.
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Guard against accidental re-execution: per the message below, this evaluation
# has already been run and the optimizer steps are not cached. An explicit
# raise replaces `assert False` because asserts are stripped under `python -O`
# / PYTHONOPTIMIZE, which would silently disable the guard. Exception type and
# message are unchanged. Remove the raise to re-run deliberately.
raise AssertionError("already finished and optimizer steps are not cached!")

# Directory the saved program is loaded from; also forwarded to
# save_and_evaluate as `base_path`.
BASE_PATH = "./AttackPatternExtractor/FS-O2"

# (llm_id, llm, llm_no_cache) triples. `llm_id` doubles as the sub-directory
# name under BASE_PATH that holds the saved program for that model.
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm, qwen_2_5_1p5b_instruct_no_cache_vllm)
]

for llm_id, llm, llm_no_cache in to_do:
    # dspy.load restores a whole saved program; only point it at directories
    # you trust (presumably the ones written by this notebook - confirm).
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    # Pass the loaded program plus both LM handles on, with devset as valset.
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)

In [None]:
# Guard against accidental re-execution: per the message below, this evaluation
# has already been run and the optimizer steps are not cached. An explicit
# raise replaces `assert False` because asserts are stripped under `python -O`
# / PYTHONOPTIMIZE, which would silently disable the guard. Exception type and
# message are unchanged. Remove the raise to re-run deliberately.
raise AssertionError("already finished and optimizer steps are not cached!")

# Directory the saved program is loaded from; also forwarded to
# save_and_evaluate as `base_path`.
BASE_PATH = "./AttackPatternExtractor/FS-O2"

# (llm_id, llm, llm_no_cache) triples. `llm_id` doubles as the sub-directory
# name under BASE_PATH that holds the saved program for that model.
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm, qwen_2_5_3b_instruct_no_cache_vllm)
]

for llm_id, llm, llm_no_cache in to_do:
    # dspy.load restores a whole saved program; only point it at directories
    # you trust (presumably the ones written by this notebook - confirm).
    program = dspy.load(f"{BASE_PATH}/{llm_id}")
    # Pass the loaded program plus both LM handles on, with devset as valset.
    save_and_evaluate(program, llm, llm_no_cache, llm_id, base_path=BASE_PATH, valset=devset)