In [5]:
import dspy
import os 
import utils
import typing
import json

from dspy.teleprompt import MIPROv2
from BasicHtmlToTextParser import BasicHtmlToTextParser
from metrics import stixnet_f1

from typing import List
from dspy import InputField, OutputField

from dotenv import load_dotenv
_ = load_dotenv("../.env")

In [3]:
# Load the examples from the LADDER dataset folder. `random_seed` is fixed,
# presumably so that the ordering (and therefore the split) is reproducible.
dataset = utils.get_dspy_examples_randomized(
    "../LADDER-Dataset/",
    BasicHtmlToTextParser(include_images=False),
    random_seed=1337,
)

# Split at index 47 (the recorded cell output reports 47 train / 100 dev
# examples), then convert each split - train first, dev second - into
# threat-actor extraction examples.
trainset, devset = (
    utils.generate_threat_actors_extraction_dataset(split)
    for split in utils.split_dataset(split_at=47, dataset=dataset)
)

47 100 total: 147 train-ratio: 0.3197278911564626 dev-ratio: 0.6802721088435374


In [4]:
# Per-example scores, appended to on every `metric` call.
# `save_and_evaluate` rebinds these to fresh lists and dumps them to JSON.
PRECISION_STORE, RECALL_STORE, F1_STORE = [], [], []


def metric(example, pred, trace=None):
    """Score predicted threat actors against the gold labels, ignoring case.

    Names are lower-cased and compared as sets, so duplicates and ordering do
    not matter. A missing (`None`) list on either side is treated as "no
    threat actors" instead of raising.

    Side effect: appends the example's precision, recall and F1 to
    PRECISION_STORE, RECALL_STORE and F1_STORE.

    Args:
        example: object whose `threat_actors` attribute holds the gold names.
        pred: object whose `threat_actors` attribute holds the predicted names.
        trace: when not None, a strict pass/fail answer is returned instead of
            the score.

    Returns:
        The F1 score when `trace` is None, otherwise `True` only for a
        perfect F1 of 1.
    """
    gold_names = {name.lower() for name in (example.threat_actors or [])}
    pred_names = {name.lower() for name in (pred.threat_actors or [])}

    if not gold_names and not pred_names:
        # Nothing to find and nothing predicted counts as a perfect answer.
        precision, recall, f1 = 1, 1, 1
    else:
        true_positives = len(pred_names & gold_names)
        false_positives = len(pred_names - gold_names)
        false_negatives = len(gold_names - pred_names)

        precision, recall, f1 = stixnet_f1(true_positives, false_positives, false_negatives)

    PRECISION_STORE.append(precision)
    RECALL_STORE.append(recall)
    F1_STORE.append(f1)

    if trace is None:
        return f1
    return f1 == 1



# Sanity check: an example scored against itself must reach a perfect F1.
# Every call also appends to the module-level score stores.
for example in [*devset, *trainset]:
    assert metric(example, pred=example) == 1.0



def save_and_evaluate(program: dspy.Predict, llm: dspy.LM, llm_id: str, base_path: str, valset: typing.List[dspy.Example]):
    """Run `program` on every example of `valset` and dump the per-example scores.

    Scores are gathered through the module-level PRECISION_STORE, RECALL_STORE
    and F1_STORE lists that `metric` appends to; the lists are rebound to empty
    lists at the start of every call. An example whose prediction call raises
    is scored 0 for precision, recall and F1, and the failure is logged.

    Args:
        program: callable DSPy program, invoked with the example's inputs.
        llm: LM made active (via `dspy.settings.context`) for all predictions.
        llm_id: prefix of the output file names.
        base_path: directory the JSON files are written to; created if missing.
        valset: examples to evaluate.

    Returns:
        None. Writes `{base_path}/{llm_id}_precision.json`,
        `{base_path}/{llm_id}_recall.json` and `{base_path}/{llm_id}_f1.json`.
    """
    import logging  # local import keeps this cell independent of the notebook's import cell

    global PRECISION_STORE, RECALL_STORE, F1_STORE
    PRECISION_STORE, RECALL_STORE, F1_STORE = [], [], []

    # Create the output folder before any LM call, so a missing directory is
    # not discovered only after the whole evaluation has already been run.
    if base_path:
        os.makedirs(base_path, exist_ok=True)

    logger = logging.getLogger(__name__)

    with dspy.settings.context(lm=llm):
        for obj in valset:
            try:
                pred = program(**obj.inputs())
            except Exception as exc:
                # Best effort: a failed generation counts as a complete miss.
                logger.warning("prediction failed for %s, scored as 0: %r", llm_id, exc)
                PRECISION_STORE.append(0)
                RECALL_STORE.append(0)
                F1_STORE.append(0)
                continue

            # `metric` records the three scores as a side effect.
            metric(obj, pred)

    # store result
    for suffix, scores in (
        ("precision", PRECISION_STORE),
        ("recall", RECALL_STORE),
        ("f1", F1_STORE),
    ):
        with open(f"{base_path}/{llm_id}_{suffix}.json", "w") as fp:
            json.dump(scores, fp)

In [6]:
# Credentials, endpoint and sampling settings shared by every DeepInfra-hosted
# model below. The API key is read from the environment (None if unset).
_DEEPINFRA_LM_KWARGS = dict(
    api_key=os.environ.get("DEEPINFRA_API_KEY"),
    base_url="https://api.deepinfra.com/v1/openai",
    temperature=0.1,
    max_tokens=1024,
)

# Llama family
llama_3_1_8b_instruct_deepinfra = dspy.LM(model="openai/meta-llama/Meta-Llama-3.1-8B-Instruct", **_DEEPINFRA_LM_KWARGS)
llama_3_1_70b_instruct_turbo_deepinfra = dspy.LM(model="openai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", **_DEEPINFRA_LM_KWARGS)
llama_3_2_3b_instruct_deepinfra = dspy.LM(model="openai/meta-llama/Llama-3.2-3B-Instruct", **_DEEPINFRA_LM_KWARGS)
llama_3_2_1b_instruct_deepinfra = dspy.LM(model="openai/meta-llama/Llama-3.2-1B-Instruct", **_DEEPINFRA_LM_KWARGS)

# Qwen family
qwen_2_5_7b_instruct_deepinfra = dspy.LM(model="openai/Qwen/Qwen2.5-7B-Instruct", **_DEEPINFRA_LM_KWARGS)
qwen_2_5_72b_instruct_deepinfra = dspy.LM(model="openai/Qwen/Qwen2.5-72B-Instruct", **_DEEPINFRA_LM_KWARGS)



# The two small Qwen models are served from a separate OpenAI-compatible
# endpoint, for example vLLM: https://github.com/vllm-project/vllm
# NOTE(review): both values are None here - presumably they have to be set to
# the local server's key and URL before these LMs are used; confirm.
API_KEY = None
BASE_URL = None

# Same sampling settings as the hosted models.
_VLLM_LM_KWARGS = dict(
    api_key=API_KEY,
    base_url=BASE_URL,
    temperature=0.1,
    max_tokens=1024,
)

qwen_2_5_1p5b_instruct_vllm = dspy.LM(model="openai/Qwen/Qwen2.5-1.5B-Instruct", **_VLLM_LM_KWARGS)
qwen_2_5_3b_instruct_vllm = dspy.LM(model="openai/Qwen/Qwen2.5-3B-Instruct", **_VLLM_LM_KWARGS)

In [7]:
# DSPy signature for the extraction task: one input field (`threat_report`)
# and one output field (`threat_actors`, a list of strings).
# NOTE(review): the class docstring is presumably what DSPy sends to the model
# as the task instructions, which makes it runtime text. Its wording, typos
# included, is left untouched so results stay comparable; confirm before editing.
class ThreatActorExtractionSignature(dspy.Signature):
    """
    Extract Threat Actors mentioned in the threat report. 
    
    ***
    Threat Actors are actual individuals, groups, or organizations believed to be operating with malicious intent. A Threat Actor is not an Intrusion Set but may support or be affiliated with various Intrusion Sets, groups, or organizations over time. Threat Actors leverage their resources, and possibly the resources of an Intrusion Set, to conduct attacks and run Campaigns against targets. Threat Actors can be characterized by their motives, capabilities, goals, sophistication level, past activities, resources they have access to, and their role in the organization.
    ***
    
    Dont include every enitity in the result - instead focus only on relevant and established threat actors!
        
    Good examples: "apt-c-27", "_stealth_mango_and_tangelo_", "blackgear", "cl-sta-0043", "flying_kitten", "storm-1295",  "ta2722", "water_curupira" and other unique names of threat actors.
    
    Bad examples:  "Malware developers", "Hackers", "Hackers from korea", "Unknown", "Cyber criminals", "Hacker teams" and other names that are universal.
    """
    # Full text of the threat report to analyse.
    threat_report: str = InputField()
    # Names of the threat actors found in the report.
    threat_actors: List[str] = OutputField()

# ZERO-SHOT OPTIMIZATION

In [None]:
# MIPROv2 constructor arguments for the "O1" budget. Both demo limits are 0,
# which is the zero-shot setting of this section.
# NOTE: the few-shot section rebinds these same two names, so re-run this cell
# before running any ZERO-O1 optimization cell.
o1_optimizer_settings = {
    "num_candidates": 10,
    "max_bootstrapped_demos": 0,
    "max_labeled_demos": 0,
    "metric_threshold": None,
    "init_temperature": 0.5,
    "task_model": None,
    "num_threads": 16,
    "max_errors": 10,
    "prompt_model": llama_3_1_70b_instruct_turbo_deepinfra,
    "teacher_settings": {"lm": llama_3_1_70b_instruct_turbo_deepinfra},
    "auto": None,
}

# Keyword arguments forwarded to `MIPROv2.compile` for the "O1" budget.
o1_compiler_settings = {
    "num_trials": 30,
    "minibatch": True,
    "minibatch_size": 25,
    "minibatch_full_eval_steps": 10,
    "program_aware_proposer": True,
    "data_aware_proposer": True,
    "view_data_batch_size": 10,
    "tip_aware_proposer": True,
    "fewshot_aware_proposer": True,
    "requires_permission_to_run": False,
}

In [None]:
# MIPROv2 optimization with the O1 settings for the six DeepInfra-hosted models.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"

# Output folder; one optimized program per model id is saved below it.
BASE_PATH = "./ThreatActorExtractor/ZERO-O1"
# mlflow.autolog()
# mlflow.set_experiment(BASE_PATH)


# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra),
]


for llm_id, llm in to_do: 
    # Every model starts from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o1_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# MIPROv2 optimization with the O1 settings for the Qwen2.5-1.5B model.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"

# Output folder; the optimized program is saved below it under the model id.
BASE_PATH = "./ThreatActorExtractor/ZERO-O1"

# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm)
]


for llm_id, llm in to_do: 
    # Start from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o1_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# MIPROv2 optimization with the O1 settings for the Qwen2.5-3B model.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"


# Output folder; the optimized program is saved below it under the model id.
BASE_PATH = "./ThreatActorExtractor/ZERO-O1"



# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm)
]


for llm_id, llm in to_do: 
    # Start from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o1_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# MIPROv2 constructor arguments for the larger "O2" budget (25 candidates).
# Both demo limits are 0, which is the zero-shot setting of this section.
# NOTE: the few-shot section rebinds these same two names, so re-run this cell
# before running any ZERO-O2 optimization cell.
o2_optimizer_settings = {
    "num_candidates": 25,
    "max_bootstrapped_demos": 0,
    "max_labeled_demos": 0,
    "metric_threshold": None,
    "init_temperature": 0.5,
    "task_model": None,
    "num_threads": 16,
    "max_errors": 10,
    "prompt_model": llama_3_1_70b_instruct_turbo_deepinfra,
    "teacher_settings": {"lm": llama_3_1_70b_instruct_turbo_deepinfra},
    "auto": None,
}

# Keyword arguments forwarded to `MIPROv2.compile` for the "O2" budget (50 trials).
o2_compiler_settings = {
    "num_trials": 50,
    "minibatch": True,
    "minibatch_size": 25,
    "minibatch_full_eval_steps": 10,
    "program_aware_proposer": True,
    "data_aware_proposer": True,
    "view_data_batch_size": 10,
    "tip_aware_proposer": True,
    "fewshot_aware_proposer": True,
    "requires_permission_to_run": False,
}

In [None]:
# MIPROv2 optimization with the O2 settings for the six DeepInfra-hosted models.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"

# Output folder; one optimized program per model id is saved below it.
BASE_PATH = "./ThreatActorExtractor/ZERO-O2"
# mlflow.autolog()
# mlflow.set_experiment(BASE_PATH)

# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra),
]

for llm_id, llm in to_do: 
    # Every model starts from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o2_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)

    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# MIPROv2 optimization with the O2 settings for the Qwen2.5-1.5B model.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"

# Output folder; the optimized program is saved below it under the model id.
BASE_PATH = "./ThreatActorExtractor/ZERO-O2"


# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm)
]


for llm_id, llm in to_do: 
    # Start from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o2_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# MIPROv2 optimization with the O2 settings for the Qwen2.5-3B model.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"


# Output folder; the optimized program is saved below it under the model id.
BASE_PATH = "./ThreatActorExtractor/ZERO-O2"


# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm)
]


for llm_id, llm in to_do: 
    # Start from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o2_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

# FEW-SHOT OPTIMIZATION

In [None]:
# Few-shot variant of the "O1" budget: up to 1 bootstrapped and 2 labeled demos.
# NOTE: this rebinds the names already assigned in the zero-shot section;
# every later cell that reads them sees these few-shot values.
o1_optimizer_settings = {
    "num_candidates": 10,
    "max_bootstrapped_demos": 1,
    "max_labeled_demos": 2,
    "metric_threshold": None,
    "init_temperature": 0.5,
    "task_model": None,
    "num_threads": 16,
    "max_errors": 10,
    "prompt_model": llama_3_1_70b_instruct_turbo_deepinfra,
    "teacher_settings": {"lm": llama_3_1_70b_instruct_turbo_deepinfra},
    "auto": None,
}

# Keyword arguments forwarded to `MIPROv2.compile` for the "O1" budget.
o1_compiler_settings = {
    "num_trials": 30,
    "minibatch": True,
    "minibatch_size": 25,
    "minibatch_full_eval_steps": 10,
    "program_aware_proposer": True,
    "data_aware_proposer": True,
    "view_data_batch_size": 10,
    "tip_aware_proposer": True,
    "fewshot_aware_proposer": True,
    "requires_permission_to_run": False,
}

In [None]:
# Few-shot MIPROv2 optimization with the O1 settings for the six DeepInfra-hosted models.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"

# Output folder; one optimized program per model id is saved below it.
BASE_PATH = "./ThreatActorExtractor/FS-O1"

# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra),
]


for llm_id, llm in to_do: 
    # Every model starts from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o1_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Few-shot MIPROv2 optimization with the O1 settings for the Qwen2.5-1.5B model.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"

# Output folder; the optimized program is saved below it under the model id.
BASE_PATH = "./ThreatActorExtractor/FS-O1"


# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm)
]


for llm_id, llm in to_do: 
    # Start from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o1_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Few-shot MIPROv2 optimization with the O1 settings for the Qwen2.5-3B model.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"


# Output folder; the optimized program is saved below it under the model id.
BASE_PATH = "./ThreatActorExtractor/FS-O1"


# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm)
]


for llm_id, llm in to_do: 
    # Start from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o1_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o1_optimizer = MIPROv2(metric=metric, **o1_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o1_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o1_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Few-shot variant of the larger "O2" budget: 25 candidates, up to 1
# bootstrapped and 2 labeled demos.
# NOTE: this rebinds the names already assigned in the zero-shot section;
# every later cell that reads them sees these few-shot values.
o2_optimizer_settings = {
    "num_candidates": 25,
    "max_bootstrapped_demos": 1,
    "max_labeled_demos": 2,
    "metric_threshold": None,
    "init_temperature": 0.5,
    "task_model": None,
    "num_threads": 16,
    "max_errors": 10,
    "prompt_model": llama_3_1_70b_instruct_turbo_deepinfra,
    "teacher_settings": {"lm": llama_3_1_70b_instruct_turbo_deepinfra},
    "auto": None,
}

# Keyword arguments forwarded to `MIPROv2.compile` for the "O2" budget (50 trials).
o2_compiler_settings = {
    "num_trials": 50,
    "minibatch": True,
    "minibatch_size": 25,
    "minibatch_full_eval_steps": 10,
    "program_aware_proposer": True,
    "data_aware_proposer": True,
    "view_data_batch_size": 10,
    "tip_aware_proposer": True,
    "fewshot_aware_proposer": True,
    "requires_permission_to_run": False,
}

In [None]:
# Few-shot MIPROv2 optimization with the O2 settings for the six DeepInfra-hosted models.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"
# Output folder; one optimized program per model id is saved below it.
BASE_PATH = "./ThreatActorExtractor/FS-O2"

# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra),
]


for llm_id, llm in to_do: 
    # Every model starts from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o2_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Few-shot MIPROv2 optimization with the O2 settings for the Qwen2.5-1.5B model.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"


# Output folder; the optimized program is saved below it under the model id.
BASE_PATH = "./ThreatActorExtractor/FS-O2"


# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm)
]


for llm_id, llm in to_do: 
    # Start from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o2_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

In [None]:
# Few-shot MIPROv2 optimization with the O2 settings for the Qwen2.5-3B model.
# Guard: the assert always fails, so this cell cannot be re-run by accident
# (the run is finished and the optimizer steps are not cached).
assert False, "already finished and optimizer steps are not cached!"


# Output folder; the optimized program is saved below it under the model id.
BASE_PATH = "./ThreatActorExtractor/FS-O2"


# (model id used in the output path, LM that is active while compiling)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm)
]


for llm_id, llm in to_do: 
    # Start from a fresh, unoptimized predictor.
    default_program = dspy.Predict(ThreatActorExtractionSignature)

    # Uses whichever `o2_optimizer_settings` is currently bound; the name is
    # assigned in both the zero-shot and the few-shot settings cells.
    o2_optimizer = MIPROv2(metric=metric, **o2_optimizer_settings)
                    
    # NOTE(review): `devset` is the optimizer's valset here and is also the set
    # the evaluation cells score on - confirm this overlap is intended.
    with dspy.settings.context(lm=llm):
        optimized_program = o2_optimizer.compile(student=default_program, trainset=trainset, valset=devset, **o2_compiler_settings)

    # store program (cloudpickle)
    optimized_program.save(f"{BASE_PATH}/{llm_id}", save_program=True)

# TOTAL EVALUATION

In [None]:
# Baseline evaluation: the unoptimized predictor on `devset`, six DeepInfra-hosted models.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the per-example score files are written to.
BASE_PATH = "./ThreatActorExtractor/Baseline"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used as the file-name prefix, LM that answers the prompts)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra),
]

for llm_id, llm in to_do: 
    # Plain predictor built straight from the signature, with no optimization applied.
    program = dspy.Predict(ThreatActorExtractionSignature)
    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Baseline evaluation: the unoptimized predictor on `devset`, Qwen2.5-1.5B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"


# Folder the per-example score files are written to.
BASE_PATH = "./ThreatActorExtractor/Baseline"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used as the file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Plain predictor built straight from the signature, with no optimization applied.
    program = dspy.Predict(ThreatActorExtractionSignature)
    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Baseline evaluation: the unoptimized predictor on `devset`, Qwen2.5-3B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"


# Folder the per-example score files are written to.
BASE_PATH = "./ThreatActorExtractor/Baseline"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used as the file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Plain predictor built straight from the signature, with no optimization applied.
    program = dspy.Predict(ThreatActorExtractionSignature)
    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

## ZS EVAL

In [None]:
# Evaluation of the saved ZERO-O1 programs on `devset`, six DeepInfra-hosted models.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"
# Folder the programs are loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/ZERO-O1"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra),
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved ZERO-O1 program on `devset`, Qwen2.5-1.5B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the program is loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/ZERO-O1"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved ZERO-O1 program on `devset`, Qwen2.5-3B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the program is loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/ZERO-O1"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved ZERO-O2 programs on `devset`, six DeepInfra-hosted models.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the programs are loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/ZERO-O2"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra),
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved ZERO-O2 program on `devset`, Qwen2.5-1.5B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the program is loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/ZERO-O2"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved ZERO-O2 program on `devset`, Qwen2.5-3B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the program is loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/ZERO-O2"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved FS-O1 (few-shot) programs on `devset`, six DeepInfra-hosted models.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"
# Folder the programs are loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/FS-O1"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra),
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved FS-O1 (few-shot) program on `devset`, Qwen2.5-1.5B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the program is loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/FS-O1"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved FS-O1 (few-shot) program on `devset`, Qwen2.5-3B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the program is loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/FS-O1"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved FS-O2 (few-shot) programs on `devset`, six DeepInfra-hosted models.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the programs are loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/FS-O2"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("llama_3_2_1b_instruct", llama_3_2_1b_instruct_deepinfra),
    ("llama_3_2_3b_instruct", llama_3_2_3b_instruct_deepinfra),
    ("llama_3_1_8b_instruct", llama_3_1_8b_instruct_deepinfra),
    ("llama_3_1_70b_instruct", llama_3_1_70b_instruct_turbo_deepinfra),

    ("qwen_2_5_7b_instruct", qwen_2_5_7b_instruct_deepinfra),
    ("qwen_2_5_72b_instruct", qwen_2_5_72b_instruct_deepinfra),
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved FS-O2 (few-shot) program on `devset`, Qwen2.5-1.5B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the program is loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/FS-O2"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_1p5b_instruct", qwen_2_5_1p5b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)

In [None]:
# Evaluation of the saved FS-O2 (few-shot) program on `devset`, Qwen2.5-3B model.
# Guard: the assert always fails, so the finished evaluation is not repeated.
assert False, "DONE"

# Folder the program is loaded from and the score files are written to.
BASE_PATH = "./ThreatActorExtractor/FS-O2"
# NOTE(review): `mlflow` is not imported in any visible cell - these two calls
# would raise NameError once the guard is removed; confirm the import.
mlflow.autolog()
mlflow.set_experiment(BASE_PATH)

# (model id used in the load path and file-name prefix, LM that answers the prompts)
to_do = [
    ("qwen_2_5_3b_instruct", qwen_2_5_3b_instruct_vllm)
]

for llm_id, llm in to_do: 
    # Same `{BASE_PATH}/{llm_id}` path the optimization cells save to.
    program = dspy.load(f"{BASE_PATH}/{llm_id}")    
    save_and_evaluate(base_path=BASE_PATH, llm_id=llm_id, llm=llm, program=program, valset=devset)