In [None]:
import Modules.utils as utils
import typing
import pandas as pd

from matplotlib.ticker import NullLocator

import matplotlib.pyplot as plt
import matplotlib.axes
import matplotlib.figure


# Global matplotlib configuration for every figure in this notebook:
# serif fonts, text rendered through LaTeX ("text.usetex"), and pdflatex as the
# TeX system used when figures are written through the PGF backend.
# NOTE(review): "text.usetex": True needs a working LaTeX installation on the
# machine that executes the notebook — confirm for new environments.
plt.rcParams.update({
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,         
    "pgf.rcfonts": False,
})

In [None]:
def print_baseline_table(baseline_data: dict, print_latex_help: bool = False) -> pd.DataFrame:
    """Build a table with the averaged baseline scores of every model.

    Args:
        baseline_data: Mapping of model key to that model's results. For every
            model, ``values["Baseline"]`` must contain the entries ``"f1"``,
            ``"precision"`` and ``"recall"``; each is passed to
            ``utils.calc_avg(..., 2)``.
        print_latex_help: When True, additionally print one LaTeX table row
            (cells joined by ``&`` and terminated by ``\\\\``) per model.

    Returns:
        A DataFrame with the columns ``Model``, ``F1``, ``Precision`` and
        ``Recall`` and one row per model, in the iteration order of
        ``baseline_data``.

    Raises:
        KeyError: If a model has no ``"Baseline"`` entry or a metric is missing.
    """
    # Human-readable names for the LaTeX rows. Built once instead of once per
    # loop iteration; model keys that are not listed fall back to the raw key
    # instead of raising KeyError.
    display_names = {
        "llama_3_2_1b_instruct": "Llama 3.2 1b instruct",
        "llama_3_2_3b_instruct": "Llama 3.2 3b instruct",
        "llama_3_1_8b_instruct": "Llama 3.1 8b instruct",
        "llama_3_1_70b_instruct": "Llama 3.1 70b instruct",
        "qwen_2_5_1p5b_instruct": "Qwen 2.5 1.5b instruct",
        "qwen_2_5_3b_instruct": "Qwen 2.5 3b instruct",
        "qwen_2_5_7b_instruct": "Qwen 2.5 7b instruct",
        "qwen_2_5_72b_instruct": "Qwen 2.5 72b instruct",
    }
    metrics = ("f1", "precision", "recall")

    rows = []
    for model, values in baseline_data.items():
        baseline = values["Baseline"]
        # Every average is computed exactly once and reused for table and LaTeX row.
        scores = [utils.calc_avg(baseline[metric], 2) for metric in metrics]
        rows.append([model, *scores])

        if print_latex_help:
            cells = [display_names.get(model, model), *scores]
            print(" & ".join(str(cell) for cell in cells), "\\\\")

    # The former `df.style...` call was dropped: it created a Styler that was
    # discarded immediately, so it never influenced the returned DataFrame.
    return pd.DataFrame(rows, columns=["Model", "F1", "Precision", "Recall"])


def print_zero_shot_table(zero_shot_data: dict, print_latex_help: bool = False) -> pd.DataFrame:
    """Build a table comparing baseline and zero-shot scores of every model.

    Args:
        zero_shot_data: Mapping of model key to that model's results. For every
            model the entries ``"Baseline"``, ``"ZERO-O1"`` and ``"ZERO-O2"``
            must each contain ``"f1"``, ``"precision"`` and ``"recall"``; each
            value is passed to ``utils.calc_avg(..., 2)``.
        print_latex_help: When True, additionally print one LaTeX table row
            (cells joined by ``&`` and terminated by ``\\\\``) per model.

    Returns:
        A DataFrame with one row per model (iteration order of
        ``zero_shot_data``) and the columns ``Model``, the three baseline
        metrics, then the three ZS-O1 and the three ZS-O2 metrics.

    Raises:
        KeyError: If a category or metric is missing for a model.
    """
    # Human-readable names for the LaTeX rows. Built once instead of once per
    # loop iteration; model keys that are not listed fall back to the raw key
    # instead of raising KeyError.
    display_names = {
        "llama_3_2_1b_instruct": "Llama 3.2 1b instruct",
        "llama_3_2_3b_instruct": "Llama 3.2 3b instruct",
        "llama_3_1_8b_instruct": "Llama 3.1 8b instruct",
        "llama_3_1_70b_instruct": "Llama 3.1 70b instruct",
        "qwen_2_5_1p5b_instruct": "Qwen 2.5 1.5b instruct",
        "qwen_2_5_3b_instruct": "Qwen 2.5 3b instruct",
        "qwen_2_5_7b_instruct": "Qwen 2.5 7b instruct",
        "qwen_2_5_72b_instruct": "Qwen 2.5 72b instruct",
    }
    # Column order of the table: category-major, metric-minor.
    categories = ("Baseline", "ZERO-O1", "ZERO-O2")
    metrics = ("f1", "precision", "recall")

    rows = []
    for model, values in zero_shot_data.items():
        # Every average is computed exactly once and reused for table and LaTeX row.
        scores = [
            utils.calc_avg(values[category][metric], 2)
            for category in categories
            for metric in metrics
        ]
        rows.append([model, *scores])

        if print_latex_help:
            cells = [display_names.get(model, model), *scores]
            print(" & ".join(str(cell) for cell in cells), "\\\\")

    # The former `df.style...` call was dropped: it created a Styler that was
    # discarded immediately, so it never influenced the returned DataFrame.
    return pd.DataFrame(
        rows,
        columns=[
            "Model", "F1", "Precision", "Recall",
            "ZS-O1-F1", "ZS-O1-Precision", "ZS-O1-Recall",
            "ZS-O2-F1", "ZS-O2-Precision", "ZS-O2-Recall",
        ],
    )



def print_few_shot_table(zero_shot_data: dict, print_latex_help: bool = False) -> pd.DataFrame:
    """Build a table comparing baseline, few-shot and zero-shot scores.

    Args:
        zero_shot_data: Mapping of model key to that model's results (the name
            is historical; the data must also contain the few-shot results).
            For every model the entries ``"Baseline"``, ``"ZERO-O1"``,
            ``"ZERO-O2"``, ``"FS-O1"`` and ``"FS-O2"`` must each contain
            ``"f1"``, ``"precision"`` and ``"recall"``; each value is passed to
            ``utils.calc_avg(..., 2)``.
        print_latex_help: When True, additionally print one LaTeX table row
            (cells joined by ``&`` and terminated by ``\\\\``) per model.

    Returns:
        A DataFrame with one row per model (iteration order of
        ``zero_shot_data``). Columns are ``Model`` followed by F1, precision
        and recall for the baseline, FS-O1, FS-O2, ZS-O1 and ZS-O2, in that order.

    Raises:
        KeyError: If a category or metric is missing for a model.
    """
    # Human-readable names for the LaTeX rows. Built once instead of once per
    # loop iteration; model keys that are not listed fall back to the raw key
    # instead of raising KeyError.
    display_names = {
        "llama_3_2_1b_instruct": "Llama 3.2 1b instruct",
        "llama_3_2_3b_instruct": "Llama 3.2 3b instruct",
        "llama_3_1_8b_instruct": "Llama 3.1 8b instruct",
        "llama_3_1_70b_instruct": "Llama 3.1 70b instruct",
        "qwen_2_5_1p5b_instruct": "Qwen 2.5 1.5b instruct",
        "qwen_2_5_3b_instruct": "Qwen 2.5 3b instruct",
        "qwen_2_5_7b_instruct": "Qwen 2.5 7b instruct",
        "qwen_2_5_72b_instruct": "Qwen 2.5 72b instruct",
    }
    # Column order of the table: few-shot results come before zero-shot results.
    categories = ("Baseline", "FS-O1", "FS-O2", "ZERO-O1", "ZERO-O2")
    metrics = ("f1", "precision", "recall")

    rows = []
    for model, values in zero_shot_data.items():
        # Every average is computed exactly once and reused for table and LaTeX row.
        scores = [
            utils.calc_avg(values[category][metric], 2)
            for category in categories
            for metric in metrics
        ]
        rows.append([model, *scores])

        if print_latex_help:
            cells = [display_names.get(model, model), *scores]
            print(" & ".join(str(cell) for cell in cells), "\\\\")

    # The former `df.style...` call was dropped: it created a Styler that was
    # discarded immediately, so it never influenced the returned DataFrame.
    return pd.DataFrame(
        rows,
        columns=[
            "Model", "F1", "Precision", "Recall",
            "FS-O1-F1", "FS-O1-Precision", "FS-O1-Recall",
            "FS-O2-F1", "FS-O2-Precision", "FS-O2-Recall",
            "ZS-O1-F1", "ZS-O1-Precision", "ZS-O1-Recall",
            "ZS-O2-F1", "ZS-O2-Precision", "ZS-O2-Recall",
        ],
    )


# Line width and marker size shared by the plot calls in this notebook;
# unpacked into `Axes.plot` via `**line_kwargs`.
line_kwargs = dict(linewidth=0.7, markersize=5)


def generate_plot_base(font_size: int) -> tuple[matplotlib.figure.Figure, matplotlib.axes.Axes, matplotlib.axes.Axes]:
    """Create the empty two-panel figure shared by all F1 plots.

    The left panel is prepared for the Llama models and the right panel for the
    Qwen models. Both panels get a logarithmic x axis whose ticks sit on the
    model sizes and are labelled with the model names, a y axis limited to
    [0, 1] with ticks every 0.2, and a faint dashed vertical guide line at
    every model size. Only the left panel carries the "F1-Score" y label.

    Args:
        font_size: Font size used for the axis label and all tick labels.

    Returns:
        The figure, the Llama (left) axes and the Qwen (right) axes.
    """
    # (tick positions, tick labels) for the left and the right panel.
    panel_specs = (
        (
            [1, 3, 8, 70],
            ["Llama 3.2 1B \n instruct", "Llama 3.2 3B \n instruct", "Llama 3.1 8B \n instruct", "Llama 3.1 70B \n instruct"],
        ),
        (
            [1.5, 3, 7, 72],
            ["Qwen 2.5 1.5B \n instruct", "Qwen 2.5 3B \n instruct", "Qwen 2.5 7B \n instruct", "Qwen 2.5 72B \n instruct"],
        ),
    )
    y_ticks = [0.2, 0.4, 0.6, 0.8, 1.0]

    fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharey=False)
    ax_llama: matplotlib.axes.Axes = axes[0]
    ax_qwen: matplotlib.axes.Axes = axes[1]
    assert isinstance(axes[0], matplotlib.axes.Axes) and isinstance(axes[1], matplotlib.axes.Axes)

    # The y label is only drawn once, on the left panel.
    ax_llama.set_ylim(0, 1)
    ax_llama.set_ylabel("F1-Score", fontsize=font_size)

    for panel, (tick_positions, tick_labels) in zip((ax_llama, ax_qwen), panel_specs):
        # Logarithmic size axis with ticks only at the actual model sizes.
        panel.set_xscale("log")
        panel.set_xticks(tick_positions)
        panel.xaxis.set_minor_locator(NullLocator())
        panel.set_xticklabels(tick_labels, rotation=45, fontsize=font_size, ha="right")

        panel.set_yticks(y_ticks)
        panel.set_yticklabels(y_ticks, fontsize=font_size)

        # Faint vertical guide line at every model size.
        for position in tick_positions:
            panel.plot([position, position], [0, 1], linestyle="dashed", color="gray", linewidth=0.4, alpha=0.5)

        panel.set_ylim(0, 1)

    fig.tight_layout()

    return fig, ax_llama, ax_qwen



def get_f1_in_order(model_family: typing.Literal["llama", "qwen"], category: typing.Literal["Baseline", "ZERO-O1", "ZERO-O2", "FS-O1", "FS-O2"], data: dict):
    """Return the averaged F1 values of one model family, smallest model first.

    Args:
        model_family: "llama" or "qwen"; any other value yields an empty list.
        category: Result category to read for every model.
        data: Mapping ``model key -> category -> {"f1": ...}``; the "f1" entry
            is passed to ``utils.calc_avg``.

    Returns:
        A list with one averaged F1 value per model of the family, ordered by
        model size (1b, 3b, 8b, 70b for Llama; 1.5b, 3b, 7b, 72b for Qwen).
    """
    if model_family == "llama":
        model_keys = (
            "llama_3_2_1b_instruct",
            "llama_3_2_3b_instruct",
            "llama_3_1_8b_instruct",
            "llama_3_1_70b_instruct",
        )
    elif model_family == "qwen":
        model_keys = (
            "qwen_2_5_1p5b_instruct",
            "qwen_2_5_3b_instruct",
            "qwen_2_5_7b_instruct",
            "qwen_2_5_72b_instruct",
        )
    else:
        # Unknown families produce no values.
        model_keys = ()

    return [utils.calc_avg(data[model_key][category]["f1"]) for model_key in model_keys]

# Malware Extractor

In [None]:
# Load the Malware Extractor data for all five categories and all eight models
# through utils.load_json_files; the result is indexed below as
# data[model][category][metric].
malware_extractor_data = utils.load_json_files("./Modules/MalwareExtractor", ["Baseline", "ZERO-O1", "ZERO-O2", "FS-O1", "FS-O2"], models=["llama_3_2_1b_instruct", "llama_3_2_3b_instruct", "llama_3_1_8b_instruct", "llama_3_1_70b_instruct", "qwen_2_5_1p5b_instruct", "qwen_2_5_3b_instruct", "qwen_2_5_7b_instruct", "qwen_2_5_72b_instruct"])

## Baseline

In [None]:
# Display the baseline table; print_latex_help=True also prints one LaTeX row per model.
print_baseline_table(malware_extractor_data, print_latex_help=True)

In [None]:
# Baseline F1 per model size for the Malware Extractor, one panel per model family.
font_size = 11
baseline_fig, ax_llama, ax_qwen = generate_plot_base(font_size=font_size)

llama_model_sizes = [1, 3, 8, 70]
qwen_model_sizes = [1.5, 3, 7, 72]
llama_baseline_data = get_f1_in_order("llama", "Baseline", malware_extractor_data)
qwen_baseline_data = get_f1_in_order("qwen", "Baseline", malware_extractor_data)

baseline_style = dict(line_kwargs, marker="o", color="gray", label="BASE")

for panel, sizes, scores in (
    (ax_llama, llama_model_sizes, llama_baseline_data),
    (ax_qwen, qwen_model_sizes, qwen_baseline_data),
):
    # Shaded area below the baseline curve, then the curve itself.
    panel.fill_between(sizes, 0, scores, color="gray", alpha=0.2)
    panel.plot(sizes, scores, **baseline_style)

ax_llama.legend(loc="lower right", prop={'size': font_size})
ax_qwen.legend(loc="lower right", prop={'size': font_size})

# baseline_fig.savefig("./pgfs/MalwareExtractorBase.pgf")

## ZERO SHOT

In [None]:
# Build the baseline vs. zero-shot table; it is stored in `df` and not displayed by this cell.
df = print_zero_shot_table(malware_extractor_data, print_latex_help=False)

In [None]:
import matplotlib.lines


font_size = 11
baseline_fig, ax_llama, ax_qwen = generate_plot_base(font_size=font_size)

# x positions (model sizes, matching the tick labels of generate_plot_base).
# Both lists are defined here so the cell no longer depends on
# `qwen_model_sizes` leaking in from the Baseline cell.
llama_model_sizes = [1, 3, 8, 70]
qwen_model_sizes = [1.5, 3, 7, 72]

# Every F1 series is fetched once and reused by the plot calls below.
llama_baseline_f1 = get_f1_in_order("llama", "Baseline", malware_extractor_data)
llama_zs_o1_f1 = get_f1_in_order("llama", "ZERO-O1", data=malware_extractor_data)
llama_zs_o2_f1 = get_f1_in_order("llama", "ZERO-O2", data=malware_extractor_data)
qwen_baseline_f1 = get_f1_in_order("qwen", "Baseline", malware_extractor_data)
qwen_zs_o1_f1 = get_f1_in_order("qwen", "ZERO-O1", data=malware_extractor_data)
qwen_zs_o2_f1 = get_f1_in_order("qwen", "ZERO-O2", data=malware_extractor_data)


# LLAMA FILL BASELINE
ax_llama.fill_between(llama_model_sizes, 0, llama_baseline_f1, color="gray", alpha=0.2)

# LLAMA BASELINE
ax_llama.plot(llama_model_sizes, llama_baseline_f1, **line_kwargs,
            marker="o",
            color="gray",
            label="BASE"
            )

# LLAMA ZS O1: the two smallest models (1B, 3B)
ax_llama.plot(llama_model_sizes[:2], llama_zs_o1_f1[:2], **line_kwargs,
            marker="s",
            label="ZS-O1",
            color="blue"
            )

# LLAMA ZS O1: gray connector from the 3B ZS-O1 value to the 8B BASE value
ax_llama.plot([3, 8], [llama_zs_o1_f1[1], llama_baseline_f1[2]], color="gray", zorder=0, **line_kwargs,)

# LLAMA ZS O1: 8B --> 70B segment of the ZS-O1 series; markevery=[1] draws a
# marker only at the 70B end.
ax_llama.plot(llama_model_sizes[2:], llama_zs_o1_f1[2:], color="blue", marker="s", zorder=0, **line_kwargs, markevery=[1])


# LLAMA ZS O2: the two smallest models (1B, 3B)
ax_llama.plot(llama_model_sizes[:2], llama_zs_o2_f1[:2], **line_kwargs,
            marker="^",
            label="ZS-O2",
            color="red"
            )

# LLAMA ZS O2: gray connector from the 3B ZS-O2 value to the 8B BASE value
ax_llama.plot([3, 8], [llama_zs_o2_f1[1], llama_baseline_f1[2]], color="gray", zorder=0, **line_kwargs)


# QWEN FILL BASELINE
ax_qwen.fill_between(qwen_model_sizes, 0, qwen_baseline_f1, color="gray", alpha=0.2)

# QWEN BASELINE
ax_qwen.plot(qwen_model_sizes, qwen_baseline_f1, **line_kwargs,
            marker="o",
            color="gray",
            label="BASE"
            )

# QWEN ZS O1
ax_qwen.plot(qwen_model_sizes, qwen_zs_o1_f1, **line_kwargs,
            marker="s",
            label="ZS-O1",
            color="blue",
            zorder=2,
            )

# QWEN ZS O2
ax_qwen.plot(qwen_model_sizes, qwen_zs_o2_f1, **line_kwargs,
            marker="^",
            label="ZS-O2",
            color="red",
            zorder=1,
            )


ax_llama.legend(loc="lower right", prop={'size': font_size})
ax_qwen.legend(loc="lower right", prop={'size': font_size})


# NOTE(review): this write presumably requires the ./pgfs directory to exist — confirm.
baseline_fig.savefig("./pgfs/MalwareExtractorZS.pgf")

## FEW SHOT

In [None]:
import matplotlib.lines

font_size = 11
baseline_fig, ax_llama, ax_qwen = generate_plot_base(font_size=font_size)

# x positions (model sizes, matching the tick labels of generate_plot_base).
# Both lists are defined here so the cell no longer depends on
# `qwen_model_sizes` leaking in from an earlier cell.
llama_model_sizes = [1, 3, 8, 70]
qwen_model_sizes = [1.5, 3, 7, 72]

# Every F1 series is fetched once and reused by the plot calls below.
llama_baseline_f1 = get_f1_in_order("llama", "Baseline", malware_extractor_data)
llama_zs_o1_f1 = get_f1_in_order("llama", "ZERO-O1", data=malware_extractor_data)
llama_zs_o2_f1 = get_f1_in_order("llama", "ZERO-O2", data=malware_extractor_data)
llama_fs_o1_f1 = get_f1_in_order("llama", "FS-O1", data=malware_extractor_data)
llama_fs_o2_f1 = get_f1_in_order("llama", "FS-O2", data=malware_extractor_data)
qwen_baseline_f1 = get_f1_in_order("qwen", "Baseline", malware_extractor_data)
qwen_zs_o1_f1 = get_f1_in_order("qwen", "ZERO-O1", data=malware_extractor_data)
qwen_zs_o2_f1 = get_f1_in_order("qwen", "ZERO-O2", data=malware_extractor_data)
qwen_fs_o1_f1 = get_f1_in_order("qwen", "FS-O1", data=malware_extractor_data)
qwen_fs_o2_f1 = get_f1_in_order("qwen", "FS-O2", data=malware_extractor_data)


# LLAMA FILL BASELINE
ax_llama.fill_between(llama_model_sizes, 0, llama_baseline_f1, color="gray", alpha=0.2)

# LLAMA BASELINE
ax_llama.plot(llama_model_sizes, llama_baseline_f1, **line_kwargs,
            marker="o",
            color="gray",
            label="BASE",
            zorder=10,
            )

# LLAMA ZS O1: the two smallest models (1B, 3B)
ax_llama.plot(llama_model_sizes[:2], llama_zs_o1_f1[:2], **line_kwargs,
            marker="s",
            label="ZS-O1",
            color="blue",
            zorder=0,
            )

# LLAMA ZS O1: connector from the 3B ZS-O1 value to the 8B BASE value
ax_llama.plot([3, 8], [llama_zs_o1_f1[1], llama_baseline_f1[2]], color="blue", **line_kwargs, zorder=0)

# LLAMA ZS O1: 8B --> 70B segment of the ZS-O1 series; markevery=[1] draws a
# marker only at the 70B end.
ax_llama.plot(llama_model_sizes[2:], llama_zs_o1_f1[2:], color="blue", marker="s", **line_kwargs, markevery=[1], zorder=0)

# LLAMA ZS O2: connector from the 3B ZS-O2 value to the 8B BASE value
ax_llama.plot([3, 8], [llama_zs_o2_f1[1], llama_baseline_f1[2]], color="red", **line_kwargs, zorder=0)

# LLAMA ZS O2: 8B --> 70B segment of the ZS-O2 series, marker only at the 70B end.
# NOTE(review): this segment is gray with a square marker, while the other
# ZS-O2 artists are red with "^" and the Threat Actor few-shot cell draws the
# same segment in red — confirm whether gray is intended.
ax_llama.plot(llama_model_sizes[2:], llama_zs_o2_f1[2:], color="gray", marker="s", **line_kwargs, markevery=[1], zorder=0)

# LLAMA ZS O2: the two smallest models (1B, 3B)
ax_llama.plot(llama_model_sizes[:2], llama_zs_o2_f1[:2], **line_kwargs,
            marker="^",
            label="ZS-O2",
            color="red",
            zorder=0,
            )


# QWEN FILL BASELINE
ax_qwen.fill_between(qwen_model_sizes, 0, qwen_baseline_f1, color="gray", alpha=0.2)

# QWEN BASELINE
ax_qwen.plot(qwen_model_sizes, qwen_baseline_f1, **line_kwargs,
            marker="o",
            color="gray",
            label="BASE",
            zorder=10,
            )

# QWEN ZS O1
ax_qwen.plot(qwen_model_sizes, qwen_zs_o1_f1, **line_kwargs,
            marker="s",
            label="ZS-O1",
            color="blue",
            zorder=0,
            )

# QWEN ZS O2
ax_qwen.plot(qwen_model_sizes, qwen_zs_o2_f1, **line_kwargs,
            marker="^",
            label="ZS-O2",
            color="red",
            zorder=0,
            )


# LLAMA FS O1
ax_llama.plot(llama_model_sizes, llama_fs_o1_f1, **line_kwargs,
            marker="p",
            label="FS-O1",
            color="purple",
            zorder=10,
            )

# LLAMA FS O2
ax_llama.plot(llama_model_sizes, llama_fs_o2_f1, **line_kwargs,
            marker="d",
            label="FS-O2",
            color="green",
            zorder=10,
            )

# QWEN FS O1
ax_qwen.plot(qwen_model_sizes, qwen_fs_o1_f1, **line_kwargs,
            marker="p",
            label="FS-O1",
            color="purple",
            zorder=10,
            )

# QWEN FS O2
ax_qwen.plot(qwen_model_sizes, qwen_fs_o2_f1, **line_kwargs,
            marker="d",
            label="FS-O2",
            color="green",
            zorder=10,
            )


ax_llama.legend(loc="lower right", prop={'size': font_size})
ax_qwen.legend(loc="lower right", prop={'size': font_size})


# baseline_fig.savefig("./pgfs/MalwareExtractorFS.pgf")

In [None]:
# Build the full comparison table (baseline, few-shot, zero-shot) and print the LaTeX rows.
df = print_few_shot_table(malware_extractor_data, print_latex_help=True)

In [None]:
# Reduce the table to the model name and the F1 columns by dropping every
# precision and recall column in one go.
non_f1_columns = [col for col in df.columns if "Recall" in str(col) or "Precision" in str(col)]
df = df.drop(columns=non_f1_columns)
df

# Threat Actor Extractor

In [None]:
# Load the Threat Actor Extractor data for all five categories and all eight
# models through utils.load_json_files; the result is indexed below as
# data[model][category][metric].
threat_actor_extractor_data = utils.load_json_files("./Modules/ThreatActorExtractor/", ["Baseline", "ZERO-O1", "ZERO-O2", "FS-O1", "FS-O2"], models=["llama_3_2_1b_instruct", "llama_3_2_3b_instruct", "llama_3_1_8b_instruct", "llama_3_1_70b_instruct", "qwen_2_5_1p5b_instruct", "qwen_2_5_3b_instruct", "qwen_2_5_7b_instruct", "qwen_2_5_72b_instruct"])

## Baseline

In [None]:
# Baseline F1 per model size for the Threat Actor Extractor, one panel per model family.
font_size = 11
baseline_fig, ax_llama, ax_qwen = generate_plot_base(font_size=font_size)

llama_model_sizes = [1, 3, 8, 70]
qwen_model_sizes = [1.5, 3, 7, 72]
llama_baseline_data = get_f1_in_order("llama", "Baseline", threat_actor_extractor_data)
qwen_baseline_data = get_f1_in_order("qwen", "Baseline", threat_actor_extractor_data)

baseline_style = dict(line_kwargs, marker="o", color="gray", label="BASE")

for panel, sizes, scores in (
    (ax_llama, llama_model_sizes, llama_baseline_data),
    (ax_qwen, qwen_model_sizes, qwen_baseline_data),
):
    # Shaded area below the baseline curve, then the curve itself.
    panel.fill_between(sizes, 0, scores, color="gray", alpha=0.2)
    panel.plot(sizes, scores, **baseline_style)

ax_llama.legend(loc="lower right", prop={'size': font_size})
ax_qwen.legend(loc="lower right", prop={'size': font_size})

# baseline_fig.savefig("./pgfs/ThreatActorExtractorBase.pgf")

## FEW SHOT

In [None]:
# Build the full comparison table (baseline, few-shot, zero-shot), print the
# LaTeX rows and display the table.
df = print_few_shot_table(threat_actor_extractor_data, print_latex_help=True)

df

In [None]:
# Reduce the table to the model name and the F1 columns by dropping every
# precision and recall column in one go.
non_f1_columns = [col for col in df.columns if "Recall" in str(col) or "Precision" in str(col)]
df = df.drop(columns=non_f1_columns)

df

In [None]:
font_size = 11
baseline_fig, ax_llama, ax_qwen = generate_plot_base(font_size=font_size)

llama_model_sizes = [1, 3, 8, 70]
qwen_model_sizes = [1.5, 3, 7, 72]

# F1 series per family and category, fetched once up front.
llama_baseline_data = get_f1_in_order("llama", "Baseline", threat_actor_extractor_data)
qwen_baseline_data = get_f1_in_order("qwen", "Baseline", threat_actor_extractor_data)
llama_zs_o1_f1 = get_f1_in_order("llama", "ZERO-O1", data=threat_actor_extractor_data)
llama_zs_o2_f1 = get_f1_in_order("llama", "ZERO-O2", data=threat_actor_extractor_data)
llama_fs_o1_f1 = get_f1_in_order("llama", "FS-O1", data=threat_actor_extractor_data)
llama_fs_o2_f1 = get_f1_in_order("llama", "FS-O2", data=threat_actor_extractor_data)
qwen_zs_o1_f1 = get_f1_in_order("qwen", "ZERO-O1", data=threat_actor_extractor_data)
qwen_zs_o2_f1 = get_f1_in_order("qwen", "ZERO-O2", data=threat_actor_extractor_data)
qwen_fs_o1_f1 = get_f1_in_order("qwen", "FS-O1", data=threat_actor_extractor_data)
qwen_fs_o2_f1 = get_f1_in_order("qwen", "FS-O2", data=threat_actor_extractor_data)

# Keyword sets shared by the Llama and the Qwen panel.
base_style = dict(line_kwargs, marker="o", color="gray", label="BASE")
zs_o1_style = dict(line_kwargs, marker="s", label="ZS-O1", color="blue", zorder=0)
zs_o2_style = dict(line_kwargs, marker="^", label="ZS-O2", color="red", zorder=0)
fs_o1_style = dict(line_kwargs, marker="p", label="FS-O1", color="purple", zorder=10)
fs_o2_style = dict(line_kwargs, marker="d", label="FS-O2", color="green", zorder=10)

# BASELINE: shaded area plus curve, Llama first, then Qwen
ax_llama.fill_between(llama_model_sizes, 0, llama_baseline_data, color="gray", alpha=0.2)
ax_llama.plot(llama_model_sizes, llama_baseline_data, **base_style)

ax_qwen.fill_between(qwen_model_sizes, 0, qwen_baseline_data, color="gray", alpha=0.2)
ax_qwen.plot(qwen_model_sizes, qwen_baseline_data, **base_style)

# LLAMA ZS O1 / ZS O2: the two smallest models (1B, 3B)
ax_llama.plot(llama_model_sizes[:2], llama_zs_o1_f1[:2], **zs_o1_style)
ax_llama.plot(llama_model_sizes[:2], llama_zs_o2_f1[:2], **zs_o2_style)

# LLAMA ZS O1: connector from the 3B ZS-O1 value to the 8B BASE value
ax_llama.plot([3, 8], [llama_zs_o1_f1[1], llama_baseline_data[2]], color="blue", zorder=0, **line_kwargs)

# LLAMA ZS O1: 8B --> 70B segment of the ZS-O1 series; markevery=[1] draws a
# marker only at the 70B end.
ax_llama.plot(llama_model_sizes[2:], llama_zs_o1_f1[2:], color="blue", marker="s", markevery=[1], zorder=0, **line_kwargs)

# LLAMA ZS O2: connector from the 3B ZS-O2 value to the 8B BASE value
ax_llama.plot([3, 8], [llama_zs_o2_f1[1], llama_baseline_data[2]], color="red", zorder=0, **line_kwargs)

# LLAMA ZS O2: 8B --> 70B segment of the ZS-O2 series, marker only at the 70B end.
# NOTE(review): the marker here is "s" although the ZS-O2 legend entry uses "^" — confirm.
ax_llama.plot(llama_model_sizes[2:], llama_zs_o2_f1[2:], color="red", marker="s", markevery=[1], zorder=0, **line_kwargs)

# QWEN ZS O1 / ZS O2
ax_qwen.plot(qwen_model_sizes, qwen_zs_o1_f1, **zs_o1_style)
ax_qwen.plot(qwen_model_sizes, qwen_zs_o2_f1, **zs_o2_style)

# LLAMA FS O1 / FS O2
ax_llama.plot(llama_model_sizes, llama_fs_o1_f1, **fs_o1_style)
ax_llama.plot(llama_model_sizes, llama_fs_o2_f1, **fs_o2_style)

# QWEN FS O1 / FS O2
ax_qwen.plot(qwen_model_sizes, qwen_fs_o1_f1, **fs_o1_style)
ax_qwen.plot(qwen_model_sizes, qwen_fs_o2_f1, **fs_o2_style)

ax_llama.legend(loc="lower right", prop={'size': font_size})
ax_qwen.legend(loc="lower right", prop={'size': font_size})

# baseline_fig.savefig("./pgfs/ThreatActorExtractorFS.pgf")

# Attack Pattern Extractor

In [None]:
# Load the Attack Pattern Extractor data for all five categories and all eight
# models through utils.load_json_files; the result is indexed below as
# data[model][category][metric].
attack_pattern_extractor_data = utils.load_json_files("./Modules/AttackPatternExtractor/", ["Baseline", "ZERO-O1", "ZERO-O2", "FS-O1", "FS-O2"], models=["llama_3_2_1b_instruct", "llama_3_2_3b_instruct", "llama_3_1_8b_instruct", "llama_3_1_70b_instruct", "qwen_2_5_1p5b_instruct", "qwen_2_5_3b_instruct", "qwen_2_5_7b_instruct", "qwen_2_5_72b_instruct"])

In [None]:
# Display the baseline table; no LaTeX rows are printed (print_latex_help=False).
print_baseline_table(attack_pattern_extractor_data, print_latex_help=False)

In [None]:
# Baseline F1 per model size for the Attack Pattern Extractor. Markers and
# connecting segments are drawn as separate artists, one per point / per pair
# of neighbouring sizes.
font_size = 11
baseline_fig, ax_llama, ax_qwen = generate_plot_base(font_size=font_size)

llama_model_sizes = [1, 3, 8, 70]
qwen_model_sizes = [1.5, 3, 7, 72]

llama_baseline_data = get_f1_in_order("llama", "Baseline", attack_pattern_extractor_data)
qwen_baseline_data = get_f1_in_order("qwen", "Baseline", attack_pattern_extractor_data)

for panel, sizes, scores in (
    (ax_llama, llama_model_sizes, llama_baseline_data),
    (ax_qwen, qwen_model_sizes, qwen_baseline_data),
):
    # Fill
    panel.fill_between(sizes, 0, scores, color="gray", alpha=0.2)

    # Points: only the first marker carries the legend label.
    for position, (size, score) in enumerate(zip(sizes, scores)):
        legend_entry = {"label": "BASE"} if position == 0 else {}
        panel.plot(size, score, marker="o", color="gray", **legend_entry, **line_kwargs)

    # Lines: one segment between each pair of neighbouring model sizes.
    for start in range(len(sizes) - 1):
        panel.plot(sizes[start:start + 2], scores[start:start + 2], color="gray", **line_kwargs)

# === LEGENDS + SAVE ===
ax_llama.legend(loc="upper left", prop={'size': font_size})
ax_qwen.legend(loc="upper left", prop={'size': font_size})

# baseline_fig.savefig("./pgfs/AttackPatternExtractorBase.pgf")

In [None]:
# Build the full comparison table (printing the LaTeX rows), then reduce it to
# the model name and the F1 columns.
df = print_few_shot_table(attack_pattern_extractor_data, print_latex_help=True)

non_f1_columns = [col for col in df.columns if "Recall" in str(col) or "Precision" in str(col)]
df = df.drop(columns=non_f1_columns)

df

In [None]:
font_size = 11
baseline_fig, ax_llama, ax_qwen = generate_plot_base(font_size=font_size)

llama_model_sizes = [1, 3, 8, 70]
qwen_model_sizes = [1.5, 3, 7, 72]

# === LLAMA BASELINE ===
llama_baseline_data = get_f1_in_order("llama", "Baseline", attack_pattern_extractor_data)

# Fill
ax_llama.fill_between(llama_model_sizes, 0, llama_baseline_data, color="gray", alpha=0.2)

# Points
ax_llama.plot(1, llama_baseline_data[0], marker="o", color="gray", label="BASE", **line_kwargs)
ax_llama.plot(3, llama_baseline_data[1], marker="o", color="gray", **line_kwargs)
ax_llama.plot(8, llama_baseline_data[2], marker="o", color="gray", **line_kwargs)
ax_llama.plot(70, llama_baseline_data[3], marker="o", color="gray", **line_kwargs)

# Lines
ax_llama.plot([1, 3], [llama_baseline_data[0], llama_baseline_data[1]], color="gray", **line_kwargs)
ax_llama.plot([3, 8], [llama_baseline_data[1], llama_baseline_data[2]], color="gray", **line_kwargs)
ax_llama.plot([8, 70], [llama_baseline_data[2], llama_baseline_data[3]], color="gray", **line_kwargs)



# === LLAMA ZERO-SHOT O1 ===
llama_zs_o1 = get_f1_in_order("llama", "ZERO-O1", data=attack_pattern_extractor_data)

# Points
ax_llama.plot(1, llama_zs_o1[0], marker="s", color="blue", label="ZS-O1", **line_kwargs)
# ax_llama.plot(3, llama_zs_o1[1], marker="s", color="blue", **line_kwargs)
ax_llama.plot(8, llama_zs_o1[2], marker="s", color="blue", **line_kwargs)
ax_llama.plot(70, llama_zs_o1[3], marker="s", color="blue", **line_kwargs)

# Lines
ax_llama.plot([1, 3], [llama_zs_o1[0], llama_zs_o1[1]], color="blue", **line_kwargs)
ax_llama.plot([3, 8], [llama_baseline_data[1], llama_zs_o1[2]], color="blue", **line_kwargs)
ax_llama.plot([8, 70], [llama_zs_o1[2], llama_zs_o1[3]], color="blue", **line_kwargs)


# === LLAMA ZERO-SHOT O2 ===
llama_zs_o2 = get_f1_in_order("llama", "ZERO-O2", data=attack_pattern_extractor_data)

# Points
# ax_llama.plot(1, llama_zs_o2[0], marker="^", color="red", label="ZS-O2", **line_kwargs)
# ax_llama.plot(3, llama_zs_o2[1], marker="^", color="red", **line_kwargs)
# ax_llama.plot(8, llama_zs_o2[2], marker="^", color="red", **line_kwargs)
ax_llama.plot(70, llama_zs_o2[3], marker="^", color="red", label="ZS-O2", **line_kwargs)

# Lines
# ax_llama.plot([1, 3], [llama_baseline_data[0], llama_zs_o2[1]], color="red", **line_kwargs)
# ax_llama.plot([3, 8], [llama_zs_o2[1], llama_zs_o2[2]], color="gray", **line_kwargs)
ax_llama.plot([8, 70], [llama_baseline_data[2], llama_zs_o2[3]], color="red", **line_kwargs)




# === LLAMA FEW-SHOT O1 ===
llama_fs_o1 = get_f1_in_order("llama", "FS-O1", data=attack_pattern_extractor_data)

# Points
# ax_llama.plot(1, llama_fs_o1[0], marker="p", color="purple", label="FS-O1", **line_kwargs)
# ax_llama.plot(3, llama_fs_o1[1], marker="p", color="purple", **line_kwargs)
# ax_llama.plot(8, llama_fs_o1[2], marker="p", color="purple", **line_kwargs)
ax_llama.plot(70, llama_fs_o1[3], marker="p", color="purple", label="FS-O1", **line_kwargs)

# Lines
# ax_llama.plot([1, 3], [llama_fs_o1[0], llama_fs_o1[1]], color="purple", **line_kwargs)
# ax_llama.plot([3, 8], [llama_fs_o1[1], llama_fs_o1[2]], color="purple", **line_kwargs)
ax_llama.plot([8, 70], [llama_baseline_data[2], llama_fs_o1[3]], color="purple", **line_kwargs)



# === LLAMA FEW-SHOT O2 ===
llama_fs_o2 = get_f1_in_order("llama", "FS-O2", data=attack_pattern_extractor_data)

# Points
ax_llama.plot(1, llama_fs_o2[0], marker="d", color="green", label="FS-O2", **line_kwargs)
ax_llama.plot(3, llama_fs_o2[1], marker="d", color="green", **line_kwargs)
ax_llama.plot(8, llama_fs_o2[2], marker="d", color="green", **line_kwargs)
ax_llama.plot(70, llama_fs_o2[3], marker="d", color="green", **line_kwargs)

# Lines
ax_llama.plot([1, 3], [llama_fs_o2[0], llama_fs_o2[1]], color="green", **line_kwargs)
ax_llama.plot([3, 8], [llama_fs_o2[1], llama_fs_o2[2]], color="green", **line_kwargs)
ax_llama.plot([8, 70], [llama_fs_o2[2], llama_fs_o2[3]], color="green", **line_kwargs)








# === QWEN BASELINE ===
qwen_baseline_data = get_f1_in_order("qwen", "Baseline", attack_pattern_extractor_data)

# Fill
ax_qwen.fill_between(qwen_model_sizes, 0, qwen_baseline_data, color="gray", alpha=0.2)

# Points
ax_qwen.plot(1.5, qwen_baseline_data[0], marker="o", color="gray", label="BASE", **line_kwargs)
ax_qwen.plot(3, qwen_baseline_data[1], marker="o", color="gray", **line_kwargs)
ax_qwen.plot(7, qwen_baseline_data[2], marker="o", color="gray", **line_kwargs)
ax_qwen.plot(72, qwen_baseline_data[3], marker="o", color="gray", **line_kwargs)

# Lines
ax_qwen.plot([1.5, 3], [qwen_baseline_data[0], qwen_baseline_data[1]], color="gray", **line_kwargs)
ax_qwen.plot([3, 7], [qwen_baseline_data[1], qwen_baseline_data[2]], color="gray", **line_kwargs)
ax_qwen.plot([7, 72], [qwen_baseline_data[2], qwen_baseline_data[3]], color="gray", **line_kwargs)


# === QWEN ZERO-SHOT O1 ===
# F1 values for the Qwen models under the "ZERO-O1" configuration.
qwen_zs_o1 = get_f1_in_order("qwen", "ZERO-O1", data=attack_pattern_extractor_data)

# Points: only the 72B marker is drawn (and therefore carries the legend label);
# the 1.5B / 3B / 7B markers are disabled.
# NOTE(review): presumably hidden because ZS-O1 does not beat the baseline there - confirm.
# ax_qwen.plot(1.5, qwen_zs_o1[0], marker="s", color="blue", label="ZS-O1", **line_kwargs)
# ax_qwen.plot(3, qwen_zs_o1[1], marker="s", color="blue", **line_kwargs)
# ax_qwen.plot(7, qwen_zs_o1[2], marker="s", color="blue", **line_kwargs)
ax_qwen.plot(72, qwen_zs_o1[3], marker="s", color="blue", label="ZS-O1", **line_kwargs)

# Lines: the only active segment starts at the *baseline* 7B value (not qwen_zs_o1[2]),
# so the blue line branches off the baseline curve towards the 72B marker.
# ax_qwen.plot([1.5, 3], [qwen_zs_o1[0], qwen_zs_o1[1]], color="blue", **line_kwargs)
# ax_qwen.plot([3, 7], [qwen_zs_o1[1], qwen_zs_o1[2]], color="blue", **line_kwargs)
ax_qwen.plot([7, 72], [qwen_baseline_data[2], qwen_zs_o1[3]], color="blue", **line_kwargs)


# === QWEN ZERO-SHOT O2 ===
# F1 values for the Qwen models under the "ZERO-O2" configuration.
qwen_zs_o2 = get_f1_in_order("qwen", "ZERO-O2", data=attack_pattern_extractor_data)

# Points: only the 1.5B and 3B markers are drawn; the 7B and 72B markers are disabled.
# NOTE(review): presumably hidden because ZS-O2 does not beat the baseline there - confirm.
ax_qwen.plot(1.5, qwen_zs_o2[0], marker="^", color="red", label="ZS-O2", **line_kwargs)
ax_qwen.plot(3, qwen_zs_o2[1], marker="^", color="red", **line_kwargs)
# ax_qwen.plot(7, qwen_zs_o2[2], marker="^", color="red", **line_kwargs)
# ax_qwen.plot(72, qwen_zs_o2[3], marker="^", color="red", **line_kwargs)

# Lines
ax_qwen.plot([1.5, 3], [qwen_zs_o2[0], qwen_zs_o2[1]], color="red", **line_kwargs)
# The 7B marker is hidden, so this segment has to end on the baseline 7B value,
# exactly like every other series with a hidden point in this notebook. Ending
# it at qwen_zs_o2[2] would leave the red line dangling at an unmarked position.
ax_qwen.plot([3, 7], [qwen_zs_o2[1], qwen_baseline_data[2]], color="red", **line_kwargs)
# ax_qwen.plot([7, 72], [qwen_zs_o2[2], qwen_zs_o2[3]], color="red", **line_kwargs)



# === QWEN FEW-SHOT O1 ===
# F1 values for the Qwen models under the "FS-O1" configuration.
qwen_fs_o1 = get_f1_in_order("qwen", "FS-O1", data=attack_pattern_extractor_data)

# Points: the 7B marker is disabled; the first marker carries the legend label.
# NOTE(review): presumably hidden because FS-O1 does not beat the baseline at 7B - confirm.
ax_qwen.plot(1.5, qwen_fs_o1[0], marker="p", color="purple", label="FS-O1", **line_kwargs)
ax_qwen.plot(3, qwen_fs_o1[1], marker="p", color="purple", **line_kwargs)
# ax_qwen.plot(7, qwen_fs_o1[2], marker="p", color="purple", **line_kwargs)
ax_qwen.plot(72, qwen_fs_o1[3], marker="p", color="purple", **line_kwargs)

# Lines: both segments touching the hidden 7B point use the *baseline* 7B value
# instead of qwen_fs_o1[2], so the purple line meets the baseline curve there.
ax_qwen.plot([1.5, 3], [qwen_fs_o1[0], qwen_fs_o1[1]], color="purple", **line_kwargs)
ax_qwen.plot([3, 7], [qwen_fs_o1[1], qwen_baseline_data[2]], color="purple", **line_kwargs)
ax_qwen.plot([7, 72], [qwen_baseline_data[2], qwen_fs_o1[3]], color="purple", **line_kwargs)


# === QWEN FEW-SHOT O2 ===
qwen_fs_o2 = get_f1_in_order("qwen", "FS-O2", data=attack_pattern_extractor_data)

# x positions of the four Qwen models (1.5B, 3B, 7B, 72B).
_qwen_xs = (1.5, 3, 7, 72)

# Markers: all four models are shown; only the first one carries the legend label.
for _pos, _x in enumerate(_qwen_xs):
    _legend_kw = {"label": "FS-O2"} if _pos == 0 else {}
    ax_qwen.plot(_x, qwen_fs_o2[_pos], marker="d", color="green", **_legend_kw, **line_kwargs)

# Segments: one separate plot call per pair of neighbouring models.
for _pos in range(len(_qwen_xs) - 1):
    ax_qwen.plot(
        [_qwen_xs[_pos], _qwen_xs[_pos + 1]],
        [qwen_fs_o2[_pos], qwen_fs_o2[_pos + 1]],
        color="green",
        **line_kwargs,
    )



# === LEGENDS + SAVE ===
# Both panels share the same legend placement and font size.
_legend_options = {"loc": "upper left", "prop": {'size': font_size}}
ax_llama.legend(**_legend_options)
ax_qwen.legend(**_legend_options)

# Export is disabled; uncomment to write the PGF file.
# baseline_fig.savefig("./pgfs/AttackPatternExtractorFS.pgf")

# Targets Extractor

In [None]:
# Load the targets-extractor results for every prompt configuration and model.
targets_extractor_data = utils.load_json_files(
    "./Modules/TargetsExtractor/",
    ["Baseline", "ZERO-O1", "ZERO-O2", "FS-O1", "FS-O2"],
    models=[
        "llama_3_2_1b_instruct",
        "llama_3_2_3b_instruct",
        "llama_3_1_8b_instruct",
        "llama_3_1_70b_instruct",
        "qwen_2_5_1p5b_instruct",
        "qwen_2_5_3b_instruct",
        "qwen_2_5_7b_instruct",
        "qwen_2_5_72b_instruct",
    ],
)

In [None]:
# Baseline F1 / precision / recall per model for the targets extractor.
# With print_latex_help=True the LaTeX table rows are printed as well; the
# returned DataFrame is the cell output.
print_baseline_table(targets_extractor_data, print_latex_help=True)

In [None]:
font_size = 11
baseline_fig, ax_llama, ax_qwen = generate_plot_base(font_size=font_size)

llama_model_sizes = [1, 3, 8, 70]
qwen_model_sizes = [1.5, 3, 7, 72]


def _draw_baseline(ax, sizes, f1_values):
    """Draw the gray baseline curve on ``ax``.

    Shades the area between zero and the curve, then draws one marker per
    model size (the first carries the "BASE" legend label) and one separate
    segment per pair of neighbouring sizes.
    """
    ax.fill_between(sizes, 0, f1_values, color="gray", alpha=0.2)

    for pos, size in enumerate(sizes):
        legend_kw = {"label": "BASE"} if pos == 0 else {}
        ax.plot(size, f1_values[pos], marker="o", color="gray", **legend_kw, **line_kwargs)

    for pos in range(len(sizes) - 1):
        ax.plot(
            [sizes[pos], sizes[pos + 1]],
            [f1_values[pos], f1_values[pos + 1]],
            color="gray",
            **line_kwargs,
        )


# === LLAMA BASELINE ===
llama_baseline_data = get_f1_in_order("llama", "Baseline", targets_extractor_data)
_draw_baseline(ax_llama, llama_model_sizes, llama_baseline_data)


# === QWEN BASELINE ===
qwen_baseline_data = get_f1_in_order("qwen", "Baseline", targets_extractor_data)
_draw_baseline(ax_qwen, qwen_model_sizes, qwen_baseline_data)

# === LEGENDS + SAVE ===
_legend_options = {"loc": "upper left", "prop": {'size': font_size}}
ax_llama.legend(**_legend_options)
ax_qwen.legend(**_legend_options)

# Export is disabled; uncomment to write the PGF file.
# baseline_fig.savefig("./pgfs/TargetsExtractorBase.pgf")

In [None]:
df = print_few_shot_table(targets_extractor_data, print_latex_help=True)

# Keep every column whose label mentions neither "Recall" nor "Precision".
_keep_column = [
    not ("Recall" in str(_label) or "Precision" in str(_label))
    for _label in df.columns
]
df = df.loc[:, _keep_column]

df

In [None]:
font_size = 11
baseline_fig, ax_llama, ax_qwen = generate_plot_base(font_size=font_size)

llama_model_sizes = [1, 3, 8, 70]
qwen_model_sizes = [1.5, 3, 7, 72]


def _draw_series(ax, sizes, f1_values, marker, color, label, hidden=(), anchor=None):
    """Draw one result series (markers first, then segments) on ``ax``.

    Args:
        ax: Axes to draw on.
        sizes: x positions, one per model.
        f1_values: y values of the series, indexed like ``sizes``.
        marker, color: Matplotlib marker / colour used for the series.
        label: Legend label; attached to the first marker that is drawn.
        hidden: Indices whose marker is not drawn. Segments touching such an
            index use ``anchor[index]`` instead of ``f1_values[index]``.
        anchor: Fallback y values for hidden indices (the baseline series);
            only read when ``hidden`` is non-empty.
    """
    label_pending = True
    for pos, size in enumerate(sizes):
        if pos in hidden:
            continue
        legend_kw = {"label": label} if label_pending else {}
        ax.plot(size, f1_values[pos], marker=marker, color=color, **legend_kw, **line_kwargs)
        label_pending = False

    heights = [anchor[pos] if pos in hidden else f1_values[pos] for pos in range(len(sizes))]
    for pos in range(len(sizes) - 1):
        ax.plot(
            [sizes[pos], sizes[pos + 1]],
            [heights[pos], heights[pos + 1]],
            color=color,
            **line_kwargs,
        )


# === LLAMA BASELINE ===
llama_baseline_data = get_f1_in_order("llama", "Baseline", targets_extractor_data)
ax_llama.fill_between(llama_model_sizes, 0, llama_baseline_data, color="gray", alpha=0.2)
_draw_series(ax_llama, llama_model_sizes, llama_baseline_data, "o", "gray", "BASE")

# === LLAMA ZERO-SHOT O1 ===
# The 1B and 8B markers are hidden; the line meets the baseline at those sizes.
llama_zs_o1 = get_f1_in_order("llama", "ZERO-O1", data=targets_extractor_data)
_draw_series(ax_llama, llama_model_sizes, llama_zs_o1, "s", "blue", "ZS-O1",
             hidden=(0, 2), anchor=llama_baseline_data)

# === LLAMA ZERO-SHOT O2 ===
# The 1B marker is hidden; the line starts on the baseline 1B value.
llama_zs_o2 = get_f1_in_order("llama", "ZERO-O2", data=targets_extractor_data)
_draw_series(ax_llama, llama_model_sizes, llama_zs_o2, "^", "red", "ZS-O2",
             hidden=(0,), anchor=llama_baseline_data)

# === LLAMA FEW-SHOT O1 ===
llama_fs_o1 = get_f1_in_order("llama", "FS-O1", data=targets_extractor_data)
_draw_series(ax_llama, llama_model_sizes, llama_fs_o1, "p", "purple", "FS-O1")

# === LLAMA FEW-SHOT O2 ===
llama_fs_o2 = get_f1_in_order("llama", "FS-O2", data=targets_extractor_data)
_draw_series(ax_llama, llama_model_sizes, llama_fs_o2, "d", "green", "FS-O2")


# === QWEN BASELINE ===
qwen_baseline_data = get_f1_in_order("qwen", "Baseline", targets_extractor_data)
ax_qwen.fill_between(qwen_model_sizes, 0, qwen_baseline_data, color="gray", alpha=0.2)
_draw_series(ax_qwen, qwen_model_sizes, qwen_baseline_data, "o", "gray", "BASE")

# === QWEN ZERO-SHOT O1 ===
# The 72B marker is hidden; the last segment ends on the baseline 72B value.
qwen_zs_o1 = get_f1_in_order("qwen", "ZERO-O1", data=targets_extractor_data)
_draw_series(ax_qwen, qwen_model_sizes, qwen_zs_o1, "s", "blue", "ZS-O1",
             hidden=(3,), anchor=qwen_baseline_data)

# === QWEN ZERO-SHOT O2 ===
qwen_zs_o2 = get_f1_in_order("qwen", "ZERO-O2", data=targets_extractor_data)
_draw_series(ax_qwen, qwen_model_sizes, qwen_zs_o2, "^", "red", "ZS-O2")

# === QWEN FEW-SHOT O1 ===
qwen_fs_o1 = get_f1_in_order("qwen", "FS-O1", data=targets_extractor_data)
_draw_series(ax_qwen, qwen_model_sizes, qwen_fs_o1, "p", "purple", "FS-O1")

# === QWEN FEW-SHOT O2 ===
qwen_fs_o2 = get_f1_in_order("qwen", "FS-O2", data=targets_extractor_data)
_draw_series(ax_qwen, qwen_model_sizes, qwen_fs_o2, "d", "green", "FS-O2")


# === LEGENDS + SAVE ===
_legend_options = {"loc": "upper left", "prop": {'size': font_size}}
ax_llama.legend(**_legend_options)
ax_qwen.legend(**_legend_options)

# Export is disabled; uncomment to write the PGF file.
# baseline_fig.savefig("./pgfs/TargetsExtractorFS.pgf")

In [None]:
import os 
import json 


def load_retry_stats_files(base_path, categories, models):
    """Load the retry-statistics JSON file of every (category, model) pair.

    For each requested category and model the file
    ``<base_path>/<category>/<model>_retry_stats.json`` is parsed.

    Args:
        base_path: Directory that contains one sub-directory per category.
        categories: Iterable of category (sub-directory) names to load.
        models: Iterable of model identifiers used as file-name prefixes.

    Returns:
        Nested dict ``{model: {category: parsed_json}}``. Every model always
        has the five standard keys ("Baseline", "ZERO-O1", "ZERO-O2", "FS-O1",
        "FS-O2"); a standard category that was not requested stays an empty
        dict.

    Raises:
        OSError: If a requested file is missing or unreadable (raised by ``open``).
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    standard_categories = ("Baseline", "ZERO-O1", "ZERO-O2", "FS-O1", "FS-O2")

    # Pre-populate the standard categories so downstream code can index all
    # five of them even when only a subset was requested.
    data = {}
    for model in models:
        data[model] = {category: dict() for category in standard_categories}

    for category in categories:
        for model in models:
            file_path = os.path.join(base_path, category, f"{model}_retry_stats.json")
            # JSON is UTF-8 by specification; do not depend on the platform's
            # locale encoding when decoding the file.
            with open(file_path, "r", encoding="utf-8") as fp:
                data[model][category] = json.load(fp)

    return data

In [None]:
def print_retry_stats_table(data: dict, print_latex_help: bool = False) -> pd.DataFrame:
    """Summarise retry statistics per model and prompt configuration.

    For every model and each of the five configurations ("Baseline",
    "ZERO-O1", "ZERO-O2", "FS-O1", "FS-O2") three numbers are computed: how
    many runs did not finish, how many finished, and the average of
    ``retries + 1`` over the finished runs (reported as ``*_avg_retries``).

    Args:
        data: ``{model: {category: [run, ...]}}`` where every run is a dict
            with the keys ``"finished"`` and ``"retries"``.
        print_latex_help: When true, additionally print one LaTeX table row per
            model. Note that this row lists the configurations in the order
            Baseline, FS-O1, FS-O2, ZERO-O1, ZERO-O2, which differs from the
            DataFrame column order. Models without a pretty name are printed
            with their raw key.

    Returns:
        A DataFrame with one row per model and the columns ``Model`` followed
        by ``<prefix>_total_failed``, ``<prefix>_total_finished`` and
        ``<prefix>_avg_retries`` for the prefixes base, zs_o1, zs_o2, fs_o1,
        fs_o2.

    Raises:
        KeyError: If a model entry lacks one of the five configurations.
    """
    # (key in `data`, column-name prefix), in DataFrame column order.
    column_layout = [
        ("Baseline", "base"),
        ("ZERO-O1", "zs_o1"),
        ("ZERO-O2", "zs_o2"),
        ("FS-O1", "fs_o1"),
        ("FS-O2", "fs_o2"),
    ]
    # Order of the configuration blocks inside the printed LaTeX row.
    latex_layout = ["Baseline", "FS-O1", "FS-O2", "ZERO-O1", "ZERO-O2"]
    stat_fields = ("total_failed", "total_finished", "avg_retries")

    # Display names for the LaTeX rows; built once instead of once per model.
    models_str = {
        "llama_3_2_1b_instruct": "Llama 3.2 1b instruct",
        "llama_3_2_3b_instruct": "Llama 3.2 3b instruct",
        "llama_3_1_8b_instruct": "Llama 3.1 8b instruct",
        "llama_3_1_70b_instruct": "Llama 3.1 70b instruct",
        "qwen_2_5_1p5b_instruct": "Qwen 2.5 1.5b instruct",
        "qwen_2_5_3b_instruct": "Qwen 2.5 3b instruct",
        "qwen_2_5_7b_instruct": "Qwen 2.5 7b instruct",
        "qwen_2_5_72b_instruct": "Qwen 2.5 72b instruct",
    }

    def calc_retry_table(retry_stats: typing.List[dict]):
        """Return (failed count, finished count, average attempts of finished runs)."""
        runs = list(retry_stats)
        # A finished run with N retries needed N + 1 attempts.
        attempts = [run["retries"] + 1 for run in runs if run["finished"]]
        total_finished = len(attempts)
        total_failed = len(runs) - total_finished
        # NOTE(review): utils.calc_avg is defined elsewhere; the second argument is
        # presumably the number of decimals, and its behaviour for an empty list
        # (no finished run) is not visible here - confirm.
        avg_retries = utils.calc_avg(attempts, 2)
        return total_failed, total_finished, avg_retries

    table_data = []
    for model, values in data.items():
        summary = {category: calc_retry_table(values[category]) for category, _ in column_layout}

        row = [model]
        for category, _ in column_layout:
            row.extend(summary[category])
        table_data.append(row)

        if print_latex_help:
            cells = [models_str.get(model, model)]
            for category in latex_layout:
                for value in summary[category]:
                    cells.extend(["&", value])
            cells.append("\\\\")
            print(*cells)

    columns = ["Model"] + [
        f"{prefix}_{field}" for _, prefix in column_layout for field in stat_fields
    ]

    # The former `df.style.set_properties(...)` statement was dropped: it built
    # a Styler and threw it away, so it never affected the returned DataFrame.
    return pd.DataFrame(table_data, columns=columns)

# Attack Pattern Extractor Retry Stats

In [None]:
# Retry statistics of the attack-pattern extractor for every configuration and model.
retry_stats = load_retry_stats_files(
    "./Modules/AttackPatternExtractor/",
    categories=["Baseline", "ZERO-O1", "ZERO-O2", "FS-O1", "FS-O2"],
    models=[
        "llama_3_2_1b_instruct",
        "llama_3_2_3b_instruct",
        "llama_3_1_8b_instruct",
        "llama_3_1_70b_instruct",
        "qwen_2_5_1p5b_instruct",
        "qwen_2_5_3b_instruct",
        "qwen_2_5_7b_instruct",
        "qwen_2_5_72b_instruct",
    ],
)

print_retry_stats_table(retry_stats, print_latex_help=True)

# Targets Extractor Retry Stats

In [None]:
# Retry statistics of the targets extractor for every configuration and model.
retry_stats = load_retry_stats_files(
    "./Modules/TargetsExtractor/",
    categories=["Baseline", "ZERO-O1", "ZERO-O2", "FS-O1", "FS-O2"],
    models=[
        "llama_3_2_1b_instruct",
        "llama_3_2_3b_instruct",
        "llama_3_1_8b_instruct",
        "llama_3_1_70b_instruct",
        "qwen_2_5_1p5b_instruct",
        "qwen_2_5_3b_instruct",
        "qwen_2_5_7b_instruct",
        "qwen_2_5_72b_instruct",
    ],
)

print_retry_stats_table(retry_stats, print_latex_help=True)