# Prompt Injection

## 0) Prereqs

In [15]:
import numpy as np
import plotly.graph_objects as go
import plotly
from plotly.subplots import make_subplots

## 1) Load results
Use the OTT notebook to produce the results (considering only true demos and the prompt-injection prompt).

In [16]:
# Identifiers of the models whose results are loaded; each one is used as a
# directory name under ../results/prompt_injection/.
model_names = [
    *("gpt_neo_125M", "gpt_neo_1.3B", "gpt_neo_2.7B"),  # GPT-Neo family
    "opt_1.3B",
    "pythia_13B_deduped",
    "bloom_560M",
]

# Datasets for which result files are loaded (file-name prefix of the .npy files).
dataset_names = ["sick"]

In [17]:
# Few-shot results, indexed as metrics_fs[model][dataset].
# Each .npy file holds a single pickled Python object, which .item() unwraps.
# NOTE: allow_pickle=True unpickles the file contents, so only load result
# files that you produced yourself.
metrics_fs = {}
for model in model_names:
    per_dataset = metrics_fs[model] = {}
    for dataset in dataset_names:
        result_path = f"../results/prompt_injection/{model}/{dataset}_few_shot.npy"
        per_dataset[dataset] = np.load(result_path, allow_pickle=True).item()

In [18]:
# Prompt-injection results, indexed as metrics_pi[model][dataset].
# Each .npy file holds a single pickled Python object, which .item() unwraps.
# NOTE: allow_pickle=True unpickles the file contents, so only load result
# files that you produced yourself.
metrics_pi = {}
for model in model_names:
    per_dataset = metrics_pi[model] = {}
    for dataset in dataset_names:
        result_path = f"../results/prompt_injection/{model}/{dataset}.npy"
        per_dataset[dataset] = np.load(result_path, allow_pickle=True).item()

In [19]:
# Dataset whose results are plotted.
dataset_name = "sick"

# Key of the metric to plot: "top_1_acc" or "cal_correct_over_incorrect".
metric = "top_1_acc"

# Plotting order of the models; one model per subplot.
model_names = [
    "gpt_neo_1.3B",
    "gpt_neo_2.7B",
    "gpt_neo_125M",
    "opt_1.3B",
    "pythia_13B_deduped",
    "bloom_560M",
]

# (row, col) subplot positions of a 2 x 3 grid, enumerated row by row.
coordinates = [(row, col) for row in (1, 2) for col in (1, 2, 3)]

true_or_false = [0, 1]
colors = ["green", "red"]

In [20]:
# Human-readable panel title for each model id. Looking titles up by id (rather
# than listing them positionally) keeps every subplot title attached to its
# model even if `model_names` is reordered; unknown ids fall back to the raw id.
panel_titles = {
    "gpt_neo_1.3B": "Neo (1.3B)",
    "gpt_neo_2.7B": "Neo (2.7B)",
    "gpt_neo_125M": "Neo (125M)",
    "opt_1.3B": "OPT (1.3B)",
    "pythia_13B_deduped": "Pythia (12B)",
    "bloom_560M": "BLOOM (560M)",
}

# 2 x 3 grid of subplots, one per model, sharing the axis titles.
fig = make_subplots(
    rows=2,
    cols=3,
    subplot_titles=tuple(panel_titles.get(name, name) for name in model_names),
    y_title="Top-1 Accuracy" if metric == "top_1_acc" else "Calibrated Accuracy",
    x_title="Layer",
    vertical_spacing=0.15,
)

# Height of the dashed reference line; it depends only on the metric, so it is
# computed once outside the loop.
# NOTE(review): 0.33 and 1/50400 are presumably chance levels (3-way task and
# 1 / vocabulary size); the same value is applied to every model -- confirm.
baseline_level = 0.33 if metric == "cal_correct_over_incorrect" else 1 / 50400

for model, coord in zip(model_names, coordinates):
    # Only the first panel contributes legend entries, so each series appears
    # in the legend once instead of once per subplot.
    show_legend = coord == (1, 1)
    fs = metrics_fs[model][dataset_name][metric]
    pi = metrics_pi[model][dataset_name][metric]
    # One x position per entry of the prompt-injection series; the same x
    # values are used for all three traces of the panel.
    x = list(range(len(pi)))
    baseline = [baseline_level] * len(x)

    # (legend name, y values, colour, extra Scatter kwargs), in drawing order.
    series = (
        ("Few-Shot", fs, "green", {}),
        ("Prompt Injection", pi, "red", {}),
        ("Random<br>Baseline", baseline, "black", {"line": {"dash": "dash"}}),
    )
    for label, values, color, extra in series:
        fig.add_trace(
            go.Scatter(
                x=x,
                y=values,
                marker_color=color,
                mode="lines",
                name=label,
                showlegend=show_legend,
                **extra,
            ),
            row=coord[0],
            col=coord[1],
        )


In [21]:
# Set the overall figure title.
fig.update_layout(title="Prompt Injection (SICK)")

# Render the figure in the notebook output.
fig.show()

# Export the figure as a PDF; the relative path resolves against the current
# working directory.
plotly.io.write_image(fig, file="pi_sick.pdf")