In [1]:
# Imports
import polars as pl
import os
from sklearn.model_selection import train_test_split
from pathlib import Path
from json import load as json_load
from dotenv import load_dotenv

In [2]:
# Pull variables from a local .env file (if any) into the process environment
load_dotenv()

# Resolve the base directory used for all relative paths in this notebook.
# NOTE: this is the kernel's current working directory, which only matches the
# notebook's own folder when the kernel was started there.
# NOTE(review): `__dir__` is also the name of the module-level hook consulted by
# `dir()`; the name is kept because later cells reference it.
__dir__ = Path(os.path.abspath(""))
"""
The directory of the current file
"""

# Root folder for everything this notebook writes
OUTPUT_DIRECTORY = __dir__ / "../data/notebooks/prepare-datasets"

# Make sure one sub-folder per dataset exists (no-op when already present)
for dataset_name in (
    "ahsanayub-malicious-prompts",
    "jayavibhav-prompt-injection-safety",
    "synthetic-benchmark",
):
    (OUTPUT_DIRECTORY / dataset_name).mkdir(parents=True, exist_ok=True)

In [3]:
# Fetch the train and test splits of
# https://huggingface.co/datasets/ahsanayub/malicious-prompts
# (`missing_utf8_is_empty_string=True` asks Polars to read missing string
# values as empty strings rather than nulls.)
ahsanayub_train_df = pl.read_csv(
    "hf://datasets/ahsanayub/malicious-prompts/train.csv",
    missing_utf8_is_empty_string=True,
)
ahsanayub_test_df = pl.read_csv(
    "hf://datasets/ahsanayub/malicious-prompts/test.csv",
    missing_utf8_is_empty_string=True,
)

# Persist both splits as Parquet under the output directory
ahsanayub_train_df.write_parquet(
    OUTPUT_DIRECTORY / "ahsanayub-malicious-prompts/train.parquet"
)
ahsanayub_test_df.write_parquet(
    OUTPUT_DIRECTORY / "ahsanayub-malicious-prompts/test.parquet"
)

In [4]:
# Fetch the train and test splits of
# https://huggingface.co/datasets/jayavibhav/prompt-injection-safety
jayavibhav_train_df = pl.read_parquet(
    "hf://datasets/jayavibhav/prompt-injection-safety/data/train-00000-of-00001.parquet"
)
jayavibhav_test_df = pl.read_parquet(
    "hf://datasets/jayavibhav/prompt-injection-safety/data/test-00000-of-00001.parquet"
)

def map_label(raw_label: int) -> int:
  """
  Collapse a raw label into a binary label.

  A raw label equal to 0 is kept as 0; every other raw label becomes 1.
  """

  return 0 if raw_label == 0 else 1

# Collapse the raw labels into a binary label: 0 stays 0, any other value becomes 1
# (the same rule as `map_label`).
# A native Polars expression replaces `map_elements(map_label)`: it avoids a
# per-row Python callback and makes the output dtype explicit instead of
# inferred. Null labels stay null, matching `map_elements`, which skips nulls
# by default.
# Assumes `label` is an integer column - TODO confirm against the dataset schema.
binary_label = (pl.col("label") != 0).cast(pl.Int64).alias("label")

jayavibhav_train_df = jayavibhav_train_df.with_columns(binary_label)
jayavibhav_test_df = jayavibhav_test_df.with_columns(binary_label)

# Save the datasets
jayavibhav_train_df.write_parquet(OUTPUT_DIRECTORY / "jayavibhav-prompt-injection-safety/train.parquet")
jayavibhav_test_df.write_parquet(OUTPUT_DIRECTORY / "jayavibhav-prompt-injection-safety/test.parquet")

In [None]:
# Load the aggregated synthetic dataset
with open(
    __dir__ / "../data/synthetic-dataset/aggregated.json", "r", encoding="utf-8"
) as aggregated_file:
    aggregated_synthetic_dataset = json_load(aggregated_file)


def _format_message(message: dict) -> str:
    """
    Render one conversation message as "<Role>: <content>".

    The role is capitalized; the content is used verbatim.
    """

    # Single quotes inside the f-string keep this valid on Python < 3.12
    return f"{message['role'].capitalize()}: {message['content']}"


# Binary labels: 0 = benign, 1 = malicious
BENIGN_LABEL = 0
MALICIOUS_LABEL = 1

# Label the messages of every benign conversation
benign_data = [
    (_format_message(message), BENIGN_LABEL)
    for aggregated_idea in aggregated_synthetic_dataset["ideas"]
    for benign_conversation in aggregated_idea["benign_conversations"]
    for message in benign_conversation["messages"]
]

# Label the messages of every malicious conversation.
# These were previously labeled 0 as well, which left the dataset with a
# single class.
malicious_data = [
    (_format_message(message), MALICIOUS_LABEL)
    for aggregated_idea in aggregated_synthetic_dataset["ideas"]
    for malicious_goal in aggregated_idea["malicious_goals"]
    for malicious_conversation in malicious_goal["malicious_conversations"]
    for message in malicious_conversation["conversation"]["messages"]
]

synthetic_data = benign_data + malicious_data

# Split the dataset (fixed seed so the split is reproducible)
synthetic_train_split, synthetic_test_split = train_test_split(
    synthetic_data, test_size=0.2, shuffle=True, random_state=0
)

# Create the dataframes
synthetic_train_df = pl.DataFrame(synthetic_train_split, schema=["text", "label"], orient="row")
synthetic_test_df = pl.DataFrame(synthetic_test_split, schema=["text", "label"], orient="row")

# Make sure the target folder exists before writing: the setup cell creates
# "synthetic-benchmark", not the "synthetic-dataset" folder written to here.
SYNTHETIC_OUTPUT_DIRECTORY = OUTPUT_DIRECTORY / "synthetic-dataset"
SYNTHETIC_OUTPUT_DIRECTORY.mkdir(parents=True, exist_ok=True)

# Save the datasets
synthetic_train_df.write_parquet(SYNTHETIC_OUTPUT_DIRECTORY / "train.parquet")
synthetic_test_df.write_parquet(SYNTHETIC_OUTPUT_DIRECTORY / "test.parquet")