In [1]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# Column names that the later cells treat as prediction targets.
tasks = ['clean', 'fgsm', 'pgd', 'apgd', 'square']

# Read the results table and discard the 'isomorphTo' column in one chained step.
dataset = pd.read_csv(
    '/pfs/work7/workspace/scratch/ma_fknuette-project_GRANDE/data/result/imageNet/dataGeneral.csv'
).drop(columns=['isomorphTo'])

In [2]:
from IPython import embed
import os
import json

# Single Objectives
# For every target column in `tasks`, export a dataset whose features are all
# non-target columns and whose label is that single target column.

# The feature columns do not depend on the current task, so select them once.
feature_columns = [col for col in dataset.columns if col not in tasks]

for task in tasks:
    X = dataset[feature_columns]
    y = dataset[task]

    # Two-stage split: hold out 20% for test, then 20% of the remainder for
    # validation (roughly 64% / 16% / 20%). The fixed seed makes it repeatable.
    X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_valid, y_train, y_valid = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

    # Convert each split to numpy, keyed by the suffix used in the file names.
    splits = {
        "train": (X_train.to_numpy(), y_train.to_numpy()),
        "val": (X_valid.to_numpy(), y_valid.to_numpy()),
        "test": (X_test.to_numpy(), y_test.to_numpy()),
    }

    # Create a folder for the task if it doesn't exist
    task_folder = f"./{task}"
    os.makedirs(task_folder, exist_ok=True)

    # Writes N_<split>.npy (features) and y_<split>.npy (targets) per split.
    for split_name, (features, targets) in splits.items():
        np.save(os.path.join(task_folder, f"N_{split_name}.npy"), features)
        np.save(os.path.join(task_folder, f"y_{split_name}.npy"), targets)

    # Metadata describing the exported dataset. The feature count is derived
    # from the data instead of being hard-coded, so it stays correct if the
    # CSV gains or loses columns.
    info = {
        "task_type": 'regression',
        "n_num_features": X.shape[1],
        "n_cat_features": 0,
        "train_size": splits["train"][0].shape[0],
        "val_size": splits["val"][0].shape[0],
        "test_size": splits["test"][0].shape[0],
        "num_feature_intro": {
            feature: feature for feature in X.columns
        }
    }

    # Save the dictionary as a JSON file
    with open(os.path.join(task_folder, "info.json"), "w") as f:
        json.dump(info, f, indent=4)
    

In [3]:
# Multi Objectives
# For every non-'clean' target column, export a two-target dataset whose
# features are all non-target columns and whose labels are ['clean', attack].

# The feature columns do not depend on the current attack, so select them once.
multi_feature_columns = [col for col in dataset.columns if col not in tasks]

for attack in tasks:
    if attack == 'clean':
        continue  # Skip pairing clean with itself

    X = dataset[multi_feature_columns]
    y = dataset[['clean', attack]]  # two label columns: clean first, then the attack

    # Two-stage split: hold out 20% for test, then 20% of the remainder for
    # validation (roughly 64% / 16% / 20%). The fixed seed makes it repeatable.
    X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_valid, y_train, y_valid = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

    # Convert each split to numpy, keyed by the suffix used in the file names.
    splits = {
        "train": (X_train.to_numpy(), y_train.to_numpy()),
        "val": (X_valid.to_numpy(), y_valid.to_numpy()),
        "test": (X_test.to_numpy(), y_test.to_numpy()),
    }

    # Create a folder for the multi-objective task if it doesn't exist
    task_folder = os.path.join("..", "multi", f"clean_{attack}")
    os.makedirs(task_folder, exist_ok=True)

    # Writes N_<split>.npy (features) and y_<split>.npy (targets) per split.
    for split_name, (features, targets) in splits.items():
        np.save(os.path.join(task_folder, f"N_{split_name}.npy"), features)
        np.save(os.path.join(task_folder, f"y_{split_name}.npy"), targets)

    # Metadata describing the exported dataset. The feature count is derived
    # from the data instead of being hard-coded, so it stays correct if the
    # CSV gains or loses columns.
    info = {
        "task_type": 'regression',
        "n_num_features": X.shape[1],
        "n_cat_features": 0,
        "train_size": splits["train"][0].shape[0],
        "val_size": splits["val"][0].shape[0],
        "test_size": splits["test"][0].shape[0],
        "num_feature_intro": {
            feature: feature for feature in X.columns
        }
    }

    # Save the dictionary as a JSON file
    with open(os.path.join(task_folder, "info.json"), "w") as f:
        json.dump(info, f, indent=4)