## Task 3: Model Training

In [1]:
import wandb
import os
# Give wandb the notebook's file name explicitly through its
# WANDB_NOTEBOOK_NAME environment variable.
os.environ["WANDB_NOTEBOOK_NAME"] = "model_training.ipynb"
# Interactive login is left disabled here; uncomment the next line to enable it.
# wandb.login()

In [2]:
import torch
from torch import optim, nn
from tqdm import tqdm

from models import run_pytorch
from data import get_datasets

from models.pytorch.mlp import MLP
from models.pytorch.tab_transformer import TabTransformer
from models.pytorch.ft_transformer import FTTransformer
from models.jax.logistic_regression import LogisticRegression


[34m[1mwandb[0m: Currently logged in as: [33mzhipeng-he[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
def model_config(model, input_dim, output_dim, categories_list, numerical_cols, device):
    """Instantiate a model by name and return it with its training settings.

    Args:
        model: Architecture name; one of "MLP", "TabTransformer" or
            "FTTransformer".
        input_dim: Forwarded to the MLP as ``input_dim``; the two transformer
            variants do not use it.
        output_dim: Forwarded as ``output_dim`` (MLP) or ``dim_out``
            (transformers).
        categories_list: Forwarded to every model as ``categories``; its
            length is also given to the MLP as ``num_categorical_feature``.
            Presumably the per-feature category counts -- confirm against the
            dataset info object.
        numerical_cols: Only ``len(numerical_cols)`` is used, as the number of
            numerical/continuous features.
        device: Target handed to ``.to(device)`` on the freshly built model.

    Returns:
        tuple: ``(model_instance, train_config)``. ``train_config`` is a dict
        with the keys ``epochs``, ``batch_size``, ``learning_rate`` and
        ``model`` (plus ``dropout`` for the MLP).

    Raises:
        ValueError: If ``model`` is not one of the supported names. Without
            this the function would fall through and return ``None``, which
            callers that unpack the result would only see as a ``TypeError``.
    """
    # The keyword-argument dicts are named ``model_kwargs`` so they do not
    # shadow this function's own name inside its body.
    if model == "MLP":
        model_kwargs = {
            "input_dim": input_dim,
            "output_dim": output_dim,
            "num_hidden_layers": 2,
            "hidden_layer_dims": [64, 32],
            "dropout": 0.2,
            "categories": categories_list,
            "embedding_dim": 8,
            "num_categorical_feature": len(categories_list),
            "num_numerical_feature": len(numerical_cols),
        }
        train_config = {
            "epochs": 20,
            "batch_size": 512,
            "learning_rate": 1e-3,
            "model": "MLP",
            "dropout": 0.2,
        }
        return MLP(**model_kwargs).to(device), train_config

    if model == "TabTransformer":
        model_kwargs = {
            "categories": categories_list,
            "num_continuous": len(numerical_cols),
            "dim": 8,  # can sweep
            "dim_out": output_dim,
            "depth": 6,
            "heads": 8,
            "attn_dropout": 0.2,
            "ff_dropout": 0.2,
            "mlp_hidden_mults": (4, 2),
            "mlp_act": nn.ReLU(),
            "continuous_mean_std": None,
        }
        train_config = {
            "epochs": 20,
            "batch_size": 128,
            "learning_rate": 1e-3,
            "model": "TabTransformer",
        }
        return TabTransformer(**model_kwargs).to(device), train_config

    if model == "FTTransformer":
        model_kwargs = {
            "categories": categories_list,
            "num_continuous": len(numerical_cols),
            "dim": 8,
            "dim_out": output_dim,
            "depth": 6,
            "heads": 8,
            "attn_dropout": 0.2,
            "ff_dropout": 0.2,
        }
        train_config = {
            "epochs": 20,
            "batch_size": 128,
            "learning_rate": 1e-3,
            "model": "FTTransformer",
        }
        return FTTransformer(**model_kwargs).to(device), train_config

    # Fail loudly on a typo or unsupported name instead of returning None.
    raise ValueError(
        f"Unsupported model {model!r}; expected one of "
        "'MLP', 'TabTransformer', 'FTTransformer'."
    )

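Run the main training loop: train each selected model on each selected dataset, then evaluate it on the test split.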

In [4]:
# Weights & Biases switch read by the training cell: when truthy, each run is
# wrapped in wandb.init(...) and wandb.run is passed to train/test; when falsy,
# train/test are called without a wandb run.
wandb_run = False

In [6]:
# Train on the GPU when CUDA is available, otherwise on the CPU.
# (To force the CPU instead, use: device = torch.device("cpu"))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): the saved output of this cell ends in a CUDA OutOfMemoryError
# right after the 20/20 progress bar completes; the failing call is inside
# run_pytorch and is not visible from this notebook.

# Dataset names noted alongside this loop: "Adult", "Electricity", "Higgs",
# "KDDCup09_appetency", "Mushroom".
for dataset_name in ["Adult"]:
    (
        X_train, y_train, X_val, y_val, X_test, y_test,
        X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor,
        info,
    ) = get_datasets.get_split(dataset_name, device)

    # Group each split's (features, labels) tensors once; every call below
    # receives them as a pair.
    train_data = (X_train_tensor, y_train_tensor)
    val_data = (X_val_tensor, y_val_tensor)
    test_data = (X_test_tensor, y_test_tensor)

    for model_name in ["FTTransformer"]:
        model, train_config = model_config(
            model_name,
            X_train.shape[1],
            2,
            info.num_categories_list,
            info.numerical_cols,
            device,
        )
        train_config["dataset"] = dataset_name
        criterion = nn.CrossEntropyLoss()
        optimizer = run_pytorch.build_optimizer(model, "adam", train_config["learning_rate"])

        # Shallow copy of the training settings, handed to wandb.init as the run config.
        config = dict(train_config)

        # NOTE(review): test() is called with stage="train" in both branches --
        # confirm that label is intended for the test-set evaluation.
        if not wandb_run:
            run_pytorch.train(model, train_data, val_data, criterion, optimizer, train_config)
            run_pytorch.test(model, test_data, train_config, stage="train")
        else:
            with wandb.init(project="TabAttackBench-ModelTraining", config=config):
                run_pytorch.train(model, train_data, val_data, criterion, optimizer, train_config, wandb_run=wandb.run)
                # Evaluate the trained model on the held-out test split.
                run_pytorch.test(model, test_data, train_config, stage="train", wandb_run=wandb.run)


100%|██████████| 20/20 [02:13<00:00,  6.70s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.18 GiB (GPU 0; 8.00 GiB total capacity; 4.31 GiB already allocated; 0 bytes free; 6.52 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Reload the "Adult" split on its own so the cells below can inspect it
# without re-running training.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
(
    X_train, y_train, X_val, y_val, X_test, y_test,
    X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor,
    info,
) = get_datasets.get_split("Adult", device)

In [None]:
# Display the `ohe_feature_names` attribute of the loaded split's info object
# (presumably the one-hot-encoded feature names -- confirm in get_datasets).
info.ohe_feature_names

In [None]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# X_train, y_train, X_val, y_val, X_test, y_test, \
#         X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor, \
#         info = get_datasets.get_dataset("Higgs", device)



In [None]:
# Sum of the validation labels; if labels are 0/1 this is the number of
# positive validation examples -- assumes binary labels, TODO confirm.
y_val.sum()

In [None]:
# numpy is not imported by any earlier cell in this notebook, so import it
# here; otherwise this cell raises NameError on a fresh kernel.
import numpy as np

# Epsilon grid starting at 0.01 in steps of 0.03, stopping before 0.21
# (seven values, 0.01 through 0.19).
epsilon_values = np.arange(0.01, 0.21, 0.03)
epsilon_values

In [None]:
import matplotlib.pyplot as plt

# Line plot of attack_success_rates against epsilon_values.
# NOTE(review): `attack_success_rates` is not assigned in any cell visible in
# this notebook -- it must be computed before this cell can run.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epsilon_values, attack_success_rates, marker='o', linestyle='-')
ax.set_title('Success Rate vs. Epsilon')
ax.set_xlabel('Epsilon')
ax.set_ylabel('Success Rate')
ax.grid(True)
plt.show()

In [None]:
# Line plot of average_l2_distances against epsilon_values.
# NOTE(review): `average_l2_distances` is not assigned in any cell visible in
# this notebook -- it must be computed before this cell can run.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epsilon_values, average_l2_distances, marker='o', linestyle='-')
ax.set_title('L2 Distance vs. Epsilon')
ax.set_xlabel('Epsilon')
ax.set_ylabel('L2 Distance')
ax.grid(True)
plt.show()

In [None]:
# Line plot of attack_success_rates (y) against average_l2_distances (x).
# NOTE(review): neither series is assigned in any cell visible in this
# notebook -- both must be computed before this cell can run.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(average_l2_distances, attack_success_rates, marker='o', linestyle='-')
ax.set_title('L2 Distance vs. Success Rate')
ax.set_xlabel('L2 Distance')
ax.set_ylabel('Success Rate')
ax.grid(True)
plt.show()