## Task 3: Model Training

In [1]:
import wandb
import os
# Expose the notebook's file name to Weights & Biases through its environment
# variable. NOTE(review): W&B reads this when it cannot detect the notebook
# name on its own -- confirm against the W&B docs for the installed version.
os.environ["WANDB_NOTEBOOK_NAME"] = "model_training.ipynb"
# Interactive login is left disabled; presumably cached credentials are used
# instead -- TODO confirm before running on a fresh machine.
# wandb.login()

In [2]:
import torch
from torch import optim, nn
from tqdm import tqdm

from models import run_pytorch
from data import get_datasets

from models.pytorch.mlp import MLP
from models.pytorch.tab_transformer import TabTransformer
from models.pytorch.ft_transformer import FTTransformer
from models.pytorch.logistic_regression import LogisticRegression


[34m[1mwandb[0m: Currently logged in as: [33mzhipeng-he[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
def model_config(model, input_dim, output_dim, categories_list, numerical_cols, device):
    """Build a model by name together with its default training configuration.

    Args:
        model: Architecture name. Supported values are "LogisticRegression",
            "MLP", "TabTransformer" and "FTTransformer".
        input_dim: Forwarded as ``input_dim`` to LogisticRegression and MLP;
            not used by the two transformer architectures.
        output_dim: Forwarded as ``output_dim`` (LogisticRegression, MLP) or
            ``dim_out`` (TabTransformer, FTTransformer).
        categories_list: Forwarded as ``categories``; for MLP its length is
            also passed as ``num_categorical_feature``.
        numerical_cols: Only ``len(numerical_cols)`` is used, as the number of
            numerical / continuous features.
        device: Target handed to ``.to(device)`` on the constructed model.

    Returns:
        A ``(network, train_config)`` tuple. ``network`` is the constructed
        model after ``.to(device)``; ``train_config`` is a fresh dict with the
        keys "epochs", "batch_size", "learning_rate" and "model".

    Raises:
        ValueError: If ``model`` is not one of the supported names. Without
            this check the function would fall through and return ``None``,
            which only surfaces later as a confusing unpacking error.
    """
    if model == "LogisticRegression":
        model_cls = LogisticRegression
        model_kwargs = {
            "input_dim": input_dim,
            "output_dim": output_dim,
        }
    elif model == "MLP":
        model_cls = MLP
        model_kwargs = {
            "input_dim": input_dim,
            "output_dim": output_dim,
            "num_hidden_layers": 2,
            "hidden_layer_dims": [64, 32],
            "dropout": 0.2,
            "categories": categories_list,
            "embedding_dim": 8,
            "num_categorical_feature": len(categories_list),
            "num_numerical_feature": len(numerical_cols),
        }
    elif model == "TabTransformer":
        model_cls = TabTransformer
        model_kwargs = {
            "categories": categories_list,
            "num_continuous": len(numerical_cols),
            "dim": 8,  # can sweep
            "dim_out": output_dim,
            "depth": 6,
            "heads": 8,
            "attn_dropout": 0.2,
            "ff_dropout": 0.2,
            "mlp_hidden_mults": (4, 2),
            "mlp_act": nn.ReLU(),
            "continuous_mean_std": None,
        }
    elif model == "FTTransformer":
        model_cls = FTTransformer
        model_kwargs = {
            "categories": categories_list,
            "num_continuous": len(numerical_cols),
            "dim": 8,
            "dim_out": output_dim,
            "depth": 6,
            "heads": 8,
            "attn_dropout": 0.2,
            "ff_dropout": 0.2,
        }
    else:
        raise ValueError(
            f"Unknown model {model!r}; expected one of "
            "'LogisticRegression', 'MLP', 'TabTransformer', 'FTTransformer'."
        )

    # Every architecture currently shares the same training defaults; callers
    # may override individual entries (e.g. "batch_size") on the returned dict.
    train_config = {
        "epochs": 20,
        "batch_size": 512,
        "learning_rate": 1e-3,
        "model": model,
    }
    return model_cls(**model_kwargs).to(device), train_config

Run the main training loop

In [4]:
# Switch for Weights & Biases logging: when truthy, the training loop wraps each
# run in wandb.init(...) and passes the active run to train/test; when False,
# training and testing run without a W&B run object.
wandb_run = False

In [5]:
# ### Just for testing - Ablation

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # device = torch.device("cpu")

# for dataset_name in ["Adult"]: # "Adult", "Electricity", "Higgs", "KDDCup09_appetency", "Mushroom"
#     X_train, y_train, X_val, y_val, X_test, y_test, \
#         X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor, \
#         info = get_datasets.get_split_continues(dataset_name, device)
#     for model_name in ["MLP"]:
#         model, train_config = model_config(model_name, X_train.shape[1], 2, [], info.numerical_cols, device)
#         train_config["dataset"] = f"{dataset_name}_continuous_only"
#         train_config["device"] = device
#         criterion = nn.CrossEntropyLoss()
#         optimizer = run_pytorch.build_optimizer(model, "adam", train_config["learning_rate"])

#         print(f"X_train shape: {X_train.shape}")
#         config = {**train_config}

#         if wandb_run:
#             with wandb.init(project="TabAttackBench-ModelTraining", config=config):
#                 run_pytorch.train(model, (X_train_tensor, y_train_tensor), (X_val_tensor, y_val_tensor), criterion, optimizer, train_config, wandb_run=wandb.run)
#                 # torch.cuda.empty_cache() # clear GPU memory
#                 # and test its final performance
#                 run_pytorch.test(model, (X_test_tensor, y_test_tensor), train_config, stage="train", wandb_run=wandb.run)
#                 torch.cuda.empty_cache()
#         else:
#             run_pytorch.train(model, (X_train_tensor, y_train_tensor), (X_val_tensor, y_val_tensor), criterion, optimizer, train_config)
#             # torch.cuda.empty_cache() # clear GPU memory
#             run_pytorch.test(model, (X_test_tensor, y_test_tensor), train_config, stage="train")
#             torch.cuda.empty_cache()


In [6]:
# for model_name in ["MLP"]:
#     model, train_config = model_config(model_name, X_train.shape[1], 2, info.num_categories_list, info.numerical_cols, device)
#     train_config["dataset"] = dataset_name
#     train_config["device"] = device

In [7]:
# X_train.shape[1]

In [20]:
# Train on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# device = torch.device("cpu")

# Datasets that train with a smaller batch size than the model_config default.
batch_size_overrides = {
    "Diabetes": 64,
    "BreastCancer": 64,
    "WineQuality-Red": 64,
    "WineQuality-White": 128,
}

for dataset_name in ["WineQuality-White"]: # "Adult", "Electricity", "Higgs", "KDDCup09_appetency", "Mushroom", "GermanCredit", "BankMarketing", "house_16H", "jm1", "Diabetes"
    (
        X_train, y_train, X_val, y_val, X_test, y_test,
        X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor,
        info,
    ) = get_datasets.get_split(dataset_name, device)

    # The three splits are handed to run_pytorch as (features, labels) pairs.
    train_data = (X_train_tensor, y_train_tensor)
    val_data = (X_val_tensor, y_val_tensor)
    test_data = (X_test_tensor, y_test_tensor)

    for model_name in ("LogisticRegression", "MLP", "TabTransformer", "FTTransformer"):
        model, train_config = model_config(
            model_name,
            X_train.shape[1],
            2,
            info.num_categories_list,
            info.numerical_cols,
            device,
        )
        train_config.update(dataset=dataset_name, device=device)

        override = batch_size_overrides.get(dataset_name)
        if override is not None:
            train_config["batch_size"] = override

        criterion = nn.CrossEntropyLoss()
        optimizer = run_pytorch.build_optimizer(model, "adam", train_config["learning_rate"])

        # Snapshot of the settings that is attached to the W&B run.
        config = dict(train_config)

        if not wandb_run:
            # Plain run: train, evaluate on the test split, then release cached GPU memory.
            run_pytorch.train(model, train_data, val_data, criterion, optimizer, train_config)
            run_pytorch.test(model, test_data, train_config, stage="train")
            torch.cuda.empty_cache()
            continue

        # Logged run: same steps, with the active W&B run passed to both calls.
        with wandb.init(project="TabAttackBench-ModelTraining", config=config):
            run_pytorch.train(model, train_data, val_data, criterion, optimizer, train_config, wandb_run=wandb.run)
            run_pytorch.test(model, test_data, train_config, stage="train", wandb_run=wandb.run)
            torch.cuda.empty_cache()


Training LogisticRegression on WineQuality-White...


100%|██████████| 20/20 [00:00<00:00, 22.54it/s]


Running test: LogisticRegression on WineQuality-White...


100%|██████████| 8/8 [00:00<00:00, 890.20it/s]


Accuracy: 67.45%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
verbose: False, log level: Level.ERROR

ONNX model saved as models/train/LogisticRegression/WineQuality-White/train_run-test.onnx. Consider passing a WandB run object for saving.
PyTorch model saved as models/train/LogisticRegression/WineQuality-White/train_run-test.pt. Consider passing a WandB run object for saving.
Training MLP on WineQuality-White...


100%|██████████| 20/20 [00:00<00:00, 22.29it/s]


Running test: MLP on WineQuality-White...


100%|██████████| 8/8 [00:00<00:00, 1505.63it/s]


Accuracy: 74.69%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
verbose: False, log level: Level.ERROR

ONNX model saved as models/train/MLP/WineQuality-White/train_run-test.onnx. Consider passing a WandB run object for saving.
PyTorch model saved as models/train/MLP/WineQuality-White/train_run-test.pt. Consider passing a WandB run object for saving.
Training TabTransformer on WineQuality-White...


100%|██████████| 20/20 [00:00<00:00, 20.70it/s]


Running test: TabTransformer on WineQuality-White...


100%|██████████| 8/8 [00:00<00:00, 2108.75it/s]
  assert x_cont.shape[1] == self.num_continuous, f'you must pass in {self.num_continuous} values for your continuous input'
  if not return_attn:


Accuracy: 73.16%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
verbose: False, log level: Level.ERROR

ONNX model saved as models/train/TabTransformer/WineQuality-White/train_run-test.onnx. Consider passing a WandB run object for saving.
PyTorch model saved as models/train/TabTransformer/WineQuality-White/train_run-test.pt. Consider passing a WandB run object for saving.
Training FTTransformer on WineQuality-White...


100%|██████████| 20/20 [00:05<00:00,  3.41it/s]


Running test: FTTransformer on WineQuality-White...


100%|██████████| 8/8 [00:00<00:00, 308.31it/s]
  if not return_attn:


Accuracy: 75.20%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
verbose: False, log level: Level.ERROR

ONNX model saved as models/train/FTTransformer/WineQuality-White/train_run-test.onnx. Consider passing a WandB run object for saving.
PyTorch model saved as models/train/FTTransformer/WineQuality-White/train_run-test.pt. Consider passing a WandB run object for saving.


In [9]:
def test(model, data, config, stage="test", wandb_run=None):
    model_name = config["model"]
    data_name = config["dataset"]

    X_test_tensor, y_test_tensor = data

    run_name = wandb.run.name if wandb_run else "test"
    sweep_id = wandb.run.sweep_id if wandb_run and stage == "sweep" else stage

    print(f"Running test: {config['model']} on {config['dataset']}...")
    model.eval()
    correct_predictions = 0
    total_samples = 0
    with torch.no_grad():
        for i in tqdm(range(0, X_test_tensor.size(0), config["batch_size"])):
            inputs = X_test_tensor[i:i+config["batch_size"]].to(config["device"])
            labels = y_test_tensor[i:i+config["batch_size"]].to(config["device"])

            test_outputs = model(inputs)
            # predicted = (test_outputs >= 0.5).float()
            _, predicted = test_outputs.max(dim=1)

            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)
        accuracy = correct_predictions / total_samples
        print(f"Accuracy: {accuracy * 100:.2f}%")
        
        if wandb_run:
            wandb_run.log({"test_accuracy": accuracy})
        else:
            print("Test results logged to WandB not available. Consider passing a WandB run object for logging.")

In [10]:
# Reload the saved checkpoints under models/train/ and re-score them on each
# dataset's test split. `device` is expected to be defined by an earlier cell.
for dataset_name in ["GermanCredit"]: # "Adult", "Electricity", "Higgs", "BankMarketing", "house_16H", "GermanCredit", "jm1", "Diabetes"
    X_train, y_train, X_val, y_val, X_test, y_test, \
        X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor, \
        info = get_datasets.get_split(dataset_name, device)
    # Considering size of Higgs dataset, we will use only 10k samples for adversarial attack.
    if dataset_name == "Higgs":
        X_test = X_test[:10000]
        y_test = y_test[:10000]
        X_test_tensor = X_test_tensor[:10000]
        y_test_tensor = y_test_tensor[:10000]

    for model_name in ["LogisticRegression", "MLP", "TabTransformer", "FTTransformer"]:
        model, train_config = run_pytorch.model_config(model_name, X_train.shape[1], 2, info.num_categories_list, info.numerical_cols, device)
        train_config["dataset"] = dataset_name
        train_config["device"] = device

        # Evaluation only needs the weights, so no loss or optimizer is built here.
        path = f"models/train/{model_name}/{dataset_name}/train_run-test.pt"
        # map_location remaps the stored tensors onto the current device, so a
        # checkpoint written on a GPU machine still loads on a CPU-only one.
        model.load_state_dict(torch.load(path, map_location=device))
        test(model, (X_test_tensor, y_test_tensor), train_config, stage="train")

        

Running test: LogisticRegression on GermanCredit...


100%|██████████| 1/1 [00:00<00:00, 1605.17it/s]


Accuracy: 70.50%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
Running test: MLP on GermanCredit...


100%|██████████| 1/1 [00:00<00:00, 423.37it/s]


Accuracy: 74.00%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
Running test: TabTransformer on GermanCredit...


100%|██████████| 1/1 [00:00<00:00, 73.79it/s]


Accuracy: 75.00%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
Running test: FTTransformer on GermanCredit...


100%|██████████| 1/1 [00:00<00:00, 79.77it/s]

Accuracy: 70.50%
Test results logged to WandB not available. Consider passing a WandB run object for logging.





In [11]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# X_train, y_train, X_val, y_val, X_test, y_test, \
#         X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor, \
#         info = get_datasets.get_dataset("Higgs", device)

