## Task 3: Model Training

In [1]:
import wandb
import os
# Set the WANDB_NOTEBOOK_NAME environment variable to this notebook's file
# name before any wandb run is started (presumably so wandb can associate
# runs with the notebook -- see the wandb environment-variable docs).
os.environ["WANDB_NOTEBOOK_NAME"] = "model_training.ipynb"
# wandb.login()

In [2]:
import torch
from torch import optim, nn
from tqdm import tqdm

from models import run_pytorch
from data import get_datasets

from models.pytorch.mlp import MLP
from models.pytorch.tab_transformer import TabTransformer
from models.pytorch.ft_transformer import FTTransformer
from models.pytorch.logistic_regression import LogisticRegression


[34m[1mwandb[0m: Currently logged in as: [33mzhipeng-he[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
def model_config(model, input_dim, output_dim, categories_list, numerical_cols, device):
    """Build the requested model on ``device`` together with its training config.

    Args:
        model: Name of the architecture to build. One of
            ``"LogisticRegression"``, ``"MLP"``, ``"TabTransformer"`` or
            ``"FTTransformer"``.
        input_dim: Input width; only forwarded to LogisticRegression and MLP.
        output_dim: Output width, forwarded as ``output_dim`` (LogisticRegression,
            MLP) or ``dim_out`` (TabTransformer, FTTransformer).
        categories_list: Forwarded as ``categories`` to MLP and both
            transformers; its length is also used as the MLP's
            ``num_categorical_feature``. Presumably the per-feature category
            counts -- confirm against the model classes.
        numerical_cols: Sized collection of numerical columns; only its length
            is used.
        device: Target passed to ``.to(device)`` on the constructed model.

    Returns:
        A ``(model_instance, train_config)`` tuple. ``train_config`` is a fresh
        dict with ``epochs``, ``batch_size``, ``learning_rate`` and ``model``
        keys (plus ``dropout`` for the MLP).

    Raises:
        ValueError: If ``model`` is not one of the supported names. Previously
            the function silently returned ``None`` in that case, which made
            the caller's tuple unpacking fail with an unrelated error.
    """
    # All architectures currently share the same schedule. A new dict is built
    # on every call, so callers can add keys (dataset, device, ...) freely.
    train_config = {
        "epochs": 20,
        "batch_size": 512,
        "learning_rate": 1e-3,
    }

    if model == "LogisticRegression":
        model_kwargs = {
            "input_dim": input_dim,
            "output_dim": output_dim,
        }
        train_config["model"] = "LogisticRegression"
        return LogisticRegression(**model_kwargs).to(device), train_config

    if model == "MLP":
        # Single source of truth so the logged dropout always matches the model.
        dropout = 0.2
        model_kwargs = {
            "input_dim": input_dim,
            "output_dim": output_dim,
            "num_hidden_layers": 2,
            "hidden_layer_dims": [64, 32],
            "dropout": dropout,
            "categories": categories_list,
            "embedding_dim": 8,
            "num_categorical_feature": len(categories_list),
            "num_numerical_feature": len(numerical_cols),
        }
        train_config["model"] = "MLP"
        train_config["dropout"] = dropout
        return MLP(**model_kwargs).to(device), train_config

    if model == "TabTransformer":
        model_kwargs = {
            "categories": categories_list,
            "num_continuous": len(numerical_cols),
            "dim": 8,  # can sweep
            "dim_out": output_dim,
            "depth": 6,
            "heads": 8,
            "attn_dropout": 0.2,
            "ff_dropout": 0.2,
            "mlp_hidden_mults": (4, 2),
            "mlp_act": nn.ReLU(),
            "continuous_mean_std": None,
        }
        train_config["model"] = "TabTransformer"
        return TabTransformer(**model_kwargs).to(device), train_config

    if model == "FTTransformer":
        model_kwargs = {
            "categories": categories_list,
            "num_continuous": len(numerical_cols),
            "dim": 8,
            "dim_out": output_dim,
            "depth": 6,
            "heads": 8,
            "attn_dropout": 0.2,
            "ff_dropout": 0.2,
        }
        train_config["model"] = "FTTransformer"
        return FTTransformer(**model_kwargs).to(device), train_config

    # Fail loudly instead of implicitly returning None for a typo'd name.
    raise ValueError(
        f"Unknown model {model!r}; expected one of "
        "'LogisticRegression', 'MLP', 'TabTransformer', 'FTTransformer'."
    )

Run the main training loop: train and evaluate each selected model on each dataset.

In [4]:
# Toggle for Weights & Biases logging: when True the training cell wraps each
# run in wandb.init(...) and passes the active run to train/test; when False
# training and testing are called without a wandb run object.
wandb_run = False

In [13]:
# ### Just for testing - Ablation

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # device = torch.device("cpu")

# for dataset_name in ["Adult"]: # "Adult", "Electricity", "Higgs", "KDDCup09_appetency", "Mushroom"
#     X_train, y_train, X_val, y_val, X_test, y_test, \
#         X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor, \
#         info = get_datasets.get_split_continues(dataset_name, device)
#     for model_name in ["MLP"]:
#         model, train_config = model_config(model_name, X_train.shape[1], 2, [], info.numerical_cols, device)
#         train_config["dataset"] = f"{dataset_name}_continuous_only"
#         train_config["device"] = device
#         criterion = nn.CrossEntropyLoss()
#         optimizer = run_pytorch.build_optimizer(model, "adam", train_config["learning_rate"])

#         print(f"X_train shape: {X_train.shape}")
#         config = {**train_config}

#         if wandb_run:
#             with wandb.init(project="TabAttackBench-ModelTraining", config=config):
#                 run_pytorch.train(model, (X_train_tensor, y_train_tensor), (X_val_tensor, y_val_tensor), criterion, optimizer, train_config, wandb_run=wandb.run)
#                 # torch.cuda.empty_cache() # clear GPU memory
#                 # and test its final performance
#                 run_pytorch.test(model, (X_test_tensor, y_test_tensor), train_config, stage="train", wandb_run=wandb.run)
#                 torch.cuda.empty_cache()
#         else:
#             run_pytorch.train(model, (X_train_tensor, y_train_tensor), (X_val_tensor, y_val_tensor), criterion, optimizer, train_config)
#             # torch.cuda.empty_cache() # clear GPU memory
#             run_pytorch.test(model, (X_test_tensor, y_test_tensor), train_config, stage="train")
#             torch.cuda.empty_cache()


In [14]:
# for model_name in ["MLP"]:
#     model, train_config = model_config(model_name, X_train.shape[1], 2, info.num_categories_list, info.numerical_cols, device)
#     train_config["dataset"] = dataset_name
#     train_config["device"] = device

In [15]:
# X_train.shape[1]

In [16]:
# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# device = torch.device("cpu")

# "KDDCup09_appetency" is listed as a further dataset option but is not part of this run.
dataset_names = ["Adult", "Electricity", "Higgs", "Mushroom"]
# Other model options: "MLP", "TabTransformer", "FTTransformer"
model_names = ["LogisticRegression"]

for dataset_name in dataset_names:
    # get_split yields 13 values: the six raw splits, the six tensor splits
    # (test tensors come before the validation tensors) and a dataset info object.
    (
        X_train, y_train, X_val, y_val, X_test, y_test,
        X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor,
        info,
    ) = get_datasets.get_split(dataset_name, device)

    # (features, labels) pairs handed to the train/test helpers.
    train_data = (X_train_tensor, y_train_tensor)
    val_data = (X_val_tensor, y_val_tensor)
    test_data = (X_test_tensor, y_test_tensor)

    for model_name in model_names:
        model, train_config = model_config(
            model_name,
            X_train.shape[1],
            2,
            info.num_categories_list,
            info.numerical_cols,
            device,
        )
        train_config.update(dataset=dataset_name, device=device)

        criterion = nn.CrossEntropyLoss()
        optimizer = run_pytorch.build_optimizer(model, "adam", train_config["learning_rate"])

        # Shallow copy used as the wandb run configuration.
        config = dict(train_config)

        if not wandb_run:
            run_pytorch.train(model, train_data, val_data, criterion, optimizer, train_config)
            # torch.cuda.empty_cache() # clear GPU memory
            run_pytorch.test(model, test_data, train_config, stage="train")
            torch.cuda.empty_cache()
        else:
            with wandb.init(project="TabAttackBench-ModelTraining", config=config):
                run_pytorch.train(
                    model, train_data, val_data, criterion, optimizer, train_config,
                    wandb_run=wandb.run,
                )
                # torch.cuda.empty_cache() # clear GPU memory
                # and test its final performance
                run_pytorch.test(model, test_data, train_config, stage="train", wandb_run=wandb.run)
                torch.cuda.empty_cache()


Training LogisticRegression on Adult...


100%|██████████| 20/20 [00:02<00:00,  8.39it/s]


Running test: LogisticRegression on Adult...


100%|██████████| 13/13 [00:00<00:00, 2026.46it/s]


Accuracy: 83.40%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
verbose: False, log level: Level.ERROR

ONNX model saved as models/train/LogisticRegression/Adult/train_run-test.onnx. Consider passing a WandB run object for saving.
PyTorch model saved as models/train/LogisticRegression/Adult/train_run-test.pt. Consider passing a WandB run object for saving.
Training LogisticRegression on Electricity...


100%|██████████| 20/20 [00:01<00:00, 11.96it/s]


Running test: LogisticRegression on Electricity...


100%|██████████| 18/18 [00:00<00:00, 2428.59it/s]


Accuracy: 66.07%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
verbose: False, log level: Level.ERROR

ONNX model saved as models/train/LogisticRegression/Electricity/train_run-test.onnx. Consider passing a WandB run object for saving.
PyTorch model saved as models/train/LogisticRegression/Electricity/train_run-test.pt. Consider passing a WandB run object for saving.
Training LogisticRegression on Higgs...


100%|██████████| 20/20 [00:34<00:00,  1.71s/it]


Running test: LogisticRegression on Higgs...


100%|██████████| 391/391 [00:00<00:00, 2689.76it/s]


Accuracy: 63.15%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
verbose: False, log level: Level.ERROR

ONNX model saved as models/train/LogisticRegression/Higgs/train_run-test.onnx. Consider passing a WandB run object for saving.
PyTorch model saved as models/train/LogisticRegression/Higgs/train_run-test.pt. Consider passing a WandB run object for saving.
Training LogisticRegression on Mushroom...


100%|██████████| 20/20 [00:00<00:00, 58.57it/s]


Running test: LogisticRegression on Mushroom...


100%|██████████| 4/4 [00:00<00:00, 1607.63it/s]

Accuracy: 94.89%
Test results logged to WandB not available. Consider passing a WandB run object for logging.
verbose: False, log level: Level.ERROR

ONNX model saved as models/train/LogisticRegression/Mushroom/train_run-test.onnx. Consider passing a WandB run object for saving.
PyTorch model saved as models/train/LogisticRegression/Mushroom/train_run-test.pt. Consider passing a WandB run object for saving.





In [9]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# X_train, y_train, X_val, y_val, X_test, y_test, \
#         X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor, \
#         info = get_datasets.get_dataset("Higgs", device)

