In [1]:
import torch
from sklearn.datasets import fetch_openml

from neural_blueprints.utils import Trainer, accuracy, infer_types
from neural_blueprints.config.architectures import TabularBERTConfig
from neural_blueprints.config.utils import TrainerConfig
from neural_blueprints.architectures import TabularBERT
from neural_blueprints.datasets import MaskedTabularDataset, TabularSingleLabelDataset
from neural_blueprints.preprocess import TabularPreprocessor

import logging
logging.basicConfig(
    level=logging.DEBUG,  # or DEBUG if you want even more detail
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)

In [2]:
data = fetch_openml(name="adult", version=2, as_frame=True)
X = data.data
y = data.target

data = X.copy()
data['income'] = y

dtypes = infer_types(data)
data = data.astype(dtypes)
data.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
4,18,,103497,Some-college,10,Never-married,,Own-child,White,Female,0,0,30,United-States,<=50K


In [3]:
preprocessor = TabularPreprocessor()
data, discrete_features, continuous_features = preprocessor.run(data)

2025-12-23 16:57:06,558 - neural_blueprints.preprocess.tabular_preprocess - INFO - Identified 10 discrete features: ['workclass', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country', 'income']
2025-12-23 16:57:06,559 - neural_blueprints.preprocess.tabular_preprocess - INFO - Identified 5 continuous features: ['age', 'fnlwgt', 'capital-gain', 'capital-loss', 'hours-per-week']


### Income Inference Accuracy

In [4]:
dataset = TabularSingleLabelDataset(
    data=data,
    label_column='income',              # Specify the label column for single-label classification
    discrete_features=discrete_features,
    continuous_features=continuous_features
)

train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

In [5]:
bert_config = TabularBERTConfig(
    input_cardinalities = dataset.cardinalities,
    output_cardinalities = [3],
    latent_dim = 64,
    encoder_layers=8,
    dropout_p = 0.1,
    normalization = "batchnorm1d",
    activation = "gelu",
    final_activation = None
)

model = TabularBERT(bert_config)
model.blueprint()

TabularBERT(
  (input_projection): TabularInputProjection(
    (input_projections): ModuleList(
      (0): FeedForwardNetwork(
        (network): Sequential(
          (0): DenseLayer(
            (layer): Sequential(
              (0): Linear(in_features=1, out_features=256, bias=True)
              (1): NormalizationLayer(
                (network): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (2): GELU(approximate='none')
              (3): DropoutLayer(
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
          )
          (1): DenseLayer(
            (layer): Sequential(
              (0): Linear(in_features=256, out_features=128, bias=True)
              (1): NormalizationLayer(
                (network): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (2): GELU(approximate='none')
              (3): DropoutLayer(
 

TabularBERTConfig(input_cardinalities=[1, 9, 1, 16, 16, 7, 15, 6, 5, 2, 1, 1, 1, 42], output_cardinalities=[3], latent_dim=64, encoder_layers=8, dropout_p=0.1, normalization='batchnorm1d', activation='gelu', final_activation=None)

In [6]:
trainer = Trainer(
    model=model,
    config=TrainerConfig(
        criterion='cross_entropy',
        optimizer='adam',
        early_stopping_patience=3,
        learning_rate=1e-3,
        weight_decay=1e-5,
        save_weights_path="./models/bert_adult.pth",
        batch_size=128
    )
)
trainer.train(train_dataset, val_dataset, epochs=5, visualize=True)

2025-12-23 16:47:53,796 - neural_blueprints.utils.trainer - INFO - Trainer initialized on device: cpu


Directory ./models already exists. Existing weights are overwritten.


Training Epochs:  20%|██        | 1/5 [00:28<01:55, 28.90s/epoch]

Epoch 1/5, Training Loss: 0.3640, Validation Loss: 0.3379


Training Epochs:  40%|████      | 2/5 [00:58<01:27, 29.04s/epoch]

Epoch 2/5, Training Loss: 0.3242, Validation Loss: 0.3508


Training Epochs:  60%|██████    | 3/5 [01:27<00:58, 29.07s/epoch]

Epoch 3/5, Training Loss: 0.3196, Validation Loss: 0.3335


Training Epochs:  80%|████████  | 4/5 [01:56<00:29, 29.11s/epoch]

Epoch 4/5, Training Loss: 0.3172, Validation Loss: 0.3298


Training Epochs: 100%|██████████| 5/5 [02:25<00:00, 29.08s/epoch]
2025-12-23 16:50:19,212 - neural_blueprints.utils.trainer - INFO - Training completed in 145.42 seconds.
2025-12-23 16:50:19,212 - neural_blueprints.utils.trainer - INFO - Best validation loss: 3.2439e-01


Epoch 5/5, Training Loss: 0.3164, Validation Loss: 0.3244


In [7]:
X = torch.tensor(val_dataset[:][0])
y = val_dataset[:][1]
with torch.no_grad():
    y_pred = model(X)
    y_pred = y_pred.argmax(dim=1)
print(f"Predictions: {y_pred[:5]}, \n Ground Truth: {y[:5]}")
acc = accuracy(y_pred, y)
print(f"Validation Accuracy: {acc:.4f}")


To copy construct from a tensor, it is recommended to use sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).



Predictions: tensor([2, 2, 1, 1, 2]), 
 Ground Truth: tensor([2, 2, 1, 1, 2])
Validation Accuracy: 0.8477


### Masked Dataset Inference Accuracy

In [4]:
# Create dataset
dataset = MaskedTabularDataset(
    data, 
    discrete_features, 
    continuous_features,
    mask_prob=0.35
)

train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

In [5]:
bert_config = TabularBERTConfig(
    input_cardinalities = dataset.cardinalities,   
    latent_dim = 64,
    encoder_layers=8,
    dropout_p = 0.1,
    normalization = "batchnorm1d",
    activation = "gelu",
    final_activation = None
)

model = TabularBERT(bert_config)
model.blueprint()

TabularBERT(
  (input_projection): TabularInputProjection(
    (input_projections): ModuleList(
      (0): FeedForwardNetwork(
        (network): Sequential(
          (0): DenseLayer(
            (layer): Sequential(
              (0): Linear(in_features=1, out_features=256, bias=True)
              (1): NormalizationLayer(
                (network): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (2): GELU(approximate='none')
              (3): DropoutLayer(
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
          )
          (1): DenseLayer(
            (layer): Sequential(
              (0): Linear(in_features=256, out_features=128, bias=True)
              (1): NormalizationLayer(
                (network): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (2): GELU(approximate='none')
              (3): DropoutLayer(
 

TabularBERTConfig(input_cardinalities=[1, 9, 1, 16, 16, 7, 15, 6, 5, 2, 1, 1, 1, 42, 2], output_cardinalities=[1, 9, 1, 16, 16, 7, 15, 6, 5, 2, 1, 1, 1, 42, 2], latent_dim=64, encoder_layers=8, dropout_p=0.1, normalization='batchnorm1d', activation='gelu', final_activation=None)

In [6]:
trainer = Trainer(
    model=model,
    config=TrainerConfig(
        criterion='mixed_type_reconstruction_loss',
        optimizer='adam',
        early_stopping_patience=3,
        learning_rate=1e-3,
        weight_decay=1e-5,
        save_weights_path="./models/bert_adult.pth",
        batch_size=128
    )
)
trainer.train(train_dataset, val_dataset, epochs=20, visualize=True)

2025-12-23 16:57:08,970 - neural_blueprints.utils.trainer - INFO - Trainer initialized on device: cpu


Directory ./models already exists. Existing weights are overwritten.


Training Epochs:   5%|▌         | 1/20 [00:36<11:40, 36.88s/epoch]

Epoch 1/20, Training Loss: 167.0596, Validation Loss: 161.7541


Training Epochs:  10%|█         | 2/20 [01:13<11:05, 37.00s/epoch]

Epoch 2/20, Training Loss: 162.3377, Validation Loss: 161.3914


Training Epochs:  15%|█▌        | 3/20 [01:50<10:24, 36.73s/epoch]

Epoch 3/20, Training Loss: 162.1939, Validation Loss: 161.3275


Training Epochs:  20%|██        | 4/20 [02:26<09:45, 36.59s/epoch]

Epoch 4/20, Training Loss: 162.0364, Validation Loss: 161.0797


Training Epochs:  25%|██▌       | 5/20 [03:02<09:06, 36.46s/epoch]

Epoch 5/20, Training Loss: 161.7602, Validation Loss: 160.8913


Training Epochs:  30%|███       | 6/20 [03:39<08:29, 36.40s/epoch]

Epoch 6/20, Training Loss: 161.6552, Validation Loss: 160.8497


Training Epochs:  35%|███▌      | 7/20 [04:15<07:52, 36.38s/epoch]

Epoch 7/20, Training Loss: 161.6360, Validation Loss: 160.7858


Training Epochs:  40%|████      | 8/20 [04:51<07:16, 36.35s/epoch]

Epoch 8/20, Training Loss: 161.6205, Validation Loss: 160.6914


Training Epochs:  45%|████▌     | 9/20 [05:28<06:39, 36.31s/epoch]

Epoch 9/20, Training Loss: 161.5727, Validation Loss: 160.7497


Training Epochs:  50%|█████     | 10/20 [06:04<06:02, 36.29s/epoch]

Epoch 10/20, Training Loss: 161.5364, Validation Loss: 160.7156


Training Epochs:  55%|█████▌    | 11/20 [06:40<05:26, 36.29s/epoch]

Epoch 11/20, Training Loss: 161.4082, Validation Loss: 160.5492


Training Epochs:  60%|██████    | 12/20 [07:16<04:50, 36.29s/epoch]

Epoch 12/20, Training Loss: 161.2808, Validation Loss: 160.4908


Training Epochs:  65%|██████▌   | 13/20 [07:53<04:14, 36.29s/epoch]

Epoch 13/20, Training Loss: 161.2754, Validation Loss: 160.4140


Training Epochs:  70%|███████   | 14/20 [08:29<03:37, 36.28s/epoch]

Epoch 14/20, Training Loss: 161.2658, Validation Loss: 160.3843


Training Epochs:  75%|███████▌  | 15/20 [09:05<03:01, 36.30s/epoch]

Epoch 15/20, Training Loss: 161.2519, Validation Loss: 160.4581


Training Epochs:  80%|████████  | 16/20 [09:42<02:25, 36.33s/epoch]

Epoch 16/20, Training Loss: 161.1953, Validation Loss: 160.2829


Training Epochs:  85%|████████▌ | 17/20 [10:18<01:48, 36.33s/epoch]

Epoch 17/20, Training Loss: 161.1481, Validation Loss: 160.2903


Training Epochs:  90%|█████████ | 18/20 [10:54<01:12, 36.32s/epoch]

Epoch 18/20, Training Loss: 161.1756, Validation Loss: 160.3524


Training Epochs:  95%|█████████▌| 19/20 [11:31<00:36, 36.31s/epoch]

Epoch 19/20, Training Loss: 161.1237, Validation Loss: 160.2515


Training Epochs: 100%|██████████| 20/20 [12:07<00:00, 36.37s/epoch]
2025-12-23 17:09:16,440 - neural_blueprints.utils.trainer - INFO - Training completed in 727.47 seconds.
2025-12-23 17:09:16,440 - neural_blueprints.utils.trainer - INFO - Best validation loss: 1.6025e+02


Epoch 20/20, Training Loss: 161.0993, Validation Loss: 160.2979


In [7]:
X = val_dataset[:][0]
y = val_dataset[:][1]
mask = val_dataset[:][2]
with torch.no_grad():
    y_pred = model(x=X)

dis_accuracy = 0
cont_accuracy = 0
for column_idx, column_name in enumerate(data.columns):
    print(f"\nFeature Column {column_name}:")
    predicted_attributes = y_pred[column_idx]      # shape: (batch_size, num_classes)
    targets = y[:, column_idx]                     # shape: (batch_size,)

    feature_mask = mask[:, column_idx]                  # shape: (batch_size,)
    predicted_attributes = predicted_attributes[feature_mask]
    if predicted_attributes.size(1) > 1:
        predicted_attributes = predicted_attributes.softmax(dim=-1).argmax(dim=-1).cpu().numpy()
    else:
        predicted_attributes = predicted_attributes.squeeze(-1).cpu().numpy()
    targets = targets[feature_mask].cpu().numpy()

    print("Predicted attribute values:", predicted_attributes[:5])
    print("True attribute values:", targets[:5])

    accuracy_value = accuracy(torch.tensor(predicted_attributes), torch.tensor(targets))
    print(f"Accuracy: {accuracy_value:.4f}")
    if column_name in discrete_features:
        dis_accuracy += accuracy_value
    else:
        cont_accuracy += accuracy_value

avg_dis_accuracy = dis_accuracy / len(discrete_features) if len(discrete_features) > 0 else 0
avg_cont_accuracy = cont_accuracy / len(continuous_features) if len(continuous_features) > 0 else 0
print(f"\nAverage Discrete Accuracy: {avg_dis_accuracy:.4f}")
print(f"Average Continuous Accuracy: {avg_cont_accuracy:.4f}")
avg_accuracy = (dis_accuracy + cont_accuracy) / len(data.columns)
print(f"Overall Average Accuracy: {avg_accuracy:.4f}")


Feature Column age:
Predicted attribute values: [0.0320401  0.14537771 0.31159168 0.38633358 0.3064133 ]
True attribute values: [0.         0.08219178 0.28767124 0.3561644  0.30136988]
Accuracy: 0.2905

Feature Column workclass:
Predicted attribute values: [4 4 4 4 4]
True attribute values: [4. 2. 4. 1. 4.]
Accuracy: 0.7079

Feature Column fnlwgt:
Predicted attribute values: [0.08354902 0.10376754 0.10186435 0.10423055 0.09699711]
True attribute values: [0.08226626 0.20848581 0.16234529 0.15389195 0.2190337 ]
Accuracy: 0.6049

Feature Column education:
Predicted attribute values: [10 16 12 16 16]
True attribute values: [10. 12. 12. 16. 16.]
Accuracy: 0.7211

Feature Column education-num:
Predicted attribute values: [16 14  2 16  5]
True attribute values: [16. 14. 16.  2.  6.]
Accuracy: 0.7330

Feature Column marital-status:
Predicted attribute values: [1 1 3 5 5]
True attribute values: [1. 1. 3. 1. 5.]
Accuracy: 0.7665

Feature Column occupation:
Predicted attribute values: [ 1 10  3 