In [1]:
import torch 
import pandas as pd
import torch.nn as nn
import time
import io
import joblib
import argparse

from torchsummary import summary
from tqdm import tqdm
from tabulate import tabulate
from sklearn.preprocessing import *

from AutoCleanse.utils import *
from AutoCleanse.dataloader import PlainDataset, DataLoader
from AutoCleanse.autoencoder import *
from AutoCleanse.loss_model import loss_CEMSE
from AutoCleanse.preprocessor import Preprocessor
from AutoCleanse.anonymize import anonymize
from AutoCleanse.bucketfs_client import *

Device configuration

In [2]:
# Run on the first CUDA GPU when PyTorch reports one is available, else on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Seed PyTorch's default random number generator.
torch.manual_seed(42)

<torch._C.Generator at 0x7fcabef6bed0>

## Setup directory path

In [3]:
import os

# All paths are resolved relative to the directory the notebook is started from.
# No os.chdir() is needed: PROJECT_DIR already is the current working directory.
PROJECT_DIR = os.getcwd()
DATASET_DIR = os.path.join(PROJECT_DIR, 'dataset')   # input CSV files
EVAL_DIR = os.path.join(PROJECT_DIR, 'evaluate')     # evaluation folder

## Preparing data

Load dataframe and group features by their type

In [4]:
# Read adult.csv from the dataset folder and discard the two columns not used here.
df = pd.read_csv(
    os.path.join(DATASET_DIR, 'adult.csv')
).drop(columns=['fnlwgt', 'income'])

# Keep the column order as loaded, before the frame is rearranged below.
og_columns = df.columns.to_list()

# Group feature names by dtype: numeric columns vs. string/boolean columns.
continous_columns = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_columns = df.select_dtypes(include=['object', 'bool']).columns.tolist()

# Reorder the frame so the numeric block comes first, followed by the categorical block.
df = df[continous_columns + categorical_columns]

Data preprocessing

In [5]:
# Building blocks handed to the project's Preprocessor. The encoder returns dense
# arrays and ignores categories it did not see while fitting.
scaler = MinMaxScaler()
onehotencoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
preprocessor = Preprocessor(scaler, onehotencoder)

# 70 / 15 / 15 split with a fixed seed.
X_train, X_val, X_test = preprocessor.split(
    df=df,
    train_ratio=0.7,
    val_ratio=0.15,
    test_ratio=0.15,
    random_seed=42,
)

# The dirty set is derived from the raw (not yet transformed) test split.
# presumably 0.2 is the corruption ratio and 42 the random seed — TODO confirm
X_dirty = replace_with_nan(X_test, 0.2, 42)

# Fit on the training split only ...
X_train = preprocessor.fit_transform(
    input_df=X_train,
    continous_columns=continous_columns,
    categorical_columns=categorical_columns,
)

# ... then apply the fitted preprocessor to the remaining splits, in the order
# validation, test, dirty.
X_val, X_test, X_dirty = (
    preprocessor.transform(
        input_df=frame,
        continous_columns=continous_columns,
        categorical_columns=categorical_columns,
    )
    for frame in (X_val, X_test, X_dirty)
)

# Category lists exposed by the preprocessor's encoder; passed to train_model later.
categories = preprocessor.encoder.categories_

  df[:] = flat_data.reshape(df.shape)
  df[:] = flat_data.reshape(df.shape)
  df[:] = flat_data.reshape(df.shape)
  df[:] = flat_data.reshape(df.shape)
  df[:] = flat_data.reshape(df.shape)


In [6]:
# Inspect the preprocessed dirty frame; as the last expression of the cell it is
# rendered by the notebook.
X_dirty

Unnamed: 0,age,education.num,capital.gain,capital.loss,hours.per.week,workclass_?,workclass_Federal-gov,workclass_Local-gov,workclass_Never-worked,workclass_Private,...,native.country_Portugal,native.country_Puerto-Rico,native.country_Scotland,native.country_South,native.country_Taiwan,native.country_Thailand,native.country_Trinadad&Tobago,native.country_United-States,native.country_Vietnam,native.country_Yugoslavia
28296,0.424658,0.266667,0.000000,0.000000,9208.575629,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
28217,0.150685,0.533333,0.000000,0.000000,0.397959,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8054,0.287671,0.266667,0.000000,0.000000,0.397959,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4223,-8742.361300,8561.805903,-5708.054784,0.000000,0.193878,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
22723,8448.118926,0.666667,7859.623854,0.000000,0.846939,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21656,0.109589,6693.105868,0.000000,7976.678645,0.346939,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
28318,0.027397,0.400000,0.000000,2640.493095,0.397959,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
32547,0.301370,0.733333,-8312.431233,-4863.080020,-7774.083226,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
9375,0.342466,-7669.257613,0.000000,0.000000,0.397959,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


(Optional) Check save/load function of preprocessor

Both functions take two parameters:

    - Suffix of the preprocessor name, in the example below would be **preprocessor_main.pkl**
    - Save/load location: can either be "local" to save/load in the home folder or "bucketfs" to save/load to/from Exasol BucketFS

In [None]:
# Persist the fitted preprocessor to both supported locations.
preprocessor.save("main","local")
preprocessor.save("main","bucketfs")

# Load into a *separate* instance so that the fitted `preprocessor` used by the
# rest of the notebook is not overwritten. The encoder is configured like the
# main one: `sparse_output` replaces the `sparse` keyword, which recent
# scikit-learn releases no longer accept.
preprocessor2 = Preprocessor(scaler=MinMaxScaler(),
                             encoder=OneHotEncoder(handle_unknown='ignore',sparse_output=False))
preprocessor2.load("main","local")
preprocessor2.load("main","bucketfs")

Convert dataframes into datasets, and create dataloaders

In [7]:
# Mini-batch size shared by every DataLoader in this notebook and passed to train_model.
batch_size = 64

In [8]:
# Wrap each preprocessed frame in a PlainDataset (train, validation, test, dirty —
# constructed in that order).
train_dataset, val_dataset, test_dataset, dirty_dataset = (
    PlainDataset(frame) for frame in (X_train, X_val, X_test, X_dirty)
)

def custom_collate_fn(batch):
    """Collate dataset samples into one stacked tensor plus a list of their indices.

    Args:
        batch: sequence of samples where element 0 is a tensor and element 1 is
            the index that accompanies it.

    Returns:
        A ``(tensor_data, indices)`` pair: the sample tensors stacked along a new
        leading dimension, and a plain Python list of the sample indices in
        batch order.
    """
    tensors = []
    indices = []
    for sample in batch:
        tensors.append(sample[0])
        indices.append(sample[1])
    return torch.stack(tensors), indices

# Only the training loader shuffles; every loader uses the shared batch size and
# the custom collate function.
# NOTE(review): drop_last=True is also set on the evaluation loaders, which would
# skip a trailing partial batch if this DataLoader follows the PyTorch semantics —
# confirm that is intended for the validation/test/dirty data.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
    collate_fn=custom_collate_fn,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    collate_fn=custom_collate_fn,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    collate_fn=custom_collate_fn,
)
dirty_loader = DataLoader(
    dirty_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    collate_fn=custom_collate_fn,
)

## Instantiate model

In [9]:
# Layer widths: the number of preprocessed feature columns, then 1024 and 128.
layers = [X_train.shape[1], 1024, 128]

# Forwarded to train_model as `wlc`.
# presumably the weights of the two loss components — TODO confirm
wlc = (1, 1)

autoencoder = Autoencoder(
    layers=layers,
    dropout_enc=[(0, 0.0)],
    dropout_dec=[(0, 0.1)],
    batch_norm=True,
    learning_rate=1e-4,
    weight_decay=1e-5,
    l1_strength=1e-5,
    l2_strength=1e-5,
)

In [None]:
# torchsummary only needs the per-sample input shape, so read it straight from
# the frame instead of copying the whole training set onto the device first.
summary(autoencoder.to(device), (X_train.shape[1],))

(Optional) Model can be loaded from checkpoint after instantiation

The function takes in 2 parameters:

    - Suffix of the model file name; in the example below the file would be **autoencoder_main.pkl**
    - Save/load location: can either be "local" to load in the home folder or "bucketfs" to load from Exasol BucketFS

In [None]:
# Restore a previously saved checkpoint into the instantiated model.
# NOTE(review): the preprocessor calls in this notebook pass ("main", "local"),
# i.e. suffix first and location second, while this call passes the two values
# in the opposite order — confirm against the signature of Autoencoder.load.
autoencoder.load("local","main")

### Train the model

In [10]:
# Train for at most 100 epochs with patience=10, using the loaders, feature
# groups and encoder categories prepared in the cells above.
autoencoder.train_model(
    # data
    train_loader=train_loader,
    val_loader=val_loader,
    batch_size=batch_size,
    # feature layout
    continous_columns=continous_columns,
    categorical_columns=categorical_columns,
    categories=categories,
    # schedule
    num_epochs=100,
    patience=10,
    # loss configuration and hardware
    wlc=wlc,
    device=device,
)

Epoch [1/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 27.52it/s]
Epoch [1/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.59it/s]


Epoch [1/100], Training Loss: 6.51186028
Epoch [1/100], Validation Loss: 2.39420251
Epoch [1/100], Training CE Loss: 6.45458449
Epoch [1/100], Validation CE Loss: 2.35875442
Epoch [1/100], Training MSE Loss: 0.05727580
Epoch [1/100], Validation MSE Loss: 0.03544809
Epoch [1/100], Training Loss Comp: 6.51186028
Epoch [1/100], Validation Loss Comp: 2.39420251
Epoch [1/100]: Learning Rate = [0.0001]



Epoch [2/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.19it/s]
Epoch [2/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.43it/s]


Epoch [2/100], Training Loss: 1.45376648
Epoch [2/100], Validation Loss: 0.82513203
Epoch [2/100], Training CE Loss: 1.42414442
Epoch [2/100], Validation CE Loss: 0.79886463
Epoch [2/100], Training MSE Loss: 0.02962206
Epoch [2/100], Validation MSE Loss: 0.02626741
Epoch [2/100], Training Loss Comp: 1.45376648
Epoch [2/100], Validation Loss Comp: 0.82513203
Epoch [2/100]: Learning Rate = [0.0001]



Epoch [3/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.79it/s]
Epoch [3/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.51it/s]


Epoch [3/100], Training Loss: 0.59678315
Epoch [3/100], Validation Loss: 0.42467861
Epoch [3/100], Training CE Loss: 0.57300065
Epoch [3/100], Validation CE Loss: 0.40312602
Epoch [3/100], Training MSE Loss: 0.02378251
Epoch [3/100], Validation MSE Loss: 0.02155259
Epoch [3/100], Training Loss Comp: 0.59678315
Epoch [3/100], Validation Loss Comp: 0.42467861
Epoch [3/100]: Learning Rate = [0.0001]



Epoch [4/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.16it/s]
Epoch [4/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.62it/s]


Epoch [4/100], Training Loss: 0.33864608
Epoch [4/100], Validation Loss: 0.28084032
Epoch [4/100], Training CE Loss: 0.31748882
Epoch [4/100], Validation CE Loss: 0.26084352
Epoch [4/100], Training MSE Loss: 0.02115727
Epoch [4/100], Validation MSE Loss: 0.01999680
Epoch [4/100], Training Loss Comp: 0.33864608
Epoch [4/100], Validation Loss Comp: 0.28084032
Epoch [4/100]: Learning Rate = [0.0001]



Epoch [5/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.56it/s]
Epoch [5/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.75it/s]


Epoch [5/100], Training Loss: 0.22021056
Epoch [5/100], Validation Loss: 0.19374763
Epoch [5/100], Training CE Loss: 0.20061890
Epoch [5/100], Validation CE Loss: 0.17607940
Epoch [5/100], Training MSE Loss: 0.01959166
Epoch [5/100], Validation MSE Loss: 0.01766823
Epoch [5/100], Training Loss Comp: 0.22021056
Epoch [5/100], Validation Loss Comp: 0.19374763
Epoch [5/100]: Learning Rate = [0.0001]



Epoch [6/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.27it/s]
Epoch [6/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.77it/s]


Epoch [6/100], Training Loss: 0.15196413
Epoch [6/100], Validation Loss: 0.15003542
Epoch [6/100], Training CE Loss: 0.13465823
Epoch [6/100], Validation CE Loss: 0.13400740
Epoch [6/100], Training MSE Loss: 0.01730590
Epoch [6/100], Validation MSE Loss: 0.01602802
Epoch [6/100], Training Loss Comp: 0.15196413
Epoch [6/100], Validation Loss Comp: 0.15003542
Epoch [6/100]: Learning Rate = [0.0001]



Epoch [7/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.31it/s]
Epoch [7/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.01it/s]


Epoch [7/100], Training Loss: 0.10914267
Epoch [7/100], Validation Loss: 0.11550182
Epoch [7/100], Training CE Loss: 0.09356863
Epoch [7/100], Validation CE Loss: 0.10133796
Epoch [7/100], Training MSE Loss: 0.01557405
Epoch [7/100], Validation MSE Loss: 0.01416386
Epoch [7/100], Training Loss Comp: 0.10914267
Epoch [7/100], Validation Loss Comp: 0.11550182
Epoch [7/100]: Learning Rate = [0.0001]



Epoch [8/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.09it/s]
Epoch [8/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.20it/s]


Epoch [8/100], Training Loss: 0.08835858
Epoch [8/100], Validation Loss: 0.10208140
Epoch [8/100], Training CE Loss: 0.07452423
Epoch [8/100], Validation CE Loss: 0.08889668
Epoch [8/100], Training MSE Loss: 0.01383435
Epoch [8/100], Validation MSE Loss: 0.01318472
Epoch [8/100], Training Loss Comp: 0.08835858
Epoch [8/100], Validation Loss Comp: 0.10208140
Epoch [8/100]: Learning Rate = [0.0001]



Epoch [9/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.04it/s]
Epoch [9/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 33.50it/s]


Epoch [9/100], Training Loss: 0.06836925
Epoch [9/100], Validation Loss: 0.08567826
Epoch [9/100], Training CE Loss: 0.05580785
Epoch [9/100], Validation CE Loss: 0.07424589
Epoch [9/100], Training MSE Loss: 0.01256140
Epoch [9/100], Validation MSE Loss: 0.01143238
Epoch [9/100], Training Loss Comp: 0.06836925
Epoch [9/100], Validation Loss Comp: 0.08567826
Epoch [9/100]: Learning Rate = [0.0001]



Epoch [10/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.58it/s]
Epoch [10/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.12it/s]


Epoch [10/100], Training Loss: 0.05703658
Epoch [10/100], Validation Loss: 0.07253984
Epoch [10/100], Training CE Loss: 0.04537811
Epoch [10/100], Validation CE Loss: 0.06060506
Epoch [10/100], Training MSE Loss: 0.01165847
Epoch [10/100], Validation MSE Loss: 0.01193478
Epoch [10/100], Training Loss Comp: 0.05703658
Epoch [10/100], Validation Loss Comp: 0.07253984
Epoch [10/100]: Learning Rate = [0.0001]



Epoch [11/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.33it/s]
Epoch [11/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.36it/s]


Epoch [11/100], Training Loss: 0.04634275
Epoch [11/100], Validation Loss: 0.06297465
Epoch [11/100], Training CE Loss: 0.03570571
Epoch [11/100], Validation CE Loss: 0.05231687
Epoch [11/100], Training MSE Loss: 0.01063704
Epoch [11/100], Validation MSE Loss: 0.01065778
Epoch [11/100], Training Loss Comp: 0.04634275
Epoch [11/100], Validation Loss Comp: 0.06297465
Epoch [11/100]: Learning Rate = [0.0001]



Epoch [12/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.62it/s]
Epoch [12/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.87it/s]


Epoch [12/100], Training Loss: 0.04106173
Epoch [12/100], Validation Loss: 0.06479886
Epoch [12/100], Training CE Loss: 0.03099527
Epoch [12/100], Validation CE Loss: 0.05361445
Epoch [12/100], Training MSE Loss: 0.01006645
Epoch [12/100], Validation MSE Loss: 0.01118441
Epoch [12/100], Training Loss Comp: 0.04106173
Epoch [12/100], Validation Loss Comp: 0.06479886
Epoch [12/100]: Learning Rate = [0.0001]



Epoch [13/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.79it/s]
Epoch [13/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.57it/s]


Epoch [13/100], Training Loss: 0.03456916
Epoch [13/100], Validation Loss: 0.05757061
Epoch [13/100], Training CE Loss: 0.02542724
Epoch [13/100], Validation CE Loss: 0.04873719
Epoch [13/100], Training MSE Loss: 0.00914192
Epoch [13/100], Validation MSE Loss: 0.00883342
Epoch [13/100], Training Loss Comp: 0.03456916
Epoch [13/100], Validation Loss Comp: 0.05757061
Epoch [13/100]: Learning Rate = [0.0001]



Epoch [14/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.14it/s]
Epoch [14/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.55it/s]


Epoch [14/100], Training Loss: 0.02968450
Epoch [14/100], Validation Loss: 0.05862348
Epoch [14/100], Training CE Loss: 0.02104773
Epoch [14/100], Validation CE Loss: 0.04803162
Epoch [14/100], Training MSE Loss: 0.00863677
Epoch [14/100], Validation MSE Loss: 0.01059186
Epoch [14/100], Training Loss Comp: 0.02968450
Epoch [14/100], Validation Loss Comp: 0.05862348
Epoch [14/100]: Learning Rate = [0.0001]



Epoch [15/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.17it/s]
Epoch [15/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.47it/s]


Epoch [15/100], Training Loss: 0.02699108
Epoch [15/100], Validation Loss: 0.04722519
Epoch [15/100], Training CE Loss: 0.01862856
Epoch [15/100], Validation CE Loss: 0.03978225
Epoch [15/100], Training MSE Loss: 0.00836252
Epoch [15/100], Validation MSE Loss: 0.00744295
Epoch [15/100], Training Loss Comp: 0.02699108
Epoch [15/100], Validation Loss Comp: 0.04722519
Epoch [15/100]: Learning Rate = [0.0001]



Epoch [16/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.06it/s]
Epoch [16/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.85it/s]


Epoch [16/100], Training Loss: 0.02308985
Epoch [16/100], Validation Loss: 0.04392504
Epoch [16/100], Training CE Loss: 0.01520982
Epoch [16/100], Validation CE Loss: 0.03630269
Epoch [16/100], Training MSE Loss: 0.00788003
Epoch [16/100], Validation MSE Loss: 0.00762235
Epoch [16/100], Training Loss Comp: 0.02308985
Epoch [16/100], Validation Loss Comp: 0.04392504
Epoch [16/100]: Learning Rate = [0.0001]



Epoch [17/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.54it/s]
Epoch [17/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.64it/s]


Epoch [17/100], Training Loss: 0.02087991
Epoch [17/100], Validation Loss: 0.03931887
Epoch [17/100], Training CE Loss: 0.01380626
Epoch [17/100], Validation CE Loss: 0.03235434
Epoch [17/100], Training MSE Loss: 0.00707364
Epoch [17/100], Validation MSE Loss: 0.00696452
Epoch [17/100], Training Loss Comp: 0.02087991
Epoch [17/100], Validation Loss Comp: 0.03931887
Epoch [17/100]: Learning Rate = [0.0001]



Epoch [18/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.25it/s]
Epoch [18/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.43it/s]


Epoch [18/100], Training Loss: 0.02030895
Epoch [18/100], Validation Loss: 0.04270734
Epoch [18/100], Training CE Loss: 0.01349674
Epoch [18/100], Validation CE Loss: 0.03603987
Epoch [18/100], Training MSE Loss: 0.00681221
Epoch [18/100], Validation MSE Loss: 0.00666747
Epoch [18/100], Training Loss Comp: 0.02030895
Epoch [18/100], Validation Loss Comp: 0.04270734
Epoch [18/100]: Learning Rate = [0.0001]



Epoch [19/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.36it/s]
Epoch [19/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.78it/s]


Epoch [19/100], Training Loss: 0.01682806
Epoch [19/100], Validation Loss: 0.04041971
Epoch [19/100], Training CE Loss: 0.01015232
Epoch [19/100], Validation CE Loss: 0.03418896
Epoch [19/100], Training MSE Loss: 0.00667574
Epoch [19/100], Validation MSE Loss: 0.00623075
Epoch [19/100], Training Loss Comp: 0.01682806
Epoch [19/100], Validation Loss Comp: 0.04041971
Epoch [19/100]: Learning Rate = [0.0001]



Epoch [20/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.18it/s]
Epoch [20/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.43it/s]


Epoch [20/100], Training Loss: 0.01669444
Epoch [20/100], Validation Loss: 0.03747796
Epoch [20/100], Training CE Loss: 0.01052652
Epoch [20/100], Validation CE Loss: 0.03215593
Epoch [20/100], Training MSE Loss: 0.00616792
Epoch [20/100], Validation MSE Loss: 0.00532203
Epoch [20/100], Training Loss Comp: 0.01669444
Epoch [20/100], Validation Loss Comp: 0.03747796
Epoch [20/100]: Learning Rate = [0.0001]



Epoch [21/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.18it/s]
Epoch [21/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.81it/s]


Epoch [21/100], Training Loss: 0.01682536
Epoch [21/100], Validation Loss: 0.03579171
Epoch [21/100], Training CE Loss: 0.01057488
Epoch [21/100], Validation CE Loss: 0.02967925
Epoch [21/100], Training MSE Loss: 0.00625048
Epoch [21/100], Validation MSE Loss: 0.00611245
Epoch [21/100], Training Loss Comp: 0.01682536
Epoch [21/100], Validation Loss Comp: 0.03579171
Epoch [21/100]: Learning Rate = [0.0001]



Epoch [22/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.14it/s]
Epoch [22/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.81it/s]


Epoch [22/100], Training Loss: 0.01532509
Epoch [22/100], Validation Loss: 0.03400279
Epoch [22/100], Training CE Loss: 0.00903459
Epoch [22/100], Validation CE Loss: 0.02925990
Epoch [22/100], Training MSE Loss: 0.00629050
Epoch [22/100], Validation MSE Loss: 0.00474290
Epoch [22/100], Training Loss Comp: 0.01532509
Epoch [22/100], Validation Loss Comp: 0.03400279
Epoch [22/100]: Learning Rate = [0.0001]



Epoch [23/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.10it/s]
Epoch [23/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.06it/s]


Epoch [23/100], Training Loss: 0.01546754
Epoch [23/100], Validation Loss: 0.03139852
Epoch [23/100], Training CE Loss: 0.00965815
Epoch [23/100], Validation CE Loss: 0.02671535
Epoch [23/100], Training MSE Loss: 0.00580939
Epoch [23/100], Validation MSE Loss: 0.00468317
Epoch [23/100], Training Loss Comp: 0.01546754
Epoch [23/100], Validation Loss Comp: 0.03139852
Epoch [23/100]: Learning Rate = [0.0001]



Epoch [24/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.03it/s]
Epoch [24/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.26it/s]


Epoch [24/100], Training Loss: 0.01239031
Epoch [24/100], Validation Loss: 0.03121456
Epoch [24/100], Training CE Loss: 0.00692263
Epoch [24/100], Validation CE Loss: 0.02645106
Epoch [24/100], Training MSE Loss: 0.00546768
Epoch [24/100], Validation MSE Loss: 0.00476350
Epoch [24/100], Training Loss Comp: 0.01239031
Epoch [24/100], Validation Loss Comp: 0.03121456
Epoch [24/100]: Learning Rate = [0.0001]



Epoch [25/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.75it/s]
Epoch [25/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 33.72it/s]


Epoch [25/100], Training Loss: 0.01170858
Epoch [25/100], Validation Loss: 0.03230876
Epoch [25/100], Training CE Loss: 0.00637356
Epoch [25/100], Validation CE Loss: 0.02582217
Epoch [25/100], Training MSE Loss: 0.00533502
Epoch [25/100], Validation MSE Loss: 0.00648659
Epoch [25/100], Training Loss Comp: 0.01170858
Epoch [25/100], Validation Loss Comp: 0.03230876
Epoch [25/100]: Learning Rate = [1e-05]



Epoch [26/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.70it/s]
Epoch [26/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.39it/s]


Epoch [26/100], Training Loss: 0.00845616
Epoch [26/100], Validation Loss: 0.02355344
Epoch [26/100], Training CE Loss: 0.00468995
Epoch [26/100], Validation CE Loss: 0.01973141
Epoch [26/100], Training MSE Loss: 0.00376620
Epoch [26/100], Validation MSE Loss: 0.00382203
Epoch [26/100], Training Loss Comp: 0.00845616
Epoch [26/100], Validation Loss Comp: 0.02355344
Epoch [26/100]: Learning Rate = [1e-05]



Epoch [27/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 28.88it/s]
Epoch [27/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.82it/s]


Epoch [27/100], Training Loss: 0.00751059
Epoch [27/100], Validation Loss: 0.02343598
Epoch [27/100], Training CE Loss: 0.00386024
Epoch [27/100], Validation CE Loss: 0.01971429
Epoch [27/100], Training MSE Loss: 0.00365035
Epoch [27/100], Validation MSE Loss: 0.00372169
Epoch [27/100], Training Loss Comp: 0.00751059
Epoch [27/100], Validation Loss Comp: 0.02343598
Epoch [27/100]: Learning Rate = [1e-05]



Epoch [28/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.44it/s]
Epoch [28/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.85it/s]


Epoch [28/100], Training Loss: 0.00703663
Epoch [28/100], Validation Loss: 0.02299380
Epoch [28/100], Training CE Loss: 0.00343599
Epoch [28/100], Validation CE Loss: 0.01907481
Epoch [28/100], Training MSE Loss: 0.00360064
Epoch [28/100], Validation MSE Loss: 0.00391899
Epoch [28/100], Training Loss Comp: 0.00703663
Epoch [28/100], Validation Loss Comp: 0.02299380
Epoch [28/100]: Learning Rate = [1e-05]



Epoch [29/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.45it/s]
Epoch [29/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.74it/s]


Epoch [29/100], Training Loss: 0.00678914
Epoch [29/100], Validation Loss: 0.02427184
Epoch [29/100], Training CE Loss: 0.00324412
Epoch [29/100], Validation CE Loss: 0.02056137
Epoch [29/100], Training MSE Loss: 0.00354502
Epoch [29/100], Validation MSE Loss: 0.00371047
Epoch [29/100], Training Loss Comp: 0.00678914
Epoch [29/100], Validation Loss Comp: 0.02427184
Epoch [29/100]: Learning Rate = [1e-05]



Epoch [30/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.40it/s]
Epoch [30/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.32it/s]


Epoch [30/100], Training Loss: 0.00658389
Epoch [30/100], Validation Loss: 0.02299354
Epoch [30/100], Training CE Loss: 0.00298924
Epoch [30/100], Validation CE Loss: 0.01947900
Epoch [30/100], Training MSE Loss: 0.00359464
Epoch [30/100], Validation MSE Loss: 0.00351454
Epoch [30/100], Training Loss Comp: 0.00658389
Epoch [30/100], Validation Loss Comp: 0.02299354
Epoch [30/100]: Learning Rate = [1e-05]



Epoch [31/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.28it/s]
Epoch [31/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.72it/s]


Epoch [31/100], Training Loss: 0.00629875
Epoch [31/100], Validation Loss: 0.02360979
Epoch [31/100], Training CE Loss: 0.00284420
Epoch [31/100], Validation CE Loss: 0.02003906
Epoch [31/100], Training MSE Loss: 0.00345455
Epoch [31/100], Validation MSE Loss: 0.00357073
Epoch [31/100], Training Loss Comp: 0.00629875
Epoch [31/100], Validation Loss Comp: 0.02360979
Epoch [31/100]: Learning Rate = [1e-05]



Epoch [32/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.31it/s]
Epoch [32/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.93it/s]


Epoch [32/100], Training Loss: 0.00618052
Epoch [32/100], Validation Loss: 0.02136516
Epoch [32/100], Training CE Loss: 0.00274391
Epoch [32/100], Validation CE Loss: 0.01777151
Epoch [32/100], Training MSE Loss: 0.00343661
Epoch [32/100], Validation MSE Loss: 0.00359365
Epoch [32/100], Training Loss Comp: 0.00618052
Epoch [32/100], Validation Loss Comp: 0.02136516
Epoch [32/100]: Learning Rate = [1e-05]



Epoch [33/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.47it/s]
Epoch [33/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.92it/s]


Epoch [33/100], Training Loss: 0.00605166
Epoch [33/100], Validation Loss: 0.02087578
Epoch [33/100], Training CE Loss: 0.00265768
Epoch [33/100], Validation CE Loss: 0.01742939
Epoch [33/100], Training MSE Loss: 0.00339398
Epoch [33/100], Validation MSE Loss: 0.00344639
Epoch [33/100], Training Loss Comp: 0.00605166
Epoch [33/100], Validation Loss Comp: 0.02087578
Epoch [33/100]: Learning Rate = [1e-05]



Epoch [34/100], Training Progress: 100%|██████████| 356/356 [00:11<00:00, 29.84it/s]
Epoch [34/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 35.17it/s]


Epoch [34/100], Training Loss: 0.00586187
Epoch [34/100], Validation Loss: 0.02250771
Epoch [34/100], Training CE Loss: 0.00257517
Epoch [34/100], Validation CE Loss: 0.01902029
Epoch [34/100], Training MSE Loss: 0.00328670
Epoch [34/100], Validation MSE Loss: 0.00348742
Epoch [34/100], Training Loss Comp: 0.00586187
Epoch [34/100], Validation Loss Comp: 0.02250771
Epoch [34/100]: Learning Rate = [1e-05]



Epoch [35/100], Training Progress: 100%|██████████| 356/356 [00:11<00:00, 29.80it/s]
Epoch [35/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.91it/s]


Epoch [35/100], Training Loss: 0.00588076
Epoch [35/100], Validation Loss: 0.02311647
Epoch [35/100], Training CE Loss: 0.00259088
Epoch [35/100], Validation CE Loss: 0.01958856
Epoch [35/100], Training MSE Loss: 0.00328988
Epoch [35/100], Validation MSE Loss: 0.00352790
Epoch [35/100], Training Loss Comp: 0.00588076
Epoch [35/100], Validation Loss Comp: 0.02311647
Epoch [35/100]: Learning Rate = [1e-05]



Epoch [36/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.55it/s]
Epoch [36/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.67it/s]


Epoch [36/100], Training Loss: 0.00548157
Epoch [36/100], Validation Loss: 0.02066910
Epoch [36/100], Training CE Loss: 0.00226934
Epoch [36/100], Validation CE Loss: 0.01745691
Epoch [36/100], Training MSE Loss: 0.00321223
Epoch [36/100], Validation MSE Loss: 0.00321218
Epoch [36/100], Training Loss Comp: 0.00548157
Epoch [36/100], Validation Loss Comp: 0.02066910
Epoch [36/100]: Learning Rate = [1e-05]



Epoch [37/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.22it/s]
Epoch [37/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 35.22it/s]


Epoch [37/100], Training Loss: 0.00547319
Epoch [37/100], Validation Loss: 0.02154450
Epoch [37/100], Training CE Loss: 0.00230381
Epoch [37/100], Validation CE Loss: 0.01833842
Epoch [37/100], Training MSE Loss: 0.00316938
Epoch [37/100], Validation MSE Loss: 0.00320608
Epoch [37/100], Training Loss Comp: 0.00547319
Epoch [37/100], Validation Loss Comp: 0.02154450
Epoch [37/100]: Learning Rate = [1e-05]



Epoch [38/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.54it/s]
Epoch [38/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.82it/s]


Epoch [38/100], Training Loss: 0.00538070
Epoch [38/100], Validation Loss: 0.02041744
Epoch [38/100], Training CE Loss: 0.00224835
Epoch [38/100], Validation CE Loss: 0.01727698
Epoch [38/100], Training MSE Loss: 0.00313236
Epoch [38/100], Validation MSE Loss: 0.00314045
Epoch [38/100], Training Loss Comp: 0.00538070
Epoch [38/100], Validation Loss Comp: 0.02041744
Epoch [38/100]: Learning Rate = [1e-05]



Epoch [39/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.51it/s]
Epoch [39/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.61it/s]


Epoch [39/100], Training Loss: 0.00525946
Epoch [39/100], Validation Loss: 0.02135465
Epoch [39/100], Training CE Loss: 0.00222278
Epoch [39/100], Validation CE Loss: 0.01807358
Epoch [39/100], Training MSE Loss: 0.00303669
Epoch [39/100], Validation MSE Loss: 0.00328107
Epoch [39/100], Training Loss Comp: 0.00525946
Epoch [39/100], Validation Loss Comp: 0.02135465
Epoch [39/100]: Learning Rate = [1e-05]



Epoch [40/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.35it/s]
Epoch [40/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.61it/s]


Epoch [40/100], Training Loss: 0.00529379
Epoch [40/100], Validation Loss: 0.01990076
Epoch [40/100], Training CE Loss: 0.00223559
Epoch [40/100], Validation CE Loss: 0.01672295
Epoch [40/100], Training MSE Loss: 0.00305820
Epoch [40/100], Validation MSE Loss: 0.00317781
Epoch [40/100], Training Loss Comp: 0.00529379
Epoch [40/100], Validation Loss Comp: 0.01990076
Epoch [40/100]: Learning Rate = [1e-05]



Epoch [41/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.34it/s]
Epoch [41/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.54it/s]


Epoch [41/100], Training Loss: 0.00520121
Epoch [41/100], Validation Loss: 0.02060406
Epoch [41/100], Training CE Loss: 0.00218174
Epoch [41/100], Validation CE Loss: 0.01760382
Epoch [41/100], Training MSE Loss: 0.00301947
Epoch [41/100], Validation MSE Loss: 0.00300024
Epoch [41/100], Training Loss Comp: 0.00520121
Epoch [41/100], Validation Loss Comp: 0.02060406
Epoch [41/100]: Learning Rate = [1e-05]



Epoch [42/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.33it/s]
Epoch [42/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.91it/s]


Epoch [42/100], Training Loss: 0.00521056
Epoch [42/100], Validation Loss: 0.01866597
Epoch [42/100], Training CE Loss: 0.00222375
Epoch [42/100], Validation CE Loss: 0.01559918
Epoch [42/100], Training MSE Loss: 0.00298681
Epoch [42/100], Validation MSE Loss: 0.00306679
Epoch [42/100], Training Loss Comp: 0.00521056
Epoch [42/100], Validation Loss Comp: 0.01866597
Epoch [42/100]: Learning Rate = [1e-05]



Epoch [43/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.48it/s]
Epoch [43/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.98it/s]


Epoch [43/100], Training Loss: 0.00515581
Epoch [43/100], Validation Loss: 0.01954335
Epoch [43/100], Training CE Loss: 0.00222030
Epoch [43/100], Validation CE Loss: 0.01643655
Epoch [43/100], Training MSE Loss: 0.00293551
Epoch [43/100], Validation MSE Loss: 0.00310680
Epoch [43/100], Training Loss Comp: 0.00515581
Epoch [43/100], Validation Loss Comp: 0.01954335
Epoch [43/100]: Learning Rate = [1e-05]



Epoch [44/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.66it/s]
Epoch [44/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.90it/s]


Epoch [44/100], Training Loss: 0.00475024
Epoch [44/100], Validation Loss: 0.02175461
Epoch [44/100], Training CE Loss: 0.00183425
Epoch [44/100], Validation CE Loss: 0.01877974
Epoch [44/100], Training MSE Loss: 0.00291600
Epoch [44/100], Validation MSE Loss: 0.00297487
Epoch [44/100], Training Loss Comp: 0.00475024
Epoch [44/100], Validation Loss Comp: 0.02175461
Epoch [44/100]: Learning Rate = [1e-05]



Epoch [45/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.46it/s]
Epoch [45/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.66it/s]


Epoch [45/100], Training Loss: 0.00476901
Epoch [45/100], Validation Loss: 0.02163702
Epoch [45/100], Training CE Loss: 0.00196617
Epoch [45/100], Validation CE Loss: 0.01855095
Epoch [45/100], Training MSE Loss: 0.00280284
Epoch [45/100], Validation MSE Loss: 0.00308607
Epoch [45/100], Training Loss Comp: 0.00476901
Epoch [45/100], Validation Loss Comp: 0.02163702
Epoch [45/100]: Learning Rate = [1e-05]



Epoch [46/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.26it/s]
Epoch [46/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.66it/s]


Epoch [46/100], Training Loss: 0.00479369
Epoch [46/100], Validation Loss: 0.01959338
Epoch [46/100], Training CE Loss: 0.00198677
Epoch [46/100], Validation CE Loss: 0.01665359
Epoch [46/100], Training MSE Loss: 0.00280692
Epoch [46/100], Validation MSE Loss: 0.00293980
Epoch [46/100], Training Loss Comp: 0.00479369
Epoch [46/100], Validation Loss Comp: 0.01959338
Epoch [46/100]: Learning Rate = [1e-05]



Epoch [47/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.21it/s]
Epoch [47/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.84it/s]


Epoch [47/100], Training Loss: 0.00473729
Epoch [47/100], Validation Loss: 0.01910952
Epoch [47/100], Training CE Loss: 0.00188341
Epoch [47/100], Validation CE Loss: 0.01616873
Epoch [47/100], Training MSE Loss: 0.00285387
Epoch [47/100], Validation MSE Loss: 0.00294080
Epoch [47/100], Training Loss Comp: 0.00473729
Epoch [47/100], Validation Loss Comp: 0.01910952
Epoch [47/100]: Learning Rate = [1e-05]



Epoch [48/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.26it/s]
Epoch [48/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.48it/s]


Epoch [48/100], Training Loss: 0.00457257
Epoch [48/100], Validation Loss: 0.01835696
Epoch [48/100], Training CE Loss: 0.00177541
Epoch [48/100], Validation CE Loss: 0.01555829
Epoch [48/100], Training MSE Loss: 0.00279716
Epoch [48/100], Validation MSE Loss: 0.00279867
Epoch [48/100], Training Loss Comp: 0.00457257
Epoch [48/100], Validation Loss Comp: 0.01835696
Epoch [48/100]: Learning Rate = [1e-05]



Epoch [49/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.20it/s]
Epoch [49/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.36it/s]


Epoch [49/100], Training Loss: 0.00462070
Epoch [49/100], Validation Loss: 0.01971735
Epoch [49/100], Training CE Loss: 0.00183071
Epoch [49/100], Validation CE Loss: 0.01708407
Epoch [49/100], Training MSE Loss: 0.00278999
Epoch [49/100], Validation MSE Loss: 0.00263328
Epoch [49/100], Training Loss Comp: 0.00462070
Epoch [49/100], Validation Loss Comp: 0.01971735
Epoch [49/100]: Learning Rate = [1e-05]



Epoch [50/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.13it/s]
Epoch [50/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.27it/s]


Epoch [50/100], Training Loss: 0.00461912
Epoch [50/100], Validation Loss: 0.01870091
Epoch [50/100], Training CE Loss: 0.00186711
Epoch [50/100], Validation CE Loss: 0.01601502
Epoch [50/100], Training MSE Loss: 0.00275201
Epoch [50/100], Validation MSE Loss: 0.00268588
Epoch [50/100], Training Loss Comp: 0.00461912
Epoch [50/100], Validation Loss Comp: 0.01870091
Epoch [50/100]: Learning Rate = [1.0000000000000002e-06]



Epoch [51/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.15it/s]
Epoch [51/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.52it/s]


Epoch [51/100], Training Loss: 0.00432375
Epoch [51/100], Validation Loss: 0.01843652
Epoch [51/100], Training CE Loss: 0.00187900
Epoch [51/100], Validation CE Loss: 0.01588091
Epoch [51/100], Training MSE Loss: 0.00244475
Epoch [51/100], Validation MSE Loss: 0.00255561
Epoch [51/100], Training Loss Comp: 0.00432375
Epoch [51/100], Validation Loss Comp: 0.01843652
Epoch [51/100]: Learning Rate = [1.0000000000000002e-06]



Epoch [52/100], Training Progress: 100%|██████████| 356/356 [00:12<00:00, 29.30it/s]
Epoch [52/100], Validation Progress: 100%|██████████| 76/76 [00:02<00:00, 34.75it/s]

Epoch [52/100], Training Loss: 0.00423427
Epoch [52/100], Validation Loss: 0.01956219
Epoch [52/100], Training CE Loss: 0.00178316
Epoch [52/100], Validation CE Loss: 0.01700943
Epoch [52/100], Training MSE Loss: 0.00245111
Epoch [52/100], Validation MSE Loss: 0.00255276
Epoch [52/100], Training Loss Comp: 0.00423427
Epoch [52/100], Validation Loss Comp: 0.01956219
Epoch [52/100]: Learning Rate = [1.0000000000000002e-06]

Early stopping triggered. Stopping training.





(Optional) The model can be saved after training.

The function takes in 2 parameters:

    - Suffix of the saved model file name (second argument); in the example below the file would be **autoencoder_main.pth**
    - Save location (first argument): can either be "local" to save in the home folder or "bucketfs" to save to Exasol BucketFS

In [11]:
# Persist the trained weights: first argument is the storage location,
# second is the file-name suffix (this run reported autoencoder_main.pth).
autoencoder.save("local", "main")

Saved weight to autoencoder_main.pth


### Use trained model to clean data

In [16]:
# Run the trained autoencoder over the corrupted test split (X_dirty) and
# keep the reconstructed result. The fitted encoder and scaler from the
# preprocessor are passed along with the column groupings; presumably they
# are used to map the output back to the original column layout (og_columns)
# -- verify against AutoCleanse's clean() implementation.
cleaned_data = autoencoder.clean(
    dirty_loader=dirty_loader,
    test_loader=test_loader,
    df=X_dirty,
    batch_size=batch_size,
    continous_columns=continous_columns,
    categorical_columns=categorical_columns,
    og_columns=og_columns,
    onehotencoder=preprocessor.encoder,
    scaler=preprocessor.scaler,
    device=device,
)

Clean progress: 100%|██████████| 76/76 [00:04<00:00, 18.33it/s]


MAE: 0.00116814

MSE: 0.00012697





In [13]:
# Original data: a fixed handful of row labels, shown in the original column
# order so the table can be compared with the cleaned output.
print(
    tabulate(
        df.loc[[28296, 28217, 8054, 4223, 22723], og_columns],
        headers=og_columns,
        tablefmt="simple",
        maxcolwidths=[None, 4],
    )
)

         age  workclass         education      education.num  marital.status      occupation         relationship    race                sex       capital.gain    capital.loss    hours.per.week  native.country
-----  -----  ----------------  -----------  ---------------  ------------------  -----------------  --------------  ------------------  ------  --------------  --------------  ----------------  ----------------
28296     48  ?                 9th                        5  Separated           ?                  Not-in-family   Amer-Indian-Eskimo  Female               0               0                20  United-States
28217     28  ?                 HS-grad                    9  Separated           ?                  Unmarried       White               Female               0               0                40  United-States
 8054     38  Self-emp-not-inc  9th                        5  Divorced            Craft-repair       Not-in-family   White               Male                 0 

In [17]:
# Cleaned data: the same row labels as the original-data preview, taken from
# the autoencoder's reconstructed frame.
# NOTE(review): the rendered output below contains negative capital.gain /
# capital.loss values for some rows -- confirm whether reconstructed numeric
# columns should be clipped to their valid range.
print(
    tabulate(
        cleaned_data.loc[[28296, 28217, 8054, 4223, 22723]],
        headers=cleaned_data.columns.to_list(),
        tablefmt="simple",
        maxcolwidths=[None, 4],
    )
)

         age  workclass         education      education.num  marital.status      occupation         relationship    race                sex       capital.gain    capital.loss    hours.per.week  native.country
-----  -----  ----------------  -----------  ---------------  ------------------  -----------------  --------------  ------------------  ------  --------------  --------------  ----------------  ----------------
28296     48  ?                 9th                        6  Separated           ?                  Not-in-family   Amer-Indian-Eskimo  Female           -4947             230                30  United-States
28217     31  ?                 HS-grad                    9  Separated           ?                  Unmarried       White               Female           -2421              -7                37  United-States
 8054     36  Self-emp-not-inc  9th                        5  Divorced            Craft-repair       Not-in-family   White               Male              2361 

### Use trained model to anonymize data

In [None]:
# Feed the untouched test split through the trained autoencoder's
# anonymize() step and keep the result for inspection in the next cell.
anonymized_data = autoencoder.anonymize(
    df=X_test,
    data_loader=test_loader,
    batch_size=batch_size,
    device=device,
)

In [None]:
# Anonymized data: preview the first 5 rows and (at most) the first 32
# columns, rounded to 4 decimals for readability.
# The headers are taken from the sliced preview rather than the full frame:
# if the frame has more than 32 columns, passing every column name would give
# tabulate more headers than data columns, so it would no longer pad a blank
# header for the index column and the labels would shift by one.
anonymized_preview = anonymized_data.round(decimals=4).iloc[:5, :32]
print(
    tabulate(
        anonymized_preview,
        headers=anonymized_preview.columns.to_list(),
        tablefmt="simple",
        maxcolwidths=[None, 6],
    )
)