In [1]:
from src.tabular import models
from src.tabular import trainers
from torch.utils.data import TensorDataset, DataLoader, Subset
import pandas as pd
import torch
from datetime import datetime as dt
import numpy as np

In [2]:
def load_dataset(train=True, pct: float = 1., path: str = None):
    """Load the processed transactions CSV and return it as ``(data, targets)``.

    Parameters
    ----------
    train : bool
        If True, read the training file, keep only rows whose ``isFraud``
        column equals 0, drop that column and return a random subsample.
        If False, read the test file and return every row unchanged.
    pct : float
        Fraction of the (normal-only) training rows to keep, in ``[0, 1]``.
        Only used when ``train`` is True.
    path : str, optional
        CSV file to read. Defaults to the processed train/test file under
        ``../data/processed`` depending on ``train``.

    Returns
    -------
    data : torch.Tensor
        One row per transaction, as built by ``torch.Tensor(...)``.
    targets : torch.Tensor or list
        A zero vector with one entry per row for the training set; an empty
        list for the test set (kept for backward compatibility).

    Raises
    ------
    ValueError
        If ``train`` is True and ``pct`` is outside ``[0, 1]``.
    """
    if train:
        # A negative pct would silently slice rows off the *end* of the
        # permutation and pct > 1 would silently be clamped; reject both.
        if not 0. <= pct <= 1.:
            raise ValueError("pct must be within [0, 1], got {}".format(pct))
        csv_path = path if path is not None else "../data/processed/train_transactions_v2.csv"
        df_train = pd.read_csv(csv_path)
        # Train only on normal data
        df_normal = df_train[df_train["isFraud"] == 0]
        data = torch.Tensor(df_normal.drop(columns=["isFraud"]).to_numpy())
        # Random subsample: shuffle the row indices, keep the first pct share.
        perm = torch.randperm(data.size(0))
        data = data[perm[:int(len(data) * pct)]]
        targets = torch.zeros(len(data))
    else:
        csv_path = path if path is not None else "../data/processed/test_transactions_v2.csv"
        df_test = pd.read_csv(csv_path)
        data = torch.Tensor(df_test.to_numpy())
        targets = []
    return data, targets


def submission_df(predictions: np.ndarray, root: str = '../data/ieee-fraud-detection'):
    """Build a Kaggle submission frame pairing each test TransactionID with a score.

    Parameters
    ----------
    predictions : array-like
        One score per row of ``test_transaction.csv``, in file order. Anything
        ``np.asarray`` accepts works; it is flattened to one dimension.
    root : str
        Directory containing ``test_transaction.csv``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``TransactionID`` and ``isFraud``, one row per transaction.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of transactions.
        Without this check the mismatch would only show up as NaN-padded rows
        in the submission.
    """
    # Only the ID column is needed, so skip parsing every other column.
    transaction_ids = pd.read_csv(root + '/test_transaction.csv', usecols=['TransactionID'])['TransactionID']
    scores = np.asarray(predictions).reshape(-1)
    if len(scores) != len(transaction_ids):
        raise ValueError(
            "got {} predictions for {} test transactions".format(len(scores), len(transaction_ids))
        )
    return pd.DataFrame({'TransactionID': transaction_ids.to_numpy(), 'isFraud': scores})

In [3]:
# Locations of the processed train/test CSVs.
# NOTE(review): these two names are not read anywhere in the visible notebook;
# load_dataset() opens the same files through its own default paths.
path_to_train_set = '../data/processed/train_transactions_v2.csv'
path_to_test_set = '../data/processed/test_transactions_v2.csv'

# Seed torch's RNG so the torch.randperm subsample drawn inside load_dataset()
# is the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

X_train, y_train = load_dataset(train=True)
n_features = X_train.shape[1]
batch_size = 128
n_epochs = 200
lr = 1e-04
# Use the GPU when one is visible, otherwise fall back to CPU instead of failing.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Models & trainers
models_to_train = [models.DAGMM(in_features=n_features, latent_dim=1, K=4, device=device)]
trainers_to_train = [trainers.DAGMMTrainer(model=models_to_train[0], device=device, batch_size=batch_size, n_epochs=n_epochs, lr=lr)]

# Training and test data
# load_dataset(train=False) returns an empty list as its second value, so the
# test loader below is given placeholder zero targets of the right length.
X_test, test_index = load_dataset(train=False)
ds = TensorDataset(X_train, y_train)
train_ldr = DataLoader(ds, batch_size=batch_size)
test_ldr = DataLoader(TensorDataset(X_test, torch.zeros(len(X_test))), batch_size=batch_size)

## DeepSVDD model

In [4]:
# Training & evaluation
model = models.DeepSVDD(in_features=n_features)
trainer = trainers.DeepSVDDTrainer(model=model, device=device, batch_size=batch_size, n_epochs=n_epochs, lr=lr)
print("Training model {} on IEEE Fraud Detection dataset {}".format(model.print_name(), X_train.shape))
# Train
trainer.train(train_ldr, None)
# Evaluate model
trainer.model.eval()
_, logits = trainer.test(test_ldr)
# NOTE(review): the reload cell at the end of this notebook uses sigmoid(logits)
# without the "1 -"; confirm which orientation trainer.test() scores need.
y_pred = (1 - torch.sigmoid(torch.Tensor(logits))).detach().cpu()
# Create submission file
submission = submission_df(y_pred)
submission.to_csv("submissions/{}_kaggle_submission.csv".format(model.print_name()), index=False)
# Format the timestamp explicitly: str(datetime.now()) contains ':' and a space,
# which are not valid in Windows file names.
run_stamp = dt.now().strftime("%Y%m%d-%H%M%S")
trainer.model.save("../models/{}_ieee_fraud_detection_{}.pklz".format(model.print_name(), run_stamp))

Training model DeepSVDD on IEEE Fraud Detection dataset torch.Size([1139754, 432])
Initializing center c...
Center c initialized.
Started training


 21%|██        | 1888/8905 [00:26<01:36, 72.52it/s, epoch=1, loss=1142.335]


KeyboardInterrupt: 

## DAGMM model

In [None]:
# Fit each (model, trainer) pair, score the test set, then persist the results
for model, trainer in zip(models_to_train, trainers_to_train):
    banner = "Training model {} on IEEE Fraud Detection dataset with shape {}".format(model.print_name(), X_train.shape)
    print(banner)
    # Fit on the training loader (the second argument is passed as None)
    trainer.train(train_ldr, None)
    # Switch to eval mode and score the test loader
    trainer.model.eval()
    _, logits = trainer.test(test_ldr)
    scores = torch.Tensor(logits)
    y_pred = (1 - torch.sigmoid(scores)).detach().cpu()
    # Write the Kaggle submission CSV
    submission = submission_df(y_pred)
    submission_path = "submissions/{}_kaggle_submission.csv".format(model.print_name())
    submission.to_csv(submission_path, index=False)
    # Save the trained model
    model_path = "../models/{}_ieee_fraud_detection.pklz".format(model.print_name())
    trainer.model.save(model_path)

Training model DAGMM on IEEE Fraud Detection dataset with shape torch.Size([569877, 432])
Started training


L = torch.cholesky(A)
should be replaced with
L = torch.linalg.cholesky(A)
and
U = torch.cholesky(A, upper=True)
should be replaced with
U = torch.linalg.cholesky(A.transpose(-2, -1).conj()).transpose(-2, -1).conj() (Triggered internally at  ..\aten\src\ATen\native\BatchLinearAlgebra.cpp:1284.)
  inv_cov_mat = torch.cholesky_inverse(torch.cholesky(cov_mat))
100%|█████████▉| 4452/4453 [07:44<00:00,  9.59it/s, epoch=1, loss=6106.427]
100%|█████████▉| 4452/4453 [07:42<00:00,  9.63it/s, epoch=2, loss=4307.534]
100%|█████████▉| 4452/4453 [07:41<00:00,  9.65it/s, epoch=3, loss=4028.461]
100%|█████████▉| 4452/4453 [07:44<00:00,  9.58it/s, epoch=4, loss=3880.795]
100%|█████████▉| 4452/4453 [07:34<00:00,  9.80it/s, epoch=5, loss=3771.933]
100%|█████████▉| 4452/4453 [07:34<00:00,  9.79it/s, epoch=6, loss=3678.804]
100%|█████████▉| 4452/4453 [07:35<00:00,  9.77it/s, epoch=7, loss=3609.503]
100%|█████████▉| 4452/4453 [07:39<00:00,  9.68it/s, epoch=8, loss=3556.103]
100%|█████████▉| 4452/4453 [07:3

## NeuTraLAD Model

In [14]:
# Build the NeuTraAD model and its trainer
model = models.NeuTraAD(in_features=n_features, temperature=0.07, dataset='IEEEFraudDetection', device=device)
trainer = trainers.NeuTraADTrainer(model=model, device=device, batch_size=batch_size, n_epochs=n_epochs, lr=lr)
banner = "Training model {} on IEEE Fraud Detection dataset {}".format(model.print_name(), X_train.shape)
print(banner)
# Fit on the training loader (the second argument is passed as None)
trainer.train(train_ldr, None)
# Switch to eval mode and score the test loader
trainer.model.eval()
_, logits = trainer.test(test_ldr)
scores = torch.Tensor(logits)
y_pred = (1 - torch.sigmoid(scores)).detach().cpu()
# Write the Kaggle submission CSV
submission = submission_df(y_pred)
submission_path = "submissions/{}_kaggle_submission.csv".format(model.print_name())
submission.to_csv(submission_path, index=False)
# Save the trained model
model_path = "../models/{}_ieee_fraud_detection.pklz".format(model.print_name())
trainer.model.save(model_path)

Training model neuTraAD on IEEE Fraud Detection dataset torch.Size([284938, 432])
Started training


100%|█████████▉| 2226/2227 [07:06<00:00,  5.21it/s, epoch=1, loss=50975.005]
100%|█████████▉| 2226/2227 [07:08<00:00,  5.20it/s, epoch=2, loss=39190.691]
100%|█████████▉| 2226/2227 [07:04<00:00,  5.24it/s, epoch=3, loss=30985.256]
100%|█████████▉| 2226/2227 [07:04<00:00,  5.24it/s, epoch=4, loss=26545.148]
100%|█████████▉| 2226/2227 [07:06<00:00,  5.22it/s, epoch=5, loss=23441.015]
100%|█████████▉| 2226/2227 [07:06<00:00,  5.22it/s, epoch=6, loss=20844.802]
100%|█████████▉| 2226/2227 [07:06<00:00,  5.22it/s, epoch=7, loss=18388.564]
100%|█████████▉| 2226/2227 [07:04<00:00,  5.24it/s, epoch=8, loss=16161.602]
100%|█████████▉| 2226/2227 [07:04<00:00,  5.24it/s, epoch=9, loss=14593.033]
100%|█████████▉| 2226/2227 [07:04<00:00,  5.25it/s, epoch=10, loss=13258.429]
100%|█████████▉| 2226/2227 [07:04<00:00,  5.24it/s, epoch=11, loss=12239.191]
100%|█████████▉| 2226/2227 [07:04<00:00,  5.24it/s, epoch=12, loss=11349.146]
100%|█████████▉| 2226/2227 [07:05<00:00,  5.23it/s, epoch=13, loss=10442.

Traceback (most recent call last):
  File "C:\Users\verdi\anaconda3\envs\anomaly_detection\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-3beb525a6fb8>", line 6, in <module>
    trainer.train(train_ldr, None)
  File "C:\Users\verdi\NRCAN\git\anomaly_detection\src\tabular\trainers\BaseTrainer.py", line 67, in train
    loss = self.train_iter(X)
  File "C:\Users\verdi\NRCAN\git\anomaly_detection\src\tabular\trainers\AutoEncoder.py", line 190, in train_iter
    scores = self.model(X)
  File "C:\Users\verdi\anaconda3\envs\anomaly_detection\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\verdi\NRCAN\git\anomaly_detection\src\tabular\models\AutoEncoder.py", line 273, in forward
    return self.score(X)
  File "C:\Users\verdi\NRCAN\git\anomaly_detection\src\tabular\models\AutoEncoder.py", line 220, in sc

TypeError: object of type 'NoneType' has no len()

## ALAD Model

In [None]:
# Build the ALAD model (latent size is half the feature count) and its trainer
model = models.ALAD(in_features=n_features, out_features=128, latent_dim=n_features//2, device=device)
trainer = trainers.ALADTrainer(model=model,  device=device, batch_size=batch_size, n_epochs=n_epochs, lr=lr)
# Fit on the training loader (the second argument is passed as None)
trainer.train(train_ldr, None)
# Switch to eval mode and score the test loader
trainer.model.eval()
_, logits = trainer.test(test_ldr)
scores = torch.Tensor(logits)
y_pred = (1 - torch.sigmoid(scores)).detach().cpu()
# Write the Kaggle submission CSV
submission = submission_df(y_pred)
submission_path = "submissions/{}_kaggle_submission.csv".format(model.print_name())
submission.to_csv(submission_path, index=False)
# Save the trained model
model_path = "../models/{}_ieee_fraud_detection.pklz".format(model.print_name())
trainer.model.save(model_path)

In [8]:
# Compare the TransactionIDs of a reference submission against the DAGMM one.
good_sub = pd.read_csv("submissions/dsvdd_kaggle_submission.csv")
bad_sub = pd.read_csv("submissions/DAGMM_kaggle_submission.csv")

complete_trans = good_sub['TransactionID']
incomplete_trans = bad_sub['TransactionID']

# Keep the difference in a variable and report it: a bare expression that is
# not the last line of a cell is evaluated and then discarded.
missing_trans = set(complete_trans) - set(incomplete_trans)
print("{} TransactionIDs of the reference submission are missing from the DAGMM submission".format(len(missing_trans)))
bad_sub.head(5)

Unnamed: 0,TransactionID,isFraud
0,0,0.996052
1,1,0.997732
2,2,0.997456
3,3,0.99753
4,4,0.99766


In [8]:
# Reload the saved NeuTraAD model and wrap it in a trainer so it can be scored
model = models.BaseModel.load("../models/neuTraAD_ieee_fraud_detection.pklz")
trainer = trainers.NeuTraADTrainer(model=model, device=device, batch_size=batch_size, n_epochs=n_epochs, lr=lr)

# Inference only: eval mode, no gradient tracking
model.eval()
with torch.no_grad():
    _, logits = trainer.test(test_ldr)
    scores = torch.Tensor(logits)
    # NOTE(review): the training cells in this notebook use 1 - sigmoid(logits),
    # whereas this cell uses sigmoid(logits); confirm which orientation is intended.
    y_pred = torch.sigmoid(scores).detach().cpu()
    # Build the submission frame
    submission = submission_df(y_pred)

In [9]:
# Write the submission frame to the CSV named after the current model
submission_path = "submissions/{}_kaggle_submission.csv".format(model.print_name())
submission.to_csv(submission_path, index=False)