In [1]:
from src.tabular import models
from src.tabular import trainers
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import torch
from datetime import datetime as dt
import numpy as np

In [2]:
def load_dataset(train=True):
    if train:
        df_train = pd.read_csv("../data/processed/train_transactions_v2.csv")
        # Train only on normal data
        df_train = df_train[df_train["isFraud"] == 0]
        df_target = df_train["isFraud"]
        data = torch.Tensor(df_train.drop(columns=["isFraud"]).to_numpy())
        targets = torch.Tensor(df_target.to_numpy())
    else:
        df_test = pd.read_csv("../data/processed/test_transactions_v2.csv")
        data = torch.Tensor(df_test.to_numpy())
        targets = df_test.index
    return data, targets


def submission_df(indices: pd.Series, y_pred: np.array):
    sub = pd.DataFrame(columns=['TransactionID'], data=indices)
    sub = pd.concat([sub, pd.DataFrame(y_pred, columns=['isFraud'])], axis=1)
    return sub

In [3]:
path_to_train_set = '../data/processed/train_transactions_v2.csv'
path_to_test_set = '../data/processed/test_transactions_v2.csv'

X_train, y_train = load_dataset(train=True)
n_features = X_train.shape[1]
batch_size = 128
n_epochs = 200
lr = 1e-04
device='cuda'

# Models & trainers
models_to_train = [models.DAGMM(in_features=n_features, latent_dim=1, K=4, device=device)]
trainers_to_train = [trainers.DAGMMTrainer(model=models_to_train[0], device=device, batch_size=batch_size, n_epochs=n_epochs, lr=lr)]

# Training and test data
X_test, test_index = load_dataset(train=False)
ds = TensorDataset(X_train, y_train)
train_ldr = DataLoader(ds, batch_size=batch_size)
test_ldr = DataLoader(TensorDataset(X_test, torch.zeros(len(X_test))), batch_size=batch_size)

## DeepSVDD model

In [4]:
# Training & evaluation
model = models.DeepSVDD(in_features=n_features)
trainer = trainers.DeepSVDDTrainer(model=model, device=device, batch_size=batch_size, n_epochs=n_epochs, lr=lr)
print("Training model {} on IEEE Fraud Detection dataset {}".format(model.print_name(), X_train.shape))
# Train
trainer.train(train_ldr, None)
# Evaluate model
trainer.model.eval()
_, logits = trainer.test(test_ldr)
y_pred = (1 - torch.sigmoid(torch.Tensor(logits))).detach().cpu()
# Create submission file
submission = submission_df(test_index)
submission = pd.concat([submission, pd.DataFrame(y_pred, columns=['isFraud'])], axis=1)
submission.to_csv("submissions/{}_kaggle_submission.csv".format(model.print_name()), index=False)
trainer.model.save("../models/{}_ieee_fraud_detection_{}.pklz".format(model.print_name(), dt.now()))

Training model DeepSVDD on IEEE Fraud Detection dataset torch.Size([1139754, 432])
Initializing center c...
Center c initialized.
Started training


 21%|██        | 1888/8905 [00:26<01:36, 72.52it/s, epoch=1, loss=1142.335]


KeyboardInterrupt: 

## DAGMM model

In [None]:
# Training & evaluation
for model, trainer in zip(models_to_train, trainers_to_train):
    print("Training model {} on IEEE Fraud Detection dataset with shape {}".format(model.print_name(), X_train.shape))
    # Train
    trainer.train(train_ldr, None)
    # Generate predictions on the test set
    trainer.model.eval()
    _, logits = trainer.test(test_ldr)
    y_pred = (1 - torch.sigmoid(torch.Tensor(logits))).detach().cpu()
    # Create submission file
    submission = submission_df(test_index, y_pred)
    submission.to_csv("submissions/{}_kaggle_submission.csv".format(model.print_name()), index=False)
    trainer.model.save("../models/{}_ieee_fraud_detection.pklz".format(model.print_name()))

Training model DAGMM on IEEE Fraud Detection dataset with shape torch.Size([569877, 432])
Started training


L = torch.cholesky(A)
should be replaced with
L = torch.linalg.cholesky(A)
and
U = torch.cholesky(A, upper=True)
should be replaced with
U = torch.linalg.cholesky(A.transpose(-2, -1).conj()).transpose(-2, -1).conj() (Triggered internally at  ..\aten\src\ATen\native\BatchLinearAlgebra.cpp:1284.)
  inv_cov_mat = torch.cholesky_inverse(torch.cholesky(cov_mat))
100%|█████████▉| 4452/4453 [07:44<00:00,  9.59it/s, epoch=1, loss=6106.427]
 55%|█████▍    | 2441/4453 [04:12<03:23,  9.90it/s, epoch=2, loss=2171.516]

## NeuTraLAD Model

In [5]:
# Training & evaluation
model = models.NeuTraAD(in_features=n_features, temperature=0.07, dataset='IEEEFraudDetection', device=device)
trainer = trainers.NeuTraADTrainer(model=model, device=device, batch_size=batch_size, n_epochs=n_epochs, lr=lr)
print("Training model {} on IEEE Fraud Detection dataset {}".format(model.print_name(), X_train.shape))
# Train
trainer.train(train_ldr, None)
# Evaluate model
trainer.model.eval()
_, logits = trainer.test(test_ldr)
y_pred = (1 - torch.sigmoid(torch.Tensor(logits))).detach().cpu()
# Create submission file
submission = submission_df(test_index, y_pred)
submission.to_csv("submissions/{}_kaggle_submission.csv".format(model.print_name()), index=False)
trainer.model.save("../models/{}_ieee_fraud_detection.pklz".format(model.print_name()))

Training model neuTraAD on IEEE Fraud Detection dataset torch.Size([1139754, 432])
Started training


100%|█████████▉| 8904/8905 [27:40<00:00,  5.36it/s, epoch=1, loss=135304.435]


## ALAD Model

In [6]:
batch_size = 1024
# Training & evaluation
model = models.ALAD(in_features=n_features, out_features=128, latent_dim=n_features//2, device=device)
trainer = trainers.ALADTrainer(model=model,  device=device, batch_size=batch_size, n_epochs=n_epochs, lr=lr)
# Train
trainer.train(train_ldr, None)
# Evaluate model
trainer.model.eval()
_, logits = trainer.test(test_ldr)
y_pred = (1 - torch.sigmoid(torch.Tensor(logits))).detach().cpu()
# Create submission file
submission = submission_df(test_index, y_pred)
submission.to_csv("submissions/{}_kaggle_submission.csv".format(model.print_name()), index=False)
trainer.model.save("../models/{}_ieee_fraud_detection.pklz".format(model.print_name()))

100%|██████████| 8905/8905 [36:31<00:00,  4.06it/s, loss_d=4.1684, loss_ge=4.1572]


OSError: [Errno 22] Invalid argument: "submissions/<class 'src.tabular.models.Adversarial.ALAD'>_kaggle_submission.csv"