In [1]:
import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
import pytorch_lightning as pl
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
class FraudyNet(pl.LightningModule):
    """Fully connected autoencoder over 29-dimensional feature vectors.

    The encoder compresses 29 -> 14 -> 7 features and the decoder expands
    7 -> 14 -> 29. Training and validation both minimise the mean squared
    error between a batch and its reconstruction.

    Args:
        lr: Learning rate passed to the Adam optimizer.
        momentum: Stored on the instance only; Adam takes no momentum
            argument, so this value is not used by the optimizer.
        batch_size: Stored on the instance only; not read by this class.
        train_size: Stored on the instance only; not read by this class.
        prepare_test: Stored on the instance only; not read by this class.
    """

    def __init__(
        self,
        lr: float = 1e-2,
        momentum: float = 0.9,
        batch_size: int = 128,
        train_size: float = 0.7,
        prepare_test: bool = False
    ):
        super().__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(29, 14),
            torch.nn.Tanh(),
            torch.nn.Linear(14, 7),
            torch.nn.Tanh()
        )
        # The output layer is deliberately linear: a trailing Tanh would clamp
        # every reconstructed value to (-1, 1), which makes the MSE target
        # unreachable for any input feature outside that range. Parameter
        # names (decoder.0 / decoder.2) are unchanged by dropping it.
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(7, 14),
            torch.nn.Tanh(),
            torch.nn.Linear(14, 29)
        )
        self.lr = lr
        self.momentum = momentum
        self.batch_size = batch_size
        self.train_size = train_size
        self.prepare_test = prepare_test

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` to the 7-dimensional code and decode it back."""
        z = self.encoder(x)
        return self.decoder(z)

    def _reconstruction_loss(self, batch: torch.Tensor) -> torch.Tensor:
        """Flatten ``batch`` to (batch, features) and return its reconstruction MSE."""
        x = batch.view(batch.size(0), -1)
        x_hat = self(x)
        return F.mse_loss(x_hat, x)

    def training_step(self, batch, batch_idx):
        """Compute and log the reconstruction loss for one training batch."""
        loss = self._reconstruction_loss(batch)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {
            'loss': loss
        }

    def validation_step(self, batch, batch_idx):
        """Compute and log the reconstruction loss for one validation batch."""
        loss = self._reconstruction_loss(batch)
        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {
            'loss': loss
        }

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters using ``self.lr``."""
        return torch.optim.Adam(
            self.parameters(),
            lr=self.lr,
        )

In [3]:
class DataModule(pl.LightningDataModule):
    """Loads the transactions CSV and serves train/validation tensors.

    The training set contains only rows with ``Class == 0``; the held-out
    split keeps every row and is used as the validation set.

    Args:
        train_size: Fraction of rows assigned to the training split.
        batch_size: Batch size used by both dataloaders.
        data_path: Path of the CSV file to read. It must contain the
            ``Time``, ``Amount`` and ``Class`` columns used below.
    """

    def __init__(self, train_size: float = 0.75, batch_size = 128, data_path: str = 'creditcard.csv'):
        super().__init__()
        self.train_size = train_size
        self.batch_size = batch_size
        self.data_path = data_path
        self.train_ds = None
        self.val_ds = None

    def _build_datasets(self):
        """Read the CSV, split it, scale ``Amount`` and build the tensors."""
        df = pd.read_csv(self.data_path)
        df = df.drop(['Time'], axis=1)

        # Honour the configured split fraction instead of a hard-coded 0.25.
        train_df, test_df = train_test_split(
            df, train_size=self.train_size, random_state=42
        )

        # Fit the scaler on the training split only so that no statistics of
        # the held-out rows leak into training, then apply it to both splits.
        scaler = StandardScaler().fit(train_df[['Amount']].values)
        train_df = train_df.assign(
            Amount=scaler.transform(train_df[['Amount']].values).ravel()
        )
        test_df = test_df.assign(
            Amount=scaler.transform(test_df[['Amount']].values).ravel()
        )

        # remove fraud for train dataset
        train_df = train_df[train_df['Class'] == 0]
        self.x_train = train_df.drop('Class', axis=1)

        # keep fraud and non-fraud in test dataset
        self.y_test = test_df['Class'].values
        self.x_test = test_df.drop('Class', axis=1)

        self.train_ds = torch.tensor(self.x_train.values, dtype=torch.float32)
        self.val_ds = torch.tensor(self.x_test.values, dtype=torch.float32)

    def prepare_data(self):
        """Build the datasets; kept so direct callers still get populated state."""
        self._build_datasets()

    def setup(self, stage=None):
        """Ensure the datasets exist in this process.

        Lightning does not call ``prepare_data`` in every process, so state
        assigned only there can be missing elsewhere; this hook fills the gap.
        """
        if self.train_ds is None or self.val_ds is None:
            self._build_datasets()

    def train_dataloader(self):
        """Shuffled loader over the training tensor; the last partial batch is dropped."""
        return DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            drop_last=True,
            shuffle=True,
        )

    def val_dataloader(self):
        """Ordered loader over the validation tensor, keeping every row."""
        return DataLoader(
            self.val_ds,
            batch_size=self.batch_size,
            drop_last=False,
        )

In [4]:
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping

# Metrics are written under logs/fraud_detection/ for TensorBoard.
logger = TensorBoardLogger("logs/", name="fraud_detection")
# Stop once "val_loss" no longer improves by at least 0.01.
early = EarlyStopping(monitor="val_loss", min_delta=0.01)

# Seed the RNGs before the model is built so weight initialisation and
# batch shuffling are repeatable across runs.
pl.seed_everything(42, workers=True)

model = FraudyNet()
dm = DataModule()

trainer = pl.Trainer(
    accelerator="auto",
    max_epochs=100,
    logger=logger,
    # The callback only takes effect when registered here; without it the
    # run continues for all max_epochs.
    callbacks=[early],
    )

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [5]:
# Train the autoencoder; the datamodule provides the train and validation loaders.
trainer.fit(model=model, datamodule=dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 525   
1 | decoder | Sequential | 547   
---------------------------------------
1.1 K     Trainable params
0         Non-trainable params
1.1 K     Total params
0.004     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
# To inspect the logged metrics, run in a terminal: tensorboard --logdir logs/

In [None]:
# error_groupby = model.error_reconstruction_analysis()

# # Reconstruction error for each transaction. Entries with a high error are expected to be fraudulent transactions (true=1)
# display(model.error_df)

# # Analyse errors for both normal and fraud transactions
# display(error_groupby)