In [82]:
import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [83]:
class FraudyNet(pl.LightningModule):
    def __init__(self, lr: float = 1e-2, momentum: float = 0.9, prepare_test: bool = False):
        super().__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(30, 20),
            torch.nn.ReLU(),
            torch.nn.Linear(20, 10)
        )
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(10, 20),
            torch.nn.ReLU(),
            torch.nn.Linear(20, 30)
        )
        self.lr = lr
        self.momentum = momentum
        self.prepare_test = prepare_test

    def forward(self, x):
        z = self.encoder(x)
        return self.decoder(z)

    def training_step(self, batch, batch_idx):
        x = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = torch.nn.functional.mse_loss(x_hat, x)
        self.log('train_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        x = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = torch.nn.functional.mse_loss(x_hat, x)
        self.log('val_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': loss}

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(),lr=self.lr,momentum=self.momentum)

In [84]:
class Autoencoder(pl.LightningModule):
    def __init__(self, lr: float = 1e-2, momentum: float = 0.9, prepare_test: bool = False):
        super().__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(30, 14),
            torch.nn.Tanh(),
            torch.nn.Linear(14, 7),
            torch.nn.Tanh()
        )
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(7, 14),
            torch.nn.Tanh(),
            torch.nn.Linear(14, 30),
            torch.nn.Tanh()
        )
        self.lr = lr
        self.momentum = momentum
        self.prepare_test = prepare_test

    def forward(self, x):
        z = self.encoder(x)
        return self.decoder(z)

    def training_step(self, batch, batch_idx):
        x = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = torch.nn.functional.mse_loss(x_hat, x)
        self.log('train_loss', loss, on_epoch=True, prog_bar=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        x = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = torch.nn.functional.mse_loss(x_hat, x)
        self.log('val_loss', loss, on_epoch=True, prog_bar=True)
        return {'loss': loss}
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = torch.nn.functional.mse_loss(x_hat, y)
        self.log('test_loss', loss, on_epoch=True, prog_bar=True)
        return {'loss': loss}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [85]:
class DataModule(pl.LightningDataModule):
    def __init__(self, train_size: float = 0.7, batch_size = 128):
        super().__init__()
        self.train_size = train_size
        self.batch_size = batch_size

    def prepare_data(self):
        df = pd.read_csv('creditcard.csv')
        df['TimeNorm'] = df['Time'].apply(lambda x: np.ceil(float(x)/3600) % 24.)

        scl = StandardScaler()
        df['TimeNorm'] = scl.fit_transform(df['TimeNorm'].values.reshape(-1,1))
        df['NormAmt'] = scl.fit_transform(df['Amount'].values.reshape(-1,1))
        
        df = df.drop(['Time', 'Amount'], axis = 1)
        
        x_train, x_test = train_test_split(
            df, 
            test_size=(1-self.train_size),
            random_state = 42
        )

        # remove fraud for train dataset
        x_train = x_train[x_train['Class'] == 0]
        self.x_train = x_train.drop('Class', axis = 1)


        # keep fraud and non-fraud in test dataset
        self.y_test = x_test['Class'].values
        self.x_test = x_test.drop('Class', axis = 1)

        self.train_ds = torch.FloatTensor(self.x_train.values)
        self.val_ds = torch.FloatTensor(self.x_test.values)
        self.test_ds = torch.FloatTensor(self.x_test.values, self.y_test)

    def train_dataloader(self):
        return DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            #drop_last=True,
            shuffle=True,
            #num_workers=8
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_ds,
            batch_size=self.batch_size,
            #drop_last=False,
            #num_workers=8
        )
    
    def test_dataloader(self):
        return DataLoader(
            self.val_ds,
            batch_size=self.batch_size,
            #drop_last=False,
            #num_workers=8
        )

In [86]:
dm = DataModule()
model = Autoencoder()

trainer = pl.Trainer(accelerator="gpu", max_epochs=2, enable_model_summary=True)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [87]:
trainer.fit(model, dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 539   
1 | decoder | Sequential | 562   
---------------------------------------
1.1 K     Trainable params
0         Non-trainable params
1.1 K     Total params
0.004     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=2` reached.


In [88]:
trainer.validate(model, dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Validation: 0it [00:00, ?it/s]

[{'val_loss': 0.619890034198761}]

In [89]:
trainer.test(model, dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

ValueError: too many values to unpack (expected 2)