In [1]:
# wandb and pytorch manual 
# https://colab.research.google.com/drive/1XDtq-KT0GkX06a_g1MevuLFOMk4TxjKZ#scrollTo=bZpt5W2NNl6S


In [2]:
from __future__ import print_function
import os
import argparse
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# from torchvision import datasets, transforms

import logging
logging.propagate = False
logging.getLogger().setLevel(logging.ERROR)

import warnings
warnings.filterwarnings(action='ignore')

from datetime import datetime
import wandb

from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
from sklearn.metrics import f1_score, classification_report

In [3]:
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33mbeomgon-yu[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:

now = datetime.now().strftime('%Y%m%d_%H%M%S')
wandb.init(project='Credit Card Fraud Detection',  name=now, mode='online')
wandb.watch_called = False

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

config =wandb.config
config.batch_size = 256
config.epochs = 100
config.lr = 1e-2
config.momentum = 0.1
config.weight_decay = 1e-4
config.device = device
config.seed = 42
config.log_interval = 10
config.num_workers = 8
config.adam = True


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mbeomgon-yu[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
config.device

'cuda'

# set seed for reproduce

In [6]:
def seed_everything(seed) :
    random.seed(seed)
    os.environ['PYHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


# dataset

In [7]:
# Pandas is 18 times slower than Numpy (15.8ms vs 0.874 ms). Pandas is 20 times slower than Numpy
# therefore Use numpy for faster data loading

class CDataset(Dataset):
    def __init__(self, df, eval_mode=False):
        self.df = df
        self.eval_mode = eval_mode
        if self.eval_mode:
            self.labels = self.df['Class'].values
            self.df = self.df.drop(columns=['Class']).values
        else:
            self.df = self.df.values
        
    def __getitem__(self, index):
        if self.eval_mode:
            self.x = self.df[index]
            self.y = self.labels[index]
            return torch.Tensor(self.x), self.y
        else:
            self.x = self.df[index]
            return torch.Tensor(self.x)
        
    def __len__(self):
        return len(self.df)

In [8]:
# train_dataset = CDataset(train_df)
# train_loader = DataLoader(train_dataset, batch_size = config.batch_size, shuffle=True, num_workers=config.num_workers)

# val_dataset = CDataset(val_df, eval_mode=True)
# val_loader = DataLoader(val_dataset, batch_size = config.batch_size, shuffle=False, num_workers=config.num_workers)

## model

In [9]:
class AutoEncoder(nn.Module) :
    def __init__(self) :
        super().__init__()
        # self.act = nn.ReLU()
        self.act = nn.GELU()
        # self.act = nn.LeakyReLU()
        
        self.dim = 30
        self.hidden1 = 64
        self.hidden2 = 128
        
        self.encoder = nn.Sequential(
            nn.BatchNorm1d(self.dim),
            nn.Linear(self.dim,self.hidden1),
            nn.BatchNorm1d(self.hidden1),
            self.act,
            nn.Linear(self.hidden1,self.hidden2),
            nn.BatchNorm1d(self.hidden2),
            self.act,
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(self.hidden2,self.hidden1),
            nn.BatchNorm1d(self.hidden1),
            self.act,
            nn.Linear(self.hidden1,self.dim),
        )
        
        self.encoder2 = nn.Sequential(
            nn.BatchNorm1d(self.dim),
            nn.Linear(self.dim,self.hidden1),
            nn.BatchNorm1d(self.hidden1),
            self.act,
            nn.Linear(self.hidden1,self.hidden2),
            nn.BatchNorm1d(self.hidden2),
            self.act,
        )
        
        self.decoder2 = nn.Sequential(
            nn.Linear(self.hidden2,self.hidden1),
            nn.BatchNorm1d(self.hidden1),
            self.act,
            nn.Linear(self.hidden1,self.dim),
        )        
    

          # tied auto encoder
#         self.l1_weight = torch.randn(self.hidden1, self.dim) / torch.sqrt(torch.tensor(self.hidden1))
#         self.l1_bias = torch.zeros(self.hidden1)
        
#         self.l2_weight = torch.randn(self.hidden2, self.hidden1) / torch.sqrt(torch.tensor(self.hidden2))
#         self.l2_bias = torch.zeros(self.hidden2)

#         self.encoder[1].weight = nn.Parameter(self.l1_weight)
#         self.encoder[1].bais = nn.Parameter(self.l1_bias)
#         self.encoder[4].weight = nn.Parameter(self.l2_weight)
#         self.encoder[4].bias = nn.Parameter(self.l2_bias)

        
#         self.decoder[0].weight = nn.Parameter(self.l2_weight.transpose(0,1))
#         self.encoder[0].bais = nn.Parameter(self.l2_bias)
#         self.encoder[3].weight = nn.Parameter(self.l1_weight.transpose(0,1))
#         self.encoder[3].bias = nn.Parameter(self.l1_bias)   
        
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)          
        
    def forward(self, x) :
        
        x = self.encoder(x)
        x1 = self.decoder(x)
        
        x = self.encoder(x1)
        x = self.decoder(x)
        
        return x1, x
        

In [10]:
class Trainer() :
    def __init__(self, model, optimizer, train_loader, val_loader, scheduler, config) :
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.scheduler = scheduler
        self.device = config.device
        self.epochs = config.epochs
        self.lr = config.lr
        
        self.criterion = nn.L1Loss().to(self.device)
        
    def fit(self,) :
        self.model.to(self.device)
        best_score = 0
        for epoch in range(self.epochs) :
            self.model.train()
            train_loss = []
            
            for x in iter(self.train_loader) :
                x = x.to(self.device)
                x1, _x = self.model(x)
                
                loss = self.criterion(x, _x)
                loss = self.criterion(x, _x) + 0.5*self.criterion(x, x1)
                
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                
                train_loss.append(loss.item())
                wandb.log({'train loss' : loss.item()})
                
            score = self.validation(self.model, 0.95)
            
            if self.scheduler is not None :
                self.scheduler.step(score)
            
            print(f'epoch :[{epoch}] train loss [{np.mean(train_loss)}] val score [{score}]')
            # for param_group in self.optimizer.param_groups:
            #     print(param_group['lr'])      
            
            # print(f'epoch :[{epoch}] train loss [{np.mean(train_loss)}] val score [{score}] lr [{self.scheduler.get_lr()}]')

            if best_score < score :
                best_score = score
                torch.save(self.model.state_dict(), '../saved/best_model.pth')
            
    def validation(self, eval_model, threshold) :
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        eval_model.eval()
        pred_y = []
        true_y = []
        
        with torch.no_grad():
            for x, y in iter(self.val_loader) :
                x = x.to(self.device)
                y = y.to(self.device)
                
                _, _x = self.model(x)
                diff = cos(x, _x).cpu().tolist()
                batch_pred = np.where(np.array(diff) < threshold, 1, 0).tolist()
                pred_y += batch_pred
                true_y += y.tolist()
                
        f1 = f1_score(true_y, pred_y, average='macro')
        wandb.log({'f1_score' : f1})
                
        return f1

In [11]:

def main(config) :

    train_df = pd.read_csv('../dataset/train.csv')
    val_df = pd.read_csv('../dataset/val.csv')
    train_df = train_df.drop(columns=['ID'])
    val_df = val_df.drop(columns=['ID'])  
    print(train_df.shape)
    
    train_dataset = CDataset(train_df)
    train_loader = DataLoader(train_dataset, batch_size = config.batch_size, shuffle=True, num_workers=config.num_workers)

    val_dataset = CDataset(val_df, eval_mode=True)
    val_loader = DataLoader(val_dataset, batch_size = config.batch_size, shuffle=False, num_workers=config.num_workers)    
    
    seed_everything(config.seed)    

    model = AutoEncoder()
    model.eval()
    
    if config.adam :
        optimizer = torch.optim.Adam(params=model.parameters(), lr = config.lr)
    else :
        optimizer = torch.optim.SGD(model.parameters(), config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10, threshold_mode='abs', min_lr=1e-8, verbose=True)
    # scheduler = StepLR(optimizer, step_size=50, gamma=0.2)
    
    wandb.watch(model, log='all')

    trainer = Trainer(model, optimizer, train_loader, val_loader, scheduler, config)
    
    wandb.save('model.h5')


    trainer.fit()

In [None]:
if __name__ == '__main__':
    main(config)

(113842, 30)
epoch :[0] train loss [0.40344571193282525] val score [0.4900811722183891]
epoch :[1] train loss [0.24218838666931966] val score [0.5032817761373315]
epoch :[2] train loss [0.21708972176139274] val score [0.5046651501082147]
epoch :[3] train loss [0.20629896156573563] val score [0.5070757950116161]
epoch :[4] train loss [0.19740314808454407] val score [0.5090974190861624]
epoch :[5] train loss [0.1919158743003781] val score [0.510969637786937]
epoch :[6] train loss [0.18862779763307463] val score [0.511433290776233]
epoch :[7] train loss [0.18013745583175272] val score [0.5267313678808235]
epoch :[8] train loss [0.176950240938851] val score [0.5254002744367214]
epoch :[9] train loss [0.1739639648895585] val score [0.5362409406460878]
epoch :[10] train loss [0.17052484169769822] val score [0.541668475427458]
epoch :[11] train loss [0.16739430054185095] val score [0.5507171941799006]
