In [3]:
from imblearn.over_sampling import SMOTE, ADASYN, KMeansSMOTE, SVMSMOTE
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score, precision_recall_curve
from sklearn.preprocessing import StandardScaler
import pickle

## Data Loading

In [83]:
data = pd.read_csv('preprocessed.csv', index_col=0)
data.head(5)

Unnamed: 0,merchant,category,amt,gender,street,city,zip,city_pop,job,merch_lat,merch_long,hour,age,is_fraud
0,1.519014,1.615782,-0.407826,-0.909206,-0.207491,-0.232599,-0.209999,-0.282589,-0.316898,-0.494354,0.593864,-1.878145,-0.872823,0
1,0.900957,1.548622,0.230039,-0.909206,-0.207491,-0.232599,-0.209999,-0.29367,-0.281569,2.078699,-2.030341,-1.878145,-0.314203,0
2,-0.667123,-0.616973,0.934149,1.099861,-0.207491,-0.232599,-0.209999,-0.280406,0.744516,0.902849,-1.592323,-1.878145,0.629736,0
3,-0.426116,-0.204031,-0.158132,1.099861,1.005067,1.135357,1.018859,-0.287742,0.155359,1.662886,-1.621848,-1.878145,0.343422,0
4,-0.367463,-0.493928,-0.177094,1.099861,-0.207491,-0.232599,-0.209999,-0.293835,-0.369541,0.026941,0.841909,-1.878145,-0.760752,0


In [84]:
# Straitified Train-Test split
y = data['is_fraud'].values
X = data.drop(columns=['is_fraud'], inplace=False).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, 
                                                    random_state=100, stratify=y)
print(f"% of positive in train data: {y_train[np.where(y_train==1)].shape[0]/y_train.shape[0]*100}")
print(f"% of positive in test data: {y_test[np.where(y_test==1)].shape[0]/y_test.shape[0]*100}")


% of positive in train data: 0.5788434588706052
% of positive in test data: 0.5789158438366799


In [123]:
# sm = SMOTE(random_state=100)
# X_train, y_train = sm.fit_resample(X_train, y_train)
# print(X_train.shape, y_train.shape)
# print(f"% of positive in train data: {y_train[np.where(y_train==1)].shape[0]/y_train.shape[0]*100}")

## DevNet Implementation
* <font size=4>***This model is developed as guided by the paper "Deep Anomaly Detection with Deviation Networks", link- https://arxiv.org/pdf/1911.08623***</font>
* <font size=4>***Without oversampling the model achieved 93% precision, though precision is very poor. If precision is the only metric to consider this model will be the right choice***</font>

### Scoring Net

In [10]:
class ScoringNet(nn.Module):
    def __init__(self, input_size):
        '''2 hidden layers, 64 and 32 neurons and output is a real number'''
        super(ScoringNet, self).__init__()
        self.l1 = nn.Linear(input_size,64) # 64 units
        self.l1_ac = nn.ReLU()
        self.l2 = nn.Linear(64, 32) # 32 Units
        self.l2_ac = nn.ReLU()
        self.l3 = nn.Linear(32, 16) # 16 Units
        self.l3_ac = nn.ReLU()
        self.l4 = nn.Linear(16, 1)
    def forward(self,x):
        out = self.l1(x)
        out = self.l1_ac(out)
        out = self.l2(out)
        out = self.l2_ac(out)
        out = self.l3(out)
        out = self.l3_ac(out)
        out = self.l4(out)
        return out

### Deviation Loss

In [12]:
class DeviationLoss(nn.Module):
    def __init__(self):
       super(DeviationLoss, self).__init__()
    def forward(self,y_pred,y_true):
        """
        1. Randomly sample l=5000 N(0,1) points. Calculate mu and sigma
        2. dev_i = (y_pred[i] - mu)/sigma
        3. cost_i = (1-y_true_i)*|dev_i| + y_true_i*max(0, a-dev_i), here a = 3
        4. Loss = sum of cost_i
        """
        prior_scores = np.random.normal(size=5000)
        mu, sigma = np.mean(prior_scores), np.std(prior_scores)
        dev = (y_pred - mu)/sigma
        zeros = torch.tensor(np.zeros(dev.shape)).type(torch.float32).to('cuda')
        cost = (1-y_true)*torch.abs(dev) + y_true*torch.max(zeros,3-dev)
        loss = torch.mean(cost)
        return loss

### Data Loader and Model Training

In [14]:
class CustomDataset(Dataset):
    def __init__(self, X_train, y_train):
        mask_0 = np.where(y_train==0)
        mask_1 = np.where(y_train==1)
        X_0, X_1, y_0, y_1 = X_train[mask_0], X_train[mask_1], y_train[mask_0], y_train[mask_1]
        self.X_0 = torch.tensor(X_0).type(torch.float32)
        self.X_1 = torch.tensor(X_1).type(torch.float32)
        self.y_0 = torch.tensor(y_0.reshape((-1,1))).type(torch.float32)
        self.y_1 = torch.tensor(y_1.reshape((-1,1))).type(torch.float32)
        self.n_samples = min(self.X_1.shape[0], self.X_0.shape[0])
    def __len__(self):
        return self.n_samples
    def __getitem__(self, index):
        return (self.X_0[index], self.y_0[index],self.X_1[index], self.y_1[index])

In [109]:
def train(X_train,y_train):
    device_gpu = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    batch_size = 100
    input_size = X_train.shape[1]
    lr = 0.005
    model = ScoringNet(input_size)
    model.to(device_gpu)
    loss_fn = DeviationLoss()
    # optimizer = torch.optim.SGD(model.parameters(),lr = lr)
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    epochs = 200
    # get the data loader
    dataset = CustomDataset(X_train, y_train)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    total_steps = len(dataloader)
    for i in range(epochs):
        for j, (X_0, y_0, X_1, y_1) in enumerate(dataloader):
            # forward pass
            X = torch.vstack((X_0, X_1)).to(device_gpu)
            y = torch.vstack((y_0, y_1)).to(device_gpu)
            outputs = model(X)
            loss = loss_fn(outputs, y)
            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # if (j+1)%100 == 0:
            # if (i+1)%10 == 0:
            print(f"At epoch {i+1}, step {j}/{total_steps}, deviation loss = {loss.item():0.5f}")
    return model

In [111]:
model = train(X_train, y_train)

At epoch 1, step 0/53, deviation loss = 1.46711
At epoch 1, step 1/53, deviation loss = 1.39668
At epoch 1, step 2/53, deviation loss = 1.25659
At epoch 1, step 3/53, deviation loss = 1.26052
At epoch 1, step 4/53, deviation loss = 1.27604
At epoch 1, step 5/53, deviation loss = 1.19588
At epoch 1, step 6/53, deviation loss = 1.11873
At epoch 1, step 7/53, deviation loss = 1.12488
At epoch 1, step 8/53, deviation loss = 1.06058
At epoch 1, step 9/53, deviation loss = 0.98888
At epoch 1, step 10/53, deviation loss = 0.84442
At epoch 1, step 11/53, deviation loss = 0.63749
At epoch 1, step 12/53, deviation loss = 0.72403
At epoch 1, step 13/53, deviation loss = 0.57809
At epoch 1, step 14/53, deviation loss = 0.57689
At epoch 1, step 15/53, deviation loss = 0.57270
At epoch 1, step 16/53, deviation loss = 0.55401
At epoch 1, step 17/53, deviation loss = 0.56196
At epoch 1, step 18/53, deviation loss = 0.60309
At epoch 1, step 19/53, deviation loss = 0.55352
At epoch 1, step 20/53, deviat

### Test Performance

In [113]:
X_test_tensor = torch.tensor(X_test).type(torch.float32)
y_test_score = model.to('cpu')(X_test_tensor)
# y_test_score

In [115]:
# cutoff - 1.96
y_test_score_np = y_test_score.cpu().detach().numpy()
pred_fn = lambda x: 1 if x >= 1.96 else 0
pred_fn = np.vectorize(pred_fn)
y_pred = pred_fn(y_test_score_np)
# y_pred

In [117]:
cm_test = confusion_matrix(y_test, y_pred)
cm_test

array([[371656,  15095],
       [   154,   2098]])

In [119]:
f1_score(y_test, y_pred)

0.21578812033941888

In [121]:
print(f'Recall = {cm_test[1,1]/(cm_test[1,0]+cm_test[1,1])}')
print(f'Precison = {cm_test[1,1]/(cm_test[0,1]+cm_test[1,1])}')

Recall = 0.9316163410301954
Precison = 0.12202640609550398


In [33]:
net = ScoringNet(13)
X = torch.tensor(X_train).type(torch.float32)
y_pred = net(X)
y_pred.shape

torch.Size([907672, 1])

In [37]:
y_actual = torch.tensor(y_train).type(torch.float32)
y_actual.shape

torch.Size([907672])

In [58]:
loss_fn = DeviationLoss()
loss_fn(y_pred[:10], y_actual[:10])

tensor(0.0481, grad_fn=<MeanBackward0>)

In [71]:
dataset = CustomDataset(X_train, y_train)
datloader = DataLoader(dataset, batch_size=64)
dataitter = iter(datloader)
next(dataitter)

[tensor([[-8.3674e-01, -7.9024e-01, -1.3168e-01, -9.0921e-01, -5.0078e-02,
          -5.5012e-02, -5.0469e-02, -2.7912e-01, -1.3709e-01,  1.2721e+00,
           1.2617e+00, -1.1805e-01, -1.4124e+00],
         [-8.8818e-01, -6.8481e-01, -2.2587e-01, -9.0921e-01, -2.4920e-02,
          -2.6631e-02, -2.4974e-02, -2.9216e-01, -7.6771e-03, -5.4508e-01,
          -1.1243e+00,  7.6199e-01, -8.4118e-01],
         [-2.8187e-01, -4.9393e-01, -2.3342e-01,  1.0999e+00, -2.0749e-01,
          -2.3260e-01, -2.1000e-01, -2.8909e-01, -2.9628e-01,  2.0712e-01,
           1.4712e-01, -5.5808e-01, -3.8236e-01],
         [-7.6163e-01, -7.7914e-01, -1.7304e-01,  1.0999e+00, -2.1625e-01,
          -2.4248e-01, -2.1888e-01, -5.7920e-02, -2.7775e-01, -1.6496e+00,
          -3.1943e-02,  3.2197e-01, -1.2208e+00],
         [ 1.9127e+00,  2.1942e+00, -4.1905e-01,  1.0999e+00, -7.7209e-03,
          -7.2270e-03, -7.5432e-03, -2.8669e-01, -8.8732e-02,  4.6214e-01,
           1.1246e+00, -9.9810e-01,  2.0711e+00],
