In [6]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [83]:
# Load creditcard.csv into a DataFrame; the path is relative, so it is resolved
# against the kernel's current working directory.
cc_data = pd.read_csv("creditcard.csv")

In [84]:
# Build the model inputs: drop the 'Time' column, standardize 'Amount', then
# separate the feature matrix from the 'Class' label column.
transactionData = cc_data.drop(['Time'], axis=1)
# NOTE(review): the scaler is fitted on every row, before the train/test split
# performed later in the notebook, so held-out rows contribute to the scaling
# statistics. Confirm whether that leakage is acceptable here.
transactionData['Amount'] = StandardScaler().fit_transform(transactionData['Amount'].values.reshape(-1, 1))

X = transactionData.drop("Class", axis=1).values
y = transactionData['Class'].values

# Sanity check placed after X and y are assigned: evaluating it any earlier
# raises NameError on a fresh kernel.
assert len(X) == len(y), "feature matrix and label vector must have the same number of rows"

In [85]:
# Wrap the NumPy feature and label arrays as torch tensors in one pass.
X_tensor, y_tensor = map(torch.as_tensor, (X, y))

In [86]:
# Display the dimensions of the feature tensor as a quick sanity check.
X_tensor.shape

torch.Size([284807, 29])

In [110]:
class binaryClassification(nn.Module):
    """Two-hidden-layer feed-forward network that emits one raw logit per sample.

    The forward pass is Linear -> ReLU -> BatchNorm, twice, followed by dropout
    and a final Linear layer with a single output unit. No sigmoid is applied
    inside the module, so the output is a logit.

    Args:
        input_dim: Number of input features per sample (default 29).
        hidden_dim: Width of both hidden layers (default 64).
        dropout_p: Dropout probability applied before the output layer
            (default 0.1).
    """

    def __init__(self, input_dim=29, hidden_dim=64, dropout_p=0.1):
        super().__init__()
        # Attribute names and registration order are kept stable so that
        # state_dict keys and parameter ordering do not change.
        self.layer_1 = nn.Linear(input_dim, hidden_dim)
        self.layer_2 = nn.Linear(hidden_dim, hidden_dim)
        self.layer_out = nn.Linear(hidden_dim, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_p)
        self.batchnorm1 = nn.BatchNorm1d(hidden_dim)
        self.batchnorm2 = nn.BatchNorm1d(hidden_dim)

    def forward(self, inputs):
        """Map a (batch, input_dim) tensor to a (batch, 1) tensor of logits."""
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)

        return x



In [114]:
# Instantiate the network and cast its parameters to float64 with .double().
# NOTE(review): presumably this matches the dtype of X_tensor -- confirm.
model = binaryClassification().double()

In [36]:
# NOTE(review): these two statements repeat the preprocessing done earlier in
# the notebook (drop 'Time', standardize 'Amount'). X and y are not re-derived
# here, so this cell only rebinds transactionData.
transactionData = cc_data.drop(['Time'], axis=1)
transactionData['Amount'] = StandardScaler().fit_transform(transactionData['Amount'].values.reshape(-1, 1))

56962


In [54]:
# Training hyperparameters: number of passes over the training set, samples
# per mini-batch, and the optimizer step size.
num_epochs, minibatch_size, learning_rate = 100, 32, 1e-3

In [128]:
from torch.utils.data import TensorDataset, DataLoader

# Hold out 20% of the rows; random_state pins the shuffle so the split repeats.
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor,
    y_tensor,
    test_size=0.2,
    random_state=1,
)

# Training batches carry (features, label) pairs and are reshuffled each epoch.
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=minibatch_size, shuffle=True)

# Test batches carry features only and are served one row at a time, in order.
test_data = TensorDataset(X_test)
test_loader = DataLoader(test_data, batch_size=1)

In [124]:
# Debug check: show which type train_test_split returned for the held-out features.
print(type(X_test))

<class 'torch.Tensor'>


In [119]:
# BCEWithLogitsLoss applies the sigmoid internally, so it is fed the model's
# raw logits rather than probabilities.
criterion = nn.BCEWithLogitsLoss()
# Adam is reached through the already-imported `torch` package: the notebook
# never imports a bare `optim` name, so `optim.Adam` fails on a fresh kernel.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [98]:
# Per-epoch loss curves, keyed by data split; each list starts out empty.
history = {
    'train_loss': [],
    'test_loss': [],
}

In [105]:
def binary_acc(y_pred, y_test):
    """Return batch accuracy as a whole-number percentage, computed from logits.

    Args:
        y_pred: Tensor of raw logits, one per sample, e.g. shape (N, 1) or (N,).
        y_test: Tensor of 0/1 labels with the same number of elements as
            ``y_pred``; its shape may differ, e.g. (N,) against (N, 1).

    Returns:
        A 0-dim float tensor: the percentage of samples whose thresholded
        prediction (sigmoid then round) equals the label, rounded to the
        nearest integer.

    Raises:
        ValueError: If ``y_pred`` and ``y_test`` hold different numbers of
            elements.
    """
    if y_pred.numel() != y_test.numel():
        raise ValueError(
            f"y_pred has {y_pred.numel()} elements but y_test has {y_test.numel()}"
        )

    # Flatten both sides so that (N, 1) against (N,) is compared element by
    # element instead of broadcasting to an (N, N) matrix.
    y_pred_tag = torch.round(torch.sigmoid(y_pred)).reshape(-1)
    labels = y_test.reshape(-1)

    correct_results_sum = (y_pred_tag == labels).sum().float()
    acc = correct_results_sum / labels.shape[0]
    acc = torch.round(acc * 100)

    return acc

In [121]:
# Train for num_epochs passes over train_loader, printing the mean per-batch
# loss and accuracy after each epoch and saving the weights at the end.
model.train()
n_batches = len(train_loader)
for e in range(1, num_epochs + 1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()

        y_pred = model(X_batch)
        # Labels arrive as (batch,); the model emits (batch, 1).
        targets = y_batch.unsqueeze(1)

        loss = criterion(y_pred, targets.float())
        # Accuracy is only a metric, so it is computed on detached logits to
        # keep it out of the autograd graph.
        acc = binary_acc(y_pred.detach(), targets)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    mean_loss = epoch_loss / n_batches
    mean_acc = epoch_acc / n_batches
    # Record the curve in the history dict created earlier in the notebook,
    # which was otherwise never populated.
    history['train_loss'].append(mean_loss)

    print(f'Epoch {e:03}: | Loss: {mean_loss:.5f} | Acc: {mean_acc:.3f}')


torch.save(model.state_dict(), './credit_card_model.pth')

Epoch 001: | Loss: 0.01705 | Acc: 99.688
Epoch 002: | Loss: 0.00365 | Acc: 99.926
Epoch 003: | Loss: 0.00323 | Acc: 99.930
Epoch 004: | Loss: 0.00292 | Acc: 99.938
Epoch 005: | Loss: 0.00284 | Acc: 99.938
Epoch 006: | Loss: 0.00261 | Acc: 99.939
Epoch 007: | Loss: 0.00257 | Acc: 99.938
Epoch 008: | Loss: 0.00243 | Acc: 99.941
Epoch 009: | Loss: 0.00225 | Acc: 99.944
Epoch 010: | Loss: 0.00221 | Acc: 99.945
Epoch 011: | Loss: 0.00203 | Acc: 99.948
Epoch 012: | Loss: 0.00203 | Acc: 99.950
Epoch 013: | Loss: 0.00203 | Acc: 99.951
Epoch 014: | Loss: 0.00180 | Acc: 99.950
Epoch 015: | Loss: 0.00183 | Acc: 99.953
Epoch 016: | Loss: 0.00175 | Acc: 99.960
Epoch 017: | Loss: 0.00178 | Acc: 99.955
Epoch 018: | Loss: 0.00184 | Acc: 99.956
Epoch 019: | Loss: 0.00165 | Acc: 99.957
Epoch 020: | Loss: 0.00158 | Acc: 99.961
Epoch 021: | Loss: 0.00145 | Acc: 99.961
Epoch 022: | Loss: 0.00160 | Acc: 99.954
Epoch 023: | Loss: 0.00143 | Acc: 99.963
Epoch 024: | Loss: 0.00138 | Acc: 99.961
Epoch 025: | Los

AttributeError: 'DataLoader' object has no attribute 'labels'

In [132]:
# Run the held-out features through the model and collect hard 0/1 predictions
# as one flat list of Python floats.
batch_predictions = []
model.eval()
with torch.no_grad():
    for X_batch in test_loader:
        # Each batch is a one-element sequence because the test dataset wraps
        # only the feature tensor.
        y_test_pred = model(X_batch[0])
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        # Flatten per batch so the result stays a flat list for any batch size,
        # not only batch_size == 1.
        batch_predictions.append(y_pred_tag.cpu().reshape(-1))
y_pred_list = torch.cat(batch_predictions).tolist() if batch_predictions else []

In [136]:
# Print per-class precision, recall and F1 for the rounded predictions against
# the held-out labels.
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test, y_pred_list))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56875
           1       0.93      0.62      0.74        87

    accuracy                           1.00     56962
   macro avg       0.97      0.81      0.87     56962
weighted avg       1.00      1.00      1.00     56962



In [137]:
# Confusion matrix for the held-out set: true labels are passed first and
# predictions second, so rows are true classes and columns predicted classes.
confusion_matrix(y_test, y_pred_list)

array([[56871,     4],
       [   33,    54]])

In [140]:
# Select the held-out labels equal to 1 with a boolean mask and display them.
y_test[y_test == 1]

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])