In [1]:
# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Importing sklearn functions
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split, cross_val_score

from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score, f1_score, accuracy_score, classification_report

# Importing data sampling methods
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import OneSidedSelection

# Importing torch
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
device

device(type='cuda')

In [4]:
# Intel CPU fastener
from sklearnex import patch_sklearn
patch_sklearn()

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


# Dealing with the data

In [5]:
# Scales the column
def scale_the_column(dataframe, column_name, column_index):
    rob_scaler = RobustScaler()
    df[f'scaled_{column_name}'] = rob_scaler.fit_transform(df[column_name].values.reshape(-1,1))
    df.drop([column_name], axis=1, inplace=True)
    scaled_column = df[f'scaled_{column_name}']
    df.drop([f'scaled_{column_name}'], axis=1, inplace=True)
    df.insert(column_index, f'scaled_{column_name}', scaled_column)
    return dataframe

# Undersampling with One Sided Selection
def undersampling(X_train, y_train):
    undersampling = OneSidedSelection(n_neighbors=3, n_seeds_S=200)
    X_train, y_train= undersampling.fit_resample(X_train, y_train)
    return X_train,y_train

# Oversampling with SMOTE
def oversampling_SMOTE(X_train, y_train):
    oversample = SMOTE(sampling_strategy='auto')
    X_train, y_train = oversample.fit_resample(X_train, y_train)
    return X_train,y_train

In [None]:
  print("UNDERSAMPLED DATASET")
  print_score_board(X_train_under, y_train_under, X_test, y_test, model)
  print("SMOTE DATASET")
  print_score_board(X_train_smote, y_train_smote, X_test, y_test, model)
  print("UNDERSAMPLED SMOTE DATASET")
  print_score_board(X_train_both, y_train_both, X_test, y_test, model)

# Data Reading

In [6]:
df = pd.read_csv('creditcard.csv')
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [7]:
df = scale_the_column(df, "Amount",0)
df = scale_the_column(df, "Time", 1)

In [8]:
# Seperate the target and features
X = df.drop('Class', axis=1)
y = df['Class']

In [9]:
# Train - Test Split
X_train_org, X_test, y_train_org, y_test = train_test_split(X, y, test_size=0.30, random_state = 42)
print('X_train_shape: ', X_train_org.shape, '\nX_test_Shape: ', X_test.shape,'\ny_train_shape: ', y_train_org.shape, '\ny_test_shape: ' , y_test.shape)

X_train_shape:  (199364, 30) 
X_test_Shape:  (85443, 30) 
y_train_shape:  (199364,) 
y_test_shape:  (85443,)


# Model Run

In [10]:
auc_scores=[]
cross=[]
precisions=[]
f_scores=[]

In [11]:
# undersampling with one-sided-selection
X_train_under, y_train_under = undersampling(X_train_org, y_train_org)
print(X_train_under.shape, y_train_under.shape)

(13137, 30) (13137,)


In [12]:
# oversampling with SMOTE
X_train_smote, y_train_smote = oversampling_SMOTE(X_train_org, y_train_org)
print(X_train_smote.shape, y_train_smote.shape)

(398016, 30) (398016,)


In [13]:
# undersampling with one-sided-selection and oversampling with SMOTE
X_train_both, y_train_both = oversampling_SMOTE(X_train_under, y_train_under)
print(X_train_both.shape, y_train_both.shape)

(25562, 30) (25562,)


# Model Trainings

In [14]:
from sklearn.preprocessing import StandardScaler

In [15]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_under)
X_test = scaler.transform(X_test)

In [16]:
# Convert data to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train).to(device)
y_train_tensor = torch.LongTensor(y_train_under.values).to(device)
X_test_tensor = torch.FloatTensor(X_test).to(device)
y_test_tensor = torch.LongTensor(y_test.values).to(device)

In [17]:
# Define the neural network architecture
class ANN(nn.Module):
    def __init__(self, input_dim):
        super(ANN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)  # Output layer with 2 classes (fraud, not fraud)

    def forward(self, x):
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
        return x

In [18]:
# Initialize the ANN model
input_dim = X_train.shape[1]
model = ANN(input_dim)
model.to(device)

ANN(
  (fc1): Linear(in_features=30, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=2, bias=True)
)

In [19]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [20]:
epochs = 40
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 5 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')


Epoch [5/40], Loss: 0.6032
Epoch [10/40], Loss: 0.5227
Epoch [15/40], Loss: 0.4459
Epoch [20/40], Loss: 0.3702
Epoch [25/40], Loss: 0.2967
Epoch [30/40], Loss: 0.2303
Epoch [35/40], Loss: 0.1756
Epoch [40/40], Loss: 0.1341


In [21]:
# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs, 1)
    accuracy = accuracy_score(y_test, predicted.cpu().numpy())
    print(f'Accuracy on test set: {accuracy:.2f}')

    # Compute precision, recall, and AUC score
    print("Classification Report:")
    print(classification_report(y_test, predicted.cpu().numpy()))

    # Compute AUC score
    y_prob = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
    auc_score = roc_auc_score(y_test, y_prob)
    print(f'AUC Score: {auc_score:.2f}')

Accuracy on test set: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85307
           1       0.77      0.83      0.80       136

    accuracy                           1.00     85443
   macro avg       0.88      0.92      0.90     85443
weighted avg       1.00      1.00      1.00     85443

AUC Score: 0.95


# Comparison