## Importing the libraries

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.preprocessing import RobustScaler
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import BatchNormalization, Dense, InputLayer
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

## Load the dataset

In [4]:
# Read the raw transactions CSV (looked up relative to the working directory).
df = pd.read_csv(filepath_or_buffer="creditcard.csv")
# Preview the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [5]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

## Preprocessing data

In [7]:
def scale_amount(df, scaler):
    """Return a copy of ``df`` whose 'Amount' column has been scaled.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an 'Amount' column.
    scaler : object
        Already-fitted scaler exposing ``transform(X)`` for a 2-D array of
        shape ``(n_rows, 1)``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the caller's ``df`` is left untouched so that calling
        this function twice on the same input gives the same result.
    """
    # Work on a copy: mutating the caller's frame makes notebook cells
    # non-idempotent and silently changes data other cells still reference.
    scaled = df.copy()
    amounts = scaled['Amount'].to_numpy().reshape(-1, 1)
    # transform() yields one column per input feature; flatten it back to 1-D
    # so the column assignment does not depend on 2-D broadcasting.
    scaled['Amount'] = np.asarray(scaler.transform(amounts)).ravel()
    return scaled

def normalize_time(df, time_min, time_max):
    """Return a copy of ``df`` with 'Time' min-max normalised.

    Each value is mapped to ``(time - time_min) / (time_max - time_min)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Time' column.
    time_min, time_max : float
        Bounds used for the normalisation.

    Returns
    -------
    pandas.DataFrame
        A new frame; the caller's ``df`` is not modified.

    Raises
    ------
    ValueError
        If ``time_max`` equals ``time_min`` (the range would be zero and the
        division would silently produce inf/NaN values).
    """
    time_range = time_max - time_min
    if time_range == 0:
        raise ValueError("time_max must differ from time_min to normalize 'Time'")
    # Copy first so repeated calls on the same input are idempotent.
    normalized = df.copy()
    normalized['Time'] = (normalized['Time'] - time_min) / time_range
    return normalized

## Scaling Amount and Normalizing Time data

In [9]:
# Fit the scaler on the 'Amount' column, shaped as a single-feature 2-D array.
# NOTE(review): the statistics come from every row of `df`, i.e. before any
# train/test/validation split is made.
amount_column = df['Amount'].to_numpy().reshape(-1, 1)
scaler = RobustScaler().fit(amount_column)

# Bounds used later to min-max normalise 'Time'.
time_min = df['Time'].min()
time_max = df['Time'].max()

In [10]:
# Scale 'Amount' first, then normalise 'Time'. `df` is rebound to the result,
# so re-running this cell would transform the already-transformed values again.
df = normalize_time(scale_amount(df, scaler), time_min, time_max)

## Checking for class imbalance

In [12]:
# Partition the rows by label: 0 = not fraud, 1 = fraud.
not_frauds = df[df['Class'] == 0]
frauds = df[df['Class'] == 1]
# Show the row count of each partition.
tuple(part['Class'].value_counts() for part in (not_frauds, frauds))

(Class
 0    284315
 Name: count, dtype: int64,
 Class
 1    492
 Name: count, dtype: int64)

## Undersampling the data for balancing

In [14]:
# Draw as many non-fraud rows as there are fraud rows (fixed seed), then stack
# them under the fraud rows to obtain a 50/50 dataset.
sampled_not_frauds = not_frauds.sample(n=len(frauds), random_state=1)
balanced_df = pd.concat([frauds, sampled_not_frauds])
balanced_df['Class'].value_counts()

Class
1    492
0    492
Name: count, dtype: int64

## Randomising/shuffling the data

In [16]:
# Sampling 100% of the rows without replacement is a seeded shuffle; it breaks
# up the "all frauds first" ordering left by the concatenation.
shuffled_rows = balanced_df.sample(frac=1, random_state=1)
balanced_df = shuffled_rows
balanced_df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
189959,0.744404,-0.865285,-0.979506,2.587540,-2.781144,-0.887336,-0.579689,-0.976755,0.132058,-1.658263,...,-0.106978,-0.010528,-0.211955,0.021026,0.358237,-0.209483,0.062051,0.074730,-0.195626,0
107637,0.408213,-2.271755,-0.457655,-2.589055,2.230778,-4.278983,0.388610,0.102485,0.813128,-1.092921,...,1.096342,0.658399,1.711676,0.333540,0.538591,-0.193529,0.258194,0.247269,11.218193,1
275992,0.965502,-2.027135,-1.131890,-1.135194,1.086963,-0.010547,0.423797,3.790880,-1.155595,-0.063434,...,-0.315105,0.575520,0.490842,0.756502,-0.142685,-0.602777,0.508712,-0.091646,8.555858,1
120862,0.439760,0.531678,-1.108844,0.276972,0.386453,-1.038906,-0.810526,0.395582,-0.322635,0.068460,...,0.000589,-0.824566,-0.174821,0.479535,-0.094335,0.698329,-0.130716,0.083227,5.094669,0
207960,0.792328,1.878626,0.162765,-0.167433,3.465196,0.197332,1.157212,-0.676783,0.473890,-0.386278,...,-0.217428,-0.785738,0.406279,-0.056071,-0.560484,-0.388620,-0.012717,-0.038421,-0.223713,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236229,0.860700,-1.319844,0.290232,-0.223288,-0.351133,2.003048,0.004449,2.111141,-0.155835,-1.277863,...,0.259482,0.301030,-0.388021,-1.449786,1.720770,-0.282374,-0.106111,0.026727,2.379375,0
15810,0.157716,-25.942434,14.601998,-27.368650,6.378395,-19.104033,-4.684806,-18.261393,17.052566,-3.742605,...,1.784316,-1.917759,-1.235787,0.161105,1.820378,-0.219359,1.388786,0.406810,1.089779,1
1569,0.007107,-0.693097,0.720897,0.487926,1.545283,-0.123343,0.151906,1.821822,-0.176592,-1.514396,...,0.200782,0.193611,0.288196,-0.081502,0.281742,-0.136080,0.050083,0.147487,3.604136,0
107067,0.406674,-1.512516,1.133139,-1.601052,2.813401,-2.664503,-0.310371,-1.520895,0.852996,-1.496495,...,0.729828,0.485286,0.567005,0.323586,0.040871,0.825814,0.414482,0.267265,4.137637,1


## Splitting data into train, test and validation sets

In [18]:
balanced_df_np = balanced_df.to_numpy()

# The last column holds the 'Class' label; every column before it is a feature.
all_features = balanced_df_np[:, :-1]
all_labels = balanced_df_np[:, -1].astype(int)

# Consecutive row ranges: [0, 700) train, [700, 842) test, [842, end) validation.
train_end, test_end = 700, 842

x_train, y_train = all_features[:train_end], all_labels[:train_end]
x_test, y_test = all_features[train_end:test_end], all_labels[train_end:test_end]
x_val, y_val = all_features[test_end:], all_labels[test_end:]

# Display the shape of every split as a sanity check.
tuple(part.shape for part in (x_train, y_train, x_test, y_test, x_val, y_val))

((700, 30), (700,), (142, 30), (142,), (142, 30), (142,))

## Checking the split distribution

In [20]:
# Class counts per split, in the order train, test, validation.
tuple(pd.Series(split_labels).value_counts() for split_labels in (y_train, y_test, y_val))

(1    353
 0    347
 Name: count, dtype: int64,
 0    73
 1    69
 Name: count, dtype: int64,
 0    72
 1    70
 Name: count, dtype: int64)

## Creating and compiling a Shallow Neural Network

In [22]:
# Seed Python, NumPy and the TensorFlow backend before any layer is created so
# that weight initialisation is repeatable on Restart & Run All. The value
# matches the random_state=1 used for sampling elsewhere in this notebook.
set_random_seed(1)

# One hidden layer with two ReLU units, batch normalisation, and a single
# sigmoid output unit giving the predicted fraud probability.
shallow_nn = Sequential([
    InputLayer((x_train.shape[1],)),
    Dense(2, activation='relu'),
    BatchNormalization(),
    Dense(1, activation='sigmoid')
])

# Keep only the best model seen during training on disk.
checkpoint = ModelCheckpoint('shallow_nn.keras', save_best_only=True)
shallow_nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [23]:
# Display the Keras summary of the compiled model as a sanity check.
shallow_nn.summary()

## Train the model on training data

In [25]:
# Keras documents `callbacks` as a list of Callback instances, so wrap the
# checkpoint in a list instead of passing the bare object.
shallow_nn.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=50, callbacks=[checkpoint])

Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.3612 - loss: 0.9520 - val_accuracy: 0.3099 - val_loss: 0.8474
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4120 - loss: 0.8366 - val_accuracy: 0.3451 - val_loss: 0.7939
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4237 - loss: 0.7988 - val_accuracy: 0.3592 - val_loss: 0.7557
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4309 - loss: 0.7682 - val_accuracy: 0.4085 - val_loss: 0.7221
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5193 - loss: 0.7674 - val_accuracy: 0.7113 - val_loss: 0.6859
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4943 - loss: 0.7299 - val_accuracy: 0.7113 - val_loss: 0.6511
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x23d494847d0>

## Creating a prediction function

In [27]:
def neural_net_predictions(model, x, threshold=0.5):
    """Turn a model's predicted probabilities into hard 0/1 class labels.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(x)`` that returns an array of
        probabilities (one per sample).
    x : array-like
        Input samples forwarded unchanged to ``model.predict``.
    threshold : float, default 0.5
        A sample is labelled 1 when its probability is strictly greater than
        this value, otherwise 0.

    Returns
    -------
    numpy.ndarray
        1-D integer array of predicted labels.
    """
    probabilities = model.predict(x).flatten()
    return (probabilities > threshold).astype(int)

## Classification report of the Shallow_NN model

In [29]:
# Score the model on the validation split and print per-class metrics.
val_predictions = neural_net_predictions(shallow_nn, x_val)
val_report = classification_report(y_val, val_predictions, target_names=['Not Fraud', 'Fraud'])
print(val_report)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
              precision    recall  f1-score   support

   Not Fraud       0.93      0.99      0.96        72
       Fraud       0.98      0.93      0.96        70

    accuracy                           0.96       142
   macro avg       0.96      0.96      0.96       142
weighted avg       0.96      0.96      0.96       142



## Evaluate the model on the testing data

In [31]:
# Score the model on the held-out test split and print per-class metrics.
test_predictions = neural_net_predictions(shallow_nn, x_test)
test_report = classification_report(y_test, test_predictions, target_names=['Not Fraud', 'Fraud'])
print(test_report)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
              precision    recall  f1-score   support

   Not Fraud       0.92      0.95      0.93        73
       Fraud       0.94      0.91      0.93        69

    accuracy                           0.93       142
   macro avg       0.93      0.93      0.93       142
weighted avg       0.93      0.93      0.93       142



## Predicting for a random instance

In [33]:
# A single hand-written transaction used to demo inference. It carries the
# 'Time', 'V1'..'V28' and 'Amount' keys but no 'Class' label; the values are
# raw, i.e. 'Time' and 'Amount' have not yet been normalised/scaled.
instance = {'Time':37167, 
            'V1':-7.923890701,
            'V2':-5.198360199, 
            'V3':-3.000023922, 
            'V4':4.420666202, 
            'V5':2.272193965, 
            'V6':-3.394483429, 
            'V7':-5.283435335, 
            'V8':0.131618922, 
            'V9':0.658176429, 
            'V10':-0.794993882, 
            'V11':3.266066016, 
            'V12':-2.719184951, 
            'V13':-0.124103963, 
            'V14':-5.274865819, 
            'V15':0.638575003, 
            'V16':-2.995830378, 
            'V17':-4.698433449, 
            'V18':-1.711871225, 
            'V19':3.025260992, 
            'V20':-2.169810892, 
            'V21':-0.734307917, 
            'V22':-0.59992626, 
            'V23':-4.908301176, 
            'V24':0.410170235, 
            'V25':-1.16766025, 
            'V26':0.520507647, 
            'V27':1.937421403, 
            'V28':-1.552592839, 
            'Amount':12.31}

In [34]:
def preprocess_instance(instance, scaler, time_min, time_max):
    """Build a one-row frame from ``instance`` and apply the training preprocessing.

    Parameters
    ----------
    instance : dict
        Mapping of feature name to raw value. Every column of the notebook's
        global ``df`` except 'Class' must be present.
    scaler : object
        Fitted scaler passed on to ``scale_amount``.
    time_min, time_max : float
        Bounds passed on to ``normalize_time``.

    Returns
    -------
    pandas.DataFrame
        One row with the same columns (and order) as ``df``. Columns absent
        from ``instance`` (normally only 'Class') are NaN.

    Raises
    ------
    ValueError
        If ``instance`` lacks one or more feature columns. Without this check
        the missing values would silently become NaN and be fed to the model.
    """
    # Column layout comes from the notebook-level `df`, as in the original cell.
    missing_features = [
        column for column in df.columns
        if column != 'Class' and column not in instance
    ]
    if missing_features:
        raise ValueError(f"instance is missing required features: {missing_features}")

    processing_instance = pd.DataFrame([instance], columns=df.columns)
    processing_instance = scale_amount(processing_instance, scaler)
    processing_instance = normalize_time(processing_instance, time_min, time_max)
    return processing_instance

In [35]:
# Model inputs are every column of `df` except the 'Class' label, in frame order.
required_columns = [column for column in df.columns if column != 'Class']
# Preprocess the raw instance, then keep only the feature columns.
processed_instance = preprocess_instance(instance, scaler, time_min, time_max)
processed_instance = processed_instance.loc[:, required_columns]

In [36]:
# Raw sigmoid output for the single preprocessed row.
abs_prediction = shallow_nn.predict(processed_instance)
# Apply the same 0.5 cut-off used by neural_net_predictions to get a 0/1 label.
prediction = (abs_prediction.ravel() > 0.5).astype(int)
print(f'Predicted probability: {abs_prediction[0][0]}')
print(f'Predicted class: {prediction[0]}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Predicted probability: 0.9206668138504028
Predicted class: 1
