<a href="https://colab.research.google.com/github/Umang-ai/FraudDetection/blob/main/fraudDetectionCNNwithFold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
!pip install tensorflow
!pip install imblearn
!pip install kaggle
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE
import os



In [None]:
from google.colab import files
# Bind the return value instead of leaving the call as the cell's last
# expression: files.upload() returns a {filename: file bytes} dict, and echoing
# it would write the contents of kaggle.json (the API key) into the saved
# notebook output.
uploaded = files.upload()  # Upload kaggle.json manually
# Show only the names of the uploaded files, never their contents.
print("Uploaded:", ", ".join(uploaded))

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [None]:
# Create the ~/.kaggle directory; -p makes this a no-op when it already exists.
!mkdir -p ~/.kaggle
# Move the uploaded kaggle.json out of the working directory into ~/.kaggle/.
# NOTE(review): presumably this is where the kaggle CLI used below looks for its token — confirm.
!mv kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only (mode 600).
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download dataset
!kaggle datasets download -d ealaxi/paysim1 --force
!unzip paysim1.zip -d /content/


# Load dataset
df = pd.read_csv("/content/PS_20174392719_1491204439457_log.csv")

Dataset URL: https://www.kaggle.com/datasets/ealaxi/paysim1
License(s): CC-BY-SA-4.0
Downloading paysim1.zip to /content
 94% 167M/178M [00:00<00:00, 897MB/s] 
100% 178M/178M [00:00<00:00, 462MB/s]
Archive:  paysim1.zip
replace /content/PS_20174392719_1491204439457_log.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: /content/PS_20174392719_1491204439457_log.csv  


In [None]:
# Remove the stray 'Unnamed: 0' column when the frame has one.
stray_index = 'Unnamed: 0'
if stray_index in df.columns:
    df.drop(columns=[stray_index], inplace=True)

# Remove the non-numeric account-name columns, but only when both are present.
account_id_cols = ['nameOrig', 'nameDest']
if all(col in df.columns for col in account_id_cols):
    df.drop(columns=account_id_cols, inplace=True)


In [None]:

# Replace the categorical 'type' column with integer codes.
label_enc = LabelEncoder().fit(df['type'])
df['type'] = label_enc.transform(df['type'])

# Separate the target column from the feature columns.
target_col = 'isFraud'
y = df[target_col]
X = df.drop(columns=[target_col])

In [None]:

# Standardize every feature column.
# NOTE(review): the scaler is fit on all rows here, i.e. before the
# cross-validation split, so its statistics include rows that later serve as
# test folds.
scaler = StandardScaler()
X = scaler.fit(X).transform(X)

# Append a trailing axis of size 1 so X has shape (samples, features, 1),
# the layout the Conv1D layers are given.
X = np.expand_dims(X, axis=-1)



In [None]:

# K-Fold Cross Validation (3 folds)
def build_cnn(n_features):
    """Build and compile the 1D-CNN binary classifier trained on each fold.

    Args:
        n_features: Number of feature columns; every sample is fed to the
            network with shape ``(n_features, 1)``.

    Returns:
        A compiled ``keras.Sequential`` model with a single sigmoid output,
        trained with Adam on binary cross-entropy and tracking accuracy.
    """
    model = keras.Sequential([
        # An explicit Input layer replaces `input_shape=` on the first Conv1D;
        # Keras emits a UserWarning for the `input_shape=` form in Sequential models.
        keras.Input(shape=(n_features, 1)),
        keras.layers.Conv1D(filters=64, kernel_size=2, activation='relu'),
        keras.layers.MaxPooling1D(pool_size=2),
        keras.layers.Conv1D(filters=32, kernel_size=2, activation='relu'),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
fold = 1
accuracy_scores = []
roc_auc_scores = []

# X has shape (samples, features, 1) at this point.
n_features = X.shape[1]

for train_idx, test_idx in kf.split(X, y):
    print(f"\nTraining on Fold {fold}...")

    # Drop the previous fold's Keras global state so models built in this
    # loop do not accumulate memory.
    keras.backend.clear_session()

    # kf.split yields positions, so index the label Series positionally;
    # plain `y[idx]` is label-based and only agrees with positions while the
    # Series still has its default 0..n-1 index.
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Work in 2-D (samples, features) for the scikit-learn / imblearn steps.
    X_train_2d = X[train_idx].reshape(len(train_idx), -1)
    X_test_2d = X[test_idx].reshape(len(test_idx), -1)

    # Re-standardize with statistics from the training fold only, so nothing
    # about the test fold leaks into preprocessing. X may already have been
    # standardized on the full dataset; because standardization is an affine
    # per-feature map, refitting here gives the same values as fitting a
    # scaler on the raw training rows.
    fold_scaler = StandardScaler().fit(X_train_2d)
    X_train_2d = fold_scaler.transform(X_train_2d)
    X_train = X_train_2d.reshape(-1, n_features, 1)
    X_test = fold_scaler.transform(X_test_2d).reshape(-1, n_features, 1)

    # Apply SMOTE only to the training set
    smote = SMOTE(sampling_strategy=0.2, random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train_2d, y_train)

    # Reshape back for CNN
    X_train_resampled = X_train_resampled.reshape(-1, n_features, 1)

    # Build and compile a fresh CNN for this fold
    model = build_cnn(n_features)

    # Train model (5 epochs per fold); the test fold is passed as
    # validation_data for per-epoch monitoring only.
    model.fit(X_train_resampled, y_train_resampled, epochs=5, batch_size=32, validation_data=(X_test, y_test), verbose=1)

    # Evaluate model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f'Test Accuracy: {accuracy * 100:.2f}%')
    accuracy_scores.append(accuracy)

    # Predictions: keep the sigmoid probabilities for ROC-AUC and threshold
    # them at 0.5 for the confusion matrix / classification report.
    y_prob = model.predict(X_test)
    y_pred = (y_prob > 0.5).astype(int)

    # Confusion Matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(conf_matrix)

    # Classification Report
    class_report = classification_report(y_test, y_pred)
    print("\nClassification Report:")
    print(class_report)

    # ROC-AUC Score
    if len(np.unique(y_test)) == 2:  # Ensure binary classification
        # ROC-AUC ranks samples by score, so it needs the continuous
        # probabilities; passing hard 0/1 labels reduces the curve to a
        # single operating point.
        roc_auc = roc_auc_score(y_test, y_prob.ravel())
        roc_auc_scores.append(roc_auc)
        print(f"ROC-AUC Score: {roc_auc:.4f}")

    fold += 1



Training on Fold 1...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m423s[0m 3ms/step - accuracy: 0.9798 - loss: 0.0520 - val_accuracy: 0.9980 - val_loss: 0.0063
Epoch 2/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m414s[0m 3ms/step - accuracy: 0.9895 - loss: 0.0275 - val_accuracy: 0.9968 - val_loss: 0.0097
Epoch 3/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m419s[0m 3ms/step - accuracy: 0.9906 - loss: 0.0248 - val_accuracy: 0.9949 - val_loss: 0.0135
Epoch 4/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m422s[0m 3ms/step - accuracy: 0.9913 - loss: 0.0234 - val_accuracy: 0.9923 - val_loss: 0.0191
Epoch 5/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m418s[0m 3ms/step - accuracy: 0.9917 - loss: 0.0222 - val_accuracy: 0.9919 - val_loss: 0.0207
[1m66278/66278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 1ms/step - accuracy: 0.9930 - loss: 0.0187
Test Accuracy: 99.19%
[1m66278/66278[0m

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m432s[0m 3ms/step - accuracy: 0.9793 - loss: 0.0537 - val_accuracy: 0.9929 - val_loss: 0.0185
Epoch 2/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m427s[0m 3ms/step - accuracy: 0.9897 - loss: 0.0277 - val_accuracy: 0.9946 - val_loss: 0.0158
Epoch 3/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m424s[0m 3ms/step - accuracy: 0.9910 - loss: 0.0247 - val_accuracy: 0.9949 - val_loss: 0.0127
Epoch 4/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m425s[0m 3ms/step - accuracy: 0.9917 - loss: 0.0236 - val_accuracy: 0.9964 - val_loss: 0.0089
Epoch 5/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m424s[0m 3ms/step - accuracy: 0.9920 - loss: 0.0227 - val_accuracy: 0.9942 - val_loss: 0.0144
[1m66278/66278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 1ms/step - accuracy: 0.9941 - loss: 0.0155
Test Accuracy: 99.42%
[1m66278/66278[0m

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m421s[0m 3ms/step - accuracy: 0.9793 - loss: 0.0531 - val_accuracy: 0.9912 - val_loss: 0.0187
Epoch 2/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m420s[0m 3ms/step - accuracy: 0.9896 - loss: 0.0273 - val_accuracy: 0.9892 - val_loss: 0.0250
Epoch 3/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m420s[0m 3ms/step - accuracy: 0.9907 - loss: 0.0249 - val_accuracy: 0.9888 - val_loss: 0.0244
Epoch 4/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m414s[0m 3ms/step - accuracy: 0.9914 - loss: 0.0238 - val_accuracy: 0.9956 - val_loss: 0.0103
Epoch 5/5
[1m158861/158861[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m419s[0m 3ms/step - accuracy: 0.9919 - loss: 0.0225 - val_accuracy: 0.9935 - val_loss: 0.0161
[1m66278/66278[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 1ms/step - accuracy: 0.9936 - loss: 0.0178
Test Accuracy: 99.35%
[1m66278/66278[0m

In [None]:
# Report the mean accuracy and mean ROC-AUC collected over the folds.
print(
    "\nFinal Model Evaluation:",
    f"Average Test Accuracy: {np.mean(accuracy_scores) * 100:.2f}%",
    f"Average ROC-AUC Score: {np.mean(roc_auc_scores):.4f}",
    sep="\n",
)


Final Model Evaluation:
Average Test Accuracy: 99.32%
Average ROC-AUC Score: 0.9842
