In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import matplotlib.pyplot as plt

  from pandas.core import (


In [2]:
data = pd.read_csv('Fraud.csv')
data = data.sample(frac=0.001)

In [3]:
data.drop(['isFraud', 'isFlaggedFraud', 'nameOrig', 'nameDest'], axis=1, inplace=True)
data = pd.get_dummies(data, columns=['type'], drop_first=True)


In [4]:
data['transaction_difference'] = data['oldbalanceOrg'] - data['newbalanceOrig']
data['dest_balance_difference'] = data['newbalanceDest'] - data['oldbalanceDest']
data['isLargeTransaction'] = data['amount'] > 200000

In [5]:
def detect_anomalies(transaction):
    anomalies = []
    if transaction['transaction_difference'] < 0:
        anomalies.append('Negative transaction difference')
    if transaction['dest_balance_difference'] != 0 and transaction.get('type_TRANSFER', 0) == 1:
        anomalies.append('Unexpected destination balance difference')
    if transaction['isLargeTransaction']:
        anomalies.append('Large transaction')
    if transaction['amount'] > 10000:
        anomalies.append('Very high transaction amount')
    if transaction['amount'] < 10:
        anomalies.append('Very low transaction amount')
    if transaction['oldbalanceOrg'] < transaction['amount']:
        anomalies.append('Origin balance less than transaction amount')
    if transaction['newbalanceDest'] < transaction['oldbalanceDest']:
        anomalies.append('Destination balance decreased after transaction')
    return anomalies

In [6]:
data

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER,transaction_difference,dest_balance_difference,isLargeTransaction
721538,37,126656.69,0.00,0.00,680022.83,1588179.79,True,False,False,False,0.00,908156.96,False
527291,20,1852.00,0.00,0.00,0.00,0.00,False,False,True,False,0.00,0.00,False
846513,41,27635.12,872.00,0.00,0.00,27635.12,True,False,False,False,872.00,27635.12,False
920386,43,177524.06,872016.36,1049540.41,441235.92,263711.86,False,False,False,False,-177524.05,-177524.06,False
2936837,229,283640.38,0.00,0.00,3948060.54,4231700.93,True,False,False,False,0.00,283640.39,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
804526,40,2759.60,473062.00,470302.40,0.00,0.00,False,False,True,False,2759.60,0.00,False
573933,24,108430.26,3573248.54,3681678.80,657682.86,549252.60,False,False,False,False,-108430.26,-108430.26,False
4608247,329,299980.83,20063.00,320043.83,711087.53,411106.70,False,False,False,False,-299980.83,-299980.83,True
2845367,226,975.84,515701.82,514725.98,0.00,0.00,False,False,True,False,975.84,0.00,False


In [7]:
data['anomalies'] = data.apply(detect_anomalies, axis=1)

In [8]:
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(data['anomalies'])
X = data.drop(['anomalies'], axis=1)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Hyperparameter tuning for SVM
param_grid = {
    'estimator__C': [0.1, 1, 10, 100],
    'estimator__kernel': ['linear', 'rbf']
}
svm_base = SVC(probability=True)
svm_ovr = OneVsRestClassifier(svm_base)
svm_grid = GridSearchCV(svm_ovr, param_grid, cv=5, scoring='f1_micro')
svm_grid.fit(X_train_scaled, y_train)

In [12]:
best_svm_model = svm_grid.best_estimator_

In [13]:
# Train RandomForest
rf_model = OneVsRestClassifier(RandomForestClassifier(random_state=42))
rf_model.fit(X_train, y_train)

In [14]:
svm_predictions = best_svm_model.predict(X_test_scaled)
rf_predictions = rf_model.predict(X_test)

In [15]:
# Performance metrics
def print_metrics(y_test, predictions, model_name):
    print(f"\n{model_name} Performance Metrics:")
    print(f"Accuracy: {accuracy_score(y_test, predictions):.4f}")
    print(f"Precision: {precision_score(y_test, predictions, average='micro'):.4f}")
    print(f"Recall: {recall_score(y_test, predictions, average='micro'):.4f}")
    print(f"F1 Score: {f1_score(y_test, predictions, average='micro'):.4f}")
    print(classification_report(y_test, predictions))

In [16]:
# Print performance metrics for both models
print_metrics(y_test, svm_predictions, "SVM")
print_metrics(y_test, rf_predictions, "RandomForest")


SVM Performance Metrics:
Accuracy: 0.9607
Precision: 0.9882
Recall: 0.9944
F1 Score: 0.9913
              precision    recall  f1-score   support

           0       1.00      0.98      0.99       250
           1       1.00      1.00      1.00       368
           2       1.00      1.00      1.00       284
           3       0.97      1.00      0.98       826
           4       0.97      1.00      0.99       107
           5       1.00      0.99      0.99      1022

   micro avg       0.99      0.99      0.99      2857
   macro avg       0.99      0.99      0.99      2857
weighted avg       0.99      0.99      0.99      2857
 samples avg       0.89      0.89      0.89      2857


RandomForest Performance Metrics:
Accuracy: 0.9929
Precision: 0.9986
Recall: 0.9982
F1 Score: 0.9984
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       250
           1       1.00      1.00      1.00       368
           2       1.00      1.00      1.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
def process_and_display_transaction(new_transaction, model, scaler, X_train_columns, anomaly_labels):
    new_data = pd.DataFrame([new_transaction])
    new_data = pd.get_dummies(new_data, columns=['type'], drop_first=True)
    new_data = new_data.reindex(columns=X_train_columns, fill_value=0)

    # Scale new data using the same scaler used for training
    new_data_scaled = scaler.transform(new_data)

    # Predict anomalies
    predictions = model.predict(new_data_scaled)

    # Interpret predictions
    warnings = []
    detailed_status = []
    
    for i, label in enumerate(anomaly_labels):
        status = predictions[0][i]
        detailed_status.append(f"{label}: {status}")
        
        if status == 1:
            warnings.append(f"Warning: {label} detected!")

    print("Transaction Details:")
    print(new_transaction)
    print("\nAnomaly Status:")
    for status in detailed_status:
        print(status)
    
    if warnings:
        print("\nWarnings:")
        for warning in warnings:
            print(warning)
    else:
        print("\nNo anomalies detected. Transaction appears normal.")


In [18]:
def process_and_display_transaction_rf(new_transaction, model, X_train_columns, anomaly_labels):
    new_data = pd.DataFrame([new_transaction])
    new_data = pd.get_dummies(new_data, columns=['type'], drop_first=True)

    new_data['transaction_difference'] = new_data['oldbalanceOrg'] - new_data['newbalanceOrig']
    new_data['dest_balance_difference'] = new_data['newbalanceDest'] - new_data['oldbalanceDest']
    new_data['isLargeTransaction'] = new_data['amount'] > 200000

    for column in X_train_columns:
        if column not in new_data.columns:
            new_data[column] = 0
    new_data = new_data.reindex(columns=X_train_columns, fill_value=0)

    predictions = model.predict(new_data)

    warnings = []
    detailed_status = []
    
    for i, label in enumerate(anomaly_labels):
        status = predictions[0][i]
        detailed_status.append(f"{label}: {status}")
        
        if status == 1:
            warnings.append(f"Warning: {label} detected!")

    print("Transaction Details:")
    print(new_transaction)
    print("\nAnomaly Status:")
    for status in detailed_status:
        print(status)
    
    if warnings:
        print("\nWarnings:")
        for warning in warnings:
            print(warning)
    else:
        print("\nNo anomalies detected. Transaction appears normal.")


In [19]:
example_transaction_non_fraudulent = {
    'step': 1,
    'type': 'PAYMENT',
    'amount': 500.00,
    'nameOrig': 'C100000001',
    'oldbalanceOrg': 10000.00,
    'newbalanceOrig': 9500.00,
    'nameDest': 'C100000002',
    'oldbalanceDest': 5000.00,
    'newbalanceDest': 5500.00
}

In [20]:
example_transaction_fraudulent = {
    'step': 1,
    'type': 'TRANSFER',
    'amount': 215310.3,
    'nameOrig': 'C200000001',
    'oldbalanceOrg': 500.00,
    'newbalanceOrig': 0.00,
    'nameDest': 'C200000002',
    'oldbalanceDest': 0.00,
    'newbalanceDest': 0.00
}

In [21]:
print("\nSVM Model Predictions:")
process_and_display_transaction(example_transaction_non_fraudulent, best_svm_model, scaler, X_train.columns, mlb.classes_)
process_and_display_transaction(example_transaction_fraudulent, best_svm_model, scaler, X_train.columns, mlb.classes_)


SVM Model Predictions:
Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 500.0, 'nameOrig': 'C100000001', 'oldbalanceOrg': 10000.0, 'newbalanceOrig': 9500.0, 'nameDest': 'C100000002', 'oldbalanceDest': 5000.0, 'newbalanceDest': 5500.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 1
Origin balance less than transaction amount: 0
Unexpected destination balance difference: 0
Very high transaction amount: 0

Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 215310.3, 'nameOrig': 'C200000001', 'oldbalanceOrg': 500.0, 'newbalanceOrig': 0.0, 'nameDest': 'C200000002', 'oldbalanceDest': 0.0, 'newbalanceDest': 0.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 1
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 1



In [22]:
print("\nRandomForest Model Predictions:")
process_and_display_transaction_rf(example_transaction_non_fraudulent, rf_model, X_train.columns, mlb.classes_)
process_and_display_transaction_rf(example_transaction_fraudulent, rf_model, X_train.columns, mlb.classes_)


RandomForest Model Predictions:
Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 500.0, 'nameOrig': 'C100000001', 'oldbalanceOrg': 10000.0, 'newbalanceOrig': 9500.0, 'nameDest': 'C100000002', 'oldbalanceDest': 5000.0, 'newbalanceDest': 5500.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 0
Origin balance less than transaction amount: 0
Unexpected destination balance difference: 0
Very high transaction amount: 0

No anomalies detected. Transaction appears normal.
Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 215310.3, 'nameOrig': 'C200000001', 'oldbalanceOrg': 500.0, 'newbalanceOrig': 0.0, 'nameDest': 'C200000002', 'oldbalanceDest': 0.0, 'newbalanceDest': 0.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 1
Negative transaction difference: 0
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very hig

In [27]:
example_transaction_non_fraudulent_2 = {
    'step': 1,
    'type': 'PAYMENT',
    'amount': 500.00,
    'nameOrig': 'C100000003',
    'oldbalanceOrg': 1000.00,
    'newbalanceOrig': 500.00,
    'nameDest': 'C100000004',
    'oldbalanceDest': 1000.00,
    'newbalanceDest': 1500.00
}
print("SVM: ")
print()
process_and_display_transaction(example_transaction_non_fraudulent_2, best_svm_model, scaler, X_train.columns, mlb.classes_)
print("RANDOM FOREST: ")
print()
process_and_display_transaction_rf(example_transaction_non_fraudulent_2, rf_model, X_train.columns, mlb.classes_)

SVM: 

Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 500.0, 'nameOrig': 'C100000003', 'oldbalanceOrg': 1000.0, 'newbalanceOrig': 500.0, 'nameDest': 'C100000004', 'oldbalanceDest': 1000.0, 'newbalanceDest': 1500.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 1
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 0

RANDOM FOREST: 

Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 500.0, 'nameOrig': 'C100000003', 'oldbalanceOrg': 1000.0, 'newbalanceOrig': 500.0, 'nameDest': 'C100000004', 'oldbalanceDest': 1000.0, 'newbalanceDest': 1500.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 0
Origin balance less than transaction amount: 0
Unexpected destination balance difference: 0
Very high transaction amount: 0

No anomalies detected. Transacti

In [28]:
example_transaction_fraudulent_2 = {
    'step': 1,
    'type': 'TRANSFER',
    'amount': 215310.3,
    'nameOrig': 'C200000001',
    'oldbalanceOrg': 500.00,
    'newbalanceOrig': 0.00,
    'nameDest': 'C200000002',
    'oldbalanceDest': 0.00,
    'newbalanceDest': 0.00
}
print("SVM: ")
print()
process_and_display_transaction(example_transaction_fraudulent_2, best_svm_model, scaler, X_train.columns, mlb.classes_)
print("RANDOM FOREST: ")
print()
process_and_display_transaction_rf(example_transaction_fraudulent_2, rf_model, X_train.columns, mlb.classes_)

SVM: 

Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 215310.3, 'nameOrig': 'C200000001', 'oldbalanceOrg': 500.0, 'newbalanceOrig': 0.0, 'nameDest': 'C200000002', 'oldbalanceDest': 0.0, 'newbalanceDest': 0.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 1
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 1

RANDOM FOREST: 

Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 215310.3, 'nameOrig': 'C200000001', 'oldbalanceOrg': 500.0, 'newbalanceOrig': 0.0, 'nameDest': 'C200000002', 'oldbalanceDest': 0.0, 'newbalanceDest': 0.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 1
Negative transaction difference: 0
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 1



In [29]:
example_transaction_fraudulent_3 = {
    'step': 1,
    'type': 'TRANSFER',
    'amount': 250000.00, 
    'nameOrig': 'C300000001',
    'oldbalanceOrg': 1000.00,
    'newbalanceOrig': 0.00,
    'nameDest': 'C300000002',
    'oldbalanceDest': 0.00,
    'newbalanceDest': 0.00 
}
print("SVM: ")
print()
process_and_display_transaction(example_transaction_fraudulent_3, best_svm_model, scaler, X_train.columns, mlb.classes_)
print("RANDOM FOREST: ")
print()
process_and_display_transaction_rf(example_transaction_fraudulent_3, rf_model, X_train.columns, mlb.classes_)

SVM: 

Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 250000.0, 'nameOrig': 'C300000001', 'oldbalanceOrg': 1000.0, 'newbalanceOrig': 0.0, 'nameDest': 'C300000002', 'oldbalanceDest': 0.0, 'newbalanceDest': 0.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 1
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 1

RANDOM FOREST: 

Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 250000.0, 'nameOrig': 'C300000001', 'oldbalanceOrg': 1000.0, 'newbalanceOrig': 0.0, 'nameDest': 'C300000002', 'oldbalanceDest': 0.0, 'newbalanceDest': 0.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 1
Negative transaction difference: 0
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 1



In [30]:
example_transaction_fraudulent_4 = {
    'step': 1,
    'type': 'TRANSFER',
    'amount': 5.00,
    'nameOrig': 'C400000001',
    'oldbalanceOrg': 1.00,
    'newbalanceOrig': -4.00,  
    'nameDest': 'C400000002',
    'oldbalanceDest': 2000.00,
    'newbalanceDest': 1500.00 
}
print("RANDOM FOREST: ")
print()
process_and_display_transaction_rf(example_transaction_fraudulent_4, rf_model, X_train.columns, mlb.classes_)

RANDOM FOREST: 

Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 5.0, 'nameOrig': 'C400000001', 'oldbalanceOrg': 1.0, 'newbalanceOrig': -4.0, 'nameDest': 'C400000002', 'oldbalanceDest': 2000.0, 'newbalanceDest': 1500.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 0
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 0

