In [1]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import matplotlib.pyplot as plt

  from pandas.core import (


In [2]:
data = pd.read_csv('Fraud.csv')
data = data.sample(frac=0.001)

In [3]:
data.drop(['isFraud', 'isFlaggedFraud', 'nameOrig', 'nameDest'], axis=1, inplace=True)
data = pd.get_dummies(data, columns=['type'], drop_first=True)

In [4]:
data['transaction_difference'] = data['oldbalanceOrg'] - data['newbalanceOrig']
data['dest_balance_difference'] = data['newbalanceDest'] - data['oldbalanceDest']
data['isLargeTransaction'] = data['amount'] > 200000

In [5]:
def detect_anomalies(transaction):
    anomalies = []
    if transaction['transaction_difference'] < 0:
        anomalies.append('Negative transaction difference')
    if transaction['dest_balance_difference'] != 0 and transaction.get('type_TRANSFER', 0) == 1:
        anomalies.append('Unexpected destination balance difference')
    if transaction['isLargeTransaction']:
        anomalies.append('Large transaction')
    if transaction['amount'] > 10000:
        anomalies.append('Very high transaction amount')
    if transaction['amount'] < 10:
        anomalies.append('Very low transaction amount')
    if transaction['oldbalanceOrg'] < transaction['amount']:
        anomalies.append('Origin balance less than transaction amount')
    if transaction['newbalanceDest'] < transaction['oldbalanceDest']:
        anomalies.append('Destination balance decreased after transaction')
    return anomalies

In [6]:
data['anomalies'] = data.apply(detect_anomalies, axis=1)

In [7]:
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(data['anomalies'])
X = data.drop(['anomalies'], axis=1)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Hyperparameter tuning for SVM
param_grid = {
    'estimator__C': [0.1, 1, 10, 100],
    'estimator__kernel': ['linear', 'rbf']
}
svm_base = SVC(probability=True)
svm_ovr = OneVsRestClassifier(svm_base)
svm_grid = GridSearchCV(svm_ovr, param_grid, cv=5, scoring='f1_micro')
svm_grid.fit(X_train_scaled, y_train)



In [11]:
best_svm_model = svm_grid.best_estimator_

In [12]:
svm_predictions = best_svm_model.predict(X_test_scaled)

In [13]:
def print_metrics(y_test, predictions, model_name):
    print(f"\n{model_name} Performance Metrics:")
    print(f"Accuracy: {accuracy_score(y_test, predictions):.4f}")
    print(f"Precision: {precision_score(y_test, predictions, average='micro'):.4f}")
    print(f"Recall: {recall_score(y_test, predictions, average='micro'):.4f}")
    print(f"F1 Score: {f1_score(y_test, predictions, average='micro'):.4f}")
    print(classification_report(y_test, predictions))
print_metrics(y_test, svm_predictions, "SVM")


SVM Performance Metrics:
Accuracy: 0.9647
Precision: 0.9887
Recall: 0.9959
F1 Score: 0.9923
              precision    recall  f1-score   support

           0       1.00      0.98      0.99       272
           1       1.00      1.00      1.00       351
           2       1.00      1.00      1.00       295
           3       0.97      1.00      0.98       839
           4       1.00      1.00      1.00       123
           5       1.00      0.99      0.99      1029
           6       0.00      0.00      0.00         0

   micro avg       0.99      1.00      0.99      2909
   macro avg       0.85      0.85      0.85      2909
weighted avg       0.99      1.00      0.99      2909
 samples avg       0.90      0.90      0.89      2909



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
def process_and_display_transaction_svm(new_transaction, model, scaler, X_train_columns, anomaly_labels):
    new_data = pd.DataFrame([new_transaction])
    new_data = pd.get_dummies(new_data, columns=['type'], drop_first=True)
    new_data = new_data.reindex(columns=X_train_columns, fill_value=0)

    # Scale new data using the same scaler used for training
    new_data_scaled = scaler.transform(new_data)

    # Predict anomalies
    predictions = model.predict(new_data_scaled)

    # Interpret predictions
    warnings = []
    detailed_status = []
    
    for i, label in enumerate(anomaly_labels):
        status = predictions[0][i]
        detailed_status.append(f"{label}: {status}")
        
        if status == 1:
            warnings.append(f"Warning: {label} detected!")

    print("Transaction Details:")
    print(new_transaction)
    print("\nAnomaly Status:")
    for status in detailed_status:
        print(status)
    
    if warnings:
        print("\nWarnings:")
        for warning in warnings:
            print(warning)
    else:
        print("\nNo anomalies detected. Transaction appears normal.")


In [16]:
example_transaction = {
    'step': 1,
    'type': 'PAYMENT',
    'amount': 500.00,
    'nameOrig': 'C100000001',
    'oldbalanceOrg': 10000.00,
    'newbalanceOrig': 9500.00,
    'nameDest': 'C100000002',
    'oldbalanceDest': 5000.00,
    'newbalanceDest': 5500.00
}

process_and_display_transaction_svm(example_transaction, best_svm_model, scaler, X_train.columns, mlb.classes_)

Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 500.0, 'nameOrig': 'C100000001', 'oldbalanceOrg': 10000.0, 'newbalanceOrig': 9500.0, 'nameDest': 'C100000002', 'oldbalanceDest': 5000.0, 'newbalanceDest': 5500.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 1
Origin balance less than transaction amount: 0
Unexpected destination balance difference: 0
Very high transaction amount: 0
Very low transaction amount: 0



In [17]:
example_transaction_1 = {
    'step': 1,
    'type': 'TRANSFER',
    'amount': 215310.3,
    'nameOrig': 'C200000001',
    'oldbalanceOrg': 500.00,
    'newbalanceOrig': 0.00,
    'nameDest': 'C200000002',
    'oldbalanceDest': 0.00,
    'newbalanceDest': 0.00
}
process_and_display_transaction_svm(example_transaction_1, best_svm_model, scaler, X_train.columns, mlb.classes_)

Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 215310.3, 'nameOrig': 'C200000001', 'oldbalanceOrg': 500.0, 'newbalanceOrig': 0.0, 'nameDest': 'C200000002', 'oldbalanceDest': 0.0, 'newbalanceDest': 0.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 1
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 1
Very low transaction amount: 0



In [19]:
example_transaction_2 = {
    'step': 1,
    'type': 'PAYMENT',
    'amount': 500.00,
    'nameOrig': 'C100000003',
    'oldbalanceOrg': 1000.00,
    'newbalanceOrig': 500.00,
    'nameDest': 'C100000004',
    'oldbalanceDest': 1000.00,
    'newbalanceDest': 1500.00
}
process_and_display_transaction_svm(example_transaction_2, best_svm_model, scaler, X_train.columns, mlb.classes_)

Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 500.0, 'nameOrig': 'C100000003', 'oldbalanceOrg': 1000.0, 'newbalanceOrig': 500.0, 'nameDest': 'C100000004', 'oldbalanceDest': 1000.0, 'newbalanceDest': 1500.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 1
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 0
Very low transaction amount: 0

