In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [2]:
data = pd.read_csv('Fraud.csv')
data = data.sample(frac=0.001)

In [3]:
data.drop(['isFraud', 'isFlaggedFraud', 'nameOrig', 'nameDest'], axis=1, inplace=True)

In [4]:
data['transaction_difference'] = data['oldbalanceOrg'] - data['newbalanceOrig']
data['dest_balance_difference'] = data['newbalanceDest'] - data['oldbalanceDest']
data['isLargeTransaction'] = data['amount'] > 200000  # flag large transactions

In [5]:
data = pd.get_dummies(data, columns=['type'], drop_first=True)

In [6]:
def detect_anomalies(transaction):
    anomalies = []
    if transaction['transaction_difference'] < 0:
        anomalies.append('Negative transaction difference')
    if transaction['dest_balance_difference'] != 0 and transaction.get('type_TRANSFER', 0) == 1:
        anomalies.append('Unexpected destination balance difference')
    if transaction['isLargeTransaction']:
        anomalies.append('Large transaction')
    if transaction['amount'] > 10000:
        anomalies.append('Very high transaction amount')
    if transaction['amount'] < 10:
        anomalies.append('Very low transaction amount')
    if transaction['oldbalanceOrg'] < transaction['amount']:
        anomalies.append('Origin balance less than transaction amount')
    if transaction['newbalanceDest'] < transaction['oldbalanceDest']:
        anomalies.append('Destination balance decreased after transaction')
    return anomalies


In [7]:
data

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,transaction_difference,dest_balance_difference,isLargeTransaction,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER
3368261,254,297657.03,10702790.74,11000447.77,1989325.12,1691668.09,-297657.03,-297657.03,True,False,False,False,False
5658028,396,608363.47,180865.60,0.00,617738.30,1226101.77,180865.60,608363.47,True,True,False,False,False
5385651,376,2236.95,1902414.21,1904651.16,25914798.30,25912561.35,-2236.95,-2236.95,False,False,False,False,False
5285811,372,11019.68,0.00,0.00,0.00,0.00,0.00,0.00,False,False,False,True,False
568773,23,10406.19,66456.00,56049.81,0.00,0.00,10406.19,0.00,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4104704,302,15496.52,11082.00,0.00,0.00,0.00,11082.00,0.00,False,False,False,True,False
295912,15,187420.98,0.00,0.00,2045885.98,2233306.96,0.00,187420.98,False,True,False,False,False
4127557,302,181253.60,0.00,0.00,1083711.89,1264965.49,0.00,181253.60,False,True,False,False,False
5955528,405,48096.57,81798.00,129894.57,195269.01,147172.44,-48096.57,-48096.57,False,False,False,False,False


In [8]:
data['anomalies'] = data.apply(detect_anomalies, axis=1)

In [9]:
data

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,transaction_difference,dest_balance_difference,isLargeTransaction,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER,anomalies
3368261,254,297657.03,10702790.74,11000447.77,1989325.12,1691668.09,-297657.03,-297657.03,True,False,False,False,False,"[Negative transaction difference, Large transa..."
5658028,396,608363.47,180865.60,0.00,617738.30,1226101.77,180865.60,608363.47,True,True,False,False,False,"[Large transaction, Very high transaction amou..."
5385651,376,2236.95,1902414.21,1904651.16,25914798.30,25912561.35,-2236.95,-2236.95,False,False,False,False,False,"[Negative transaction difference, Destination ..."
5285811,372,11019.68,0.00,0.00,0.00,0.00,0.00,0.00,False,False,False,True,False,"[Very high transaction amount, Origin balance ..."
568773,23,10406.19,66456.00,56049.81,0.00,0.00,10406.19,0.00,False,False,False,True,False,[Very high transaction amount]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4104704,302,15496.52,11082.00,0.00,0.00,0.00,11082.00,0.00,False,False,False,True,False,"[Very high transaction amount, Origin balance ..."
295912,15,187420.98,0.00,0.00,2045885.98,2233306.96,0.00,187420.98,False,True,False,False,False,"[Very high transaction amount, Origin balance ..."
4127557,302,181253.60,0.00,0.00,1083711.89,1264965.49,0.00,181253.60,False,True,False,False,False,"[Very high transaction amount, Origin balance ..."
5955528,405,48096.57,81798.00,129894.57,195269.01,147172.44,-48096.57,-48096.57,False,False,False,False,False,"[Negative transaction difference, Very high tr..."


In [10]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(data['anomalies'])

# Assuming the rest of your data preparation is correct
X = data.drop(['anomalies'], axis=1)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [13]:
predictions = model.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99       251
           1       1.00      1.00      1.00       349
           2       1.00      1.00      1.00       289
           3       1.00      1.00      1.00       809
           4       1.00      0.99      1.00       106
           5       1.00      1.00      1.00      1021

   micro avg       1.00      1.00      1.00      2825
   macro avg       1.00      0.99      1.00      2825
weighted avg       1.00      1.00      1.00      2825
 samples avg       0.88      0.88      0.88      2825



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
def process_and_display_transaction(new_transaction, model, X_train_columns, anomaly_labels):
    new_data = pd.DataFrame([new_transaction])
    new_data = pd.get_dummies(new_data, columns=['type'], drop_first=True)

    new_data['transaction_difference'] = new_data['oldbalanceOrg'] - new_data['newbalanceOrig']
    new_data['dest_balance_difference'] = new_data['newbalanceDest'] - new_data['oldbalanceDest']
    new_data['isLargeTransaction'] = new_data['amount'] > 200000

    for column in X_train_columns:
        if column not in new_data.columns:
            new_data[column] = 0
    new_data = new_data.reindex(columns=X_train_columns, fill_value=0)

    predictions = model.predict(new_data)

    warnings = []
    detailed_status = []
    
    for i, label in enumerate(anomaly_labels):
        status = predictions[0][i]
        detailed_status.append(f"{label}: {status}")
        
        if status == 1:
            warnings.append(f"Warning: {label} detected!")

    print("Transaction Details:")
    print(new_transaction)
    print("\nAnomaly Status:")
    for status in detailed_status:
        print(status)
    
    if warnings:
        print("\nWarnings:")
        for warning in warnings:
            print(warning)
    else:
        print("\nNo anomalies detected. Transaction appears normal.")


In [15]:
example_transaction = {
    'step': 1,
    'type': 'PAYMENT',
    'amount': 1234.56,
    'nameOrig': 'C1234567890',
    'oldbalanceOrg': 5000.00,
    'newbalanceOrig': 3765.44,
    'nameDest': 'C0987654321',
    'oldbalanceDest': 2000.00,
    'newbalanceDest': 2000.00
}
process_and_display_transaction(example_transaction, model, X_train.columns, mlb.classes_)

Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 1234.56, 'nameOrig': 'C1234567890', 'oldbalanceOrg': 5000.0, 'newbalanceOrig': 3765.44, 'nameDest': 'C0987654321', 'oldbalanceDest': 2000.0, 'newbalanceDest': 2000.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 0
Origin balance less than transaction amount: 0
Unexpected destination balance difference: 0
Very high transaction amount: 0

No anomalies detected. Transaction appears normal.


In [16]:
example_transaction_1 = {
    'step': 1,
    'type': 'TRANSFER',
    'amount': 250000.00,  # Very high transaction amount and Large transaction
    'nameOrig': 'C300000001',
    'oldbalanceOrg': 500.00,
    'newbalanceOrig': -249500.00,  # Negative transaction difference and Origin balance less than transaction amount
    'nameDest': 'C300000002',
    'oldbalanceDest': 1000.00,
    'newbalanceDest': 1000.00
}
process_and_display_transaction(example_transaction_1, model, X_train.columns, mlb.classes_)

Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 250000.0, 'nameOrig': 'C300000001', 'oldbalanceOrg': 500.0, 'newbalanceOrig': -249500.0, 'nameDest': 'C300000002', 'oldbalanceDest': 1000.0, 'newbalanceDest': 1000.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 1
Negative transaction difference: 0
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 1



In [17]:
example_transaction_2 = {
    'step': 1,
    'type': 'PAYMENT',
    'amount': 500.00,
    'nameOrig': 'C100000003',
    'oldbalanceOrg': 1000.00,
    'newbalanceOrig': 500.00,
    'nameDest': 'C100000004',
    'oldbalanceDest': 1000.00,
    'newbalanceDest': 1500.00
}
process_and_display_transaction(example_transaction_2, model, X_train.columns, mlb.classes_)

Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 500.0, 'nameOrig': 'C100000003', 'oldbalanceOrg': 1000.0, 'newbalanceOrig': 500.0, 'nameDest': 'C100000004', 'oldbalanceDest': 1000.0, 'newbalanceDest': 1500.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 0
Origin balance less than transaction amount: 0
Unexpected destination balance difference: 0
Very high transaction amount: 0

No anomalies detected. Transaction appears normal.
