In [95]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from catboost import CatBoostClassifier, Pool
import numpy as np
from sklearn.metrics import accuracy_score

In [96]:
Data = pd.read_csv('Fraud.csv')
Data

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.00,160296.36,M1979787155,0.00,0.00,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.00,19384.72,M2044282225,0.00,0.00,0,0
2,1,TRANSFER,181.00,C1305486145,181.00,0.00,C553264065,0.00,0.00,1,0
3,1,CASH_OUT,181.00,C840083671,181.00,0.00,C38997010,21182.00,0.00,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.00,29885.86,M1230701703,0.00,0.00,0,0
...,...,...,...,...,...,...,...,...,...,...,...
6362615,743,CASH_OUT,339682.13,C786484425,339682.13,0.00,C776919290,0.00,339682.13,1,0
6362616,743,TRANSFER,6311409.28,C1529008245,6311409.28,0.00,C1881841831,0.00,0.00,1,0
6362617,743,CASH_OUT,6311409.28,C1162922333,6311409.28,0.00,C1365125890,68488.84,6379898.11,1,0
6362618,743,TRANSFER,850002.52,C1685995037,850002.52,0.00,C2080388513,0.00,0.00,1,0


In [97]:
sampled_data = Data.sample(frac=0.1, random_state=42)

sampled_data['transaction_difference'] = sampled_data['oldbalanceOrg'] - sampled_data['newbalanceOrig']
sampled_data['dest_balance_difference'] = sampled_data['oldbalanceDest'] - sampled_data['newbalanceDest']
sampled_data['isLargeTransaction'] = sampled_data['amount'] > sampled_data['amount'].mean()
sampled_data.drop(['isFraud', 'isFlaggedFraud', 'nameOrig', 'nameDest'], axis=1, inplace=True)


sampled_data = pd.get_dummies(sampled_data, columns=['type'], drop_first=True)
sampled_data.head()

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,transaction_difference,dest_balance_difference,isLargeTransaction,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER
3737323,278,330218.42,20866.0,351084.42,452419.57,122201.15,-330218.42,330218.42,True,False,False,False,False
264914,15,11647.08,30370.0,18722.92,0.0,0.0,11647.08,0.0,False,False,False,True,False
85647,10,152264.21,106589.0,258853.21,201303.01,49038.8,-152264.21,152264.21,False,False,False,False,False
5899326,403,1551760.63,0.0,0.0,3198359.45,4750120.08,0.0,-1551760.63,True,False,False,False,True
2544263,206,78172.3,2921331.58,2999503.88,415821.9,337649.6,-78172.3,78172.3,False,False,False,False,False


In [98]:
def detect_anomalies(transaction):
    anomalies = []
    if transaction['transaction_difference'] < 0:
        anomalies.append('Negative transaction difference')
    if transaction['dest_balance_difference'] != 0 and 'type_TRANSFER' in transaction and transaction['type_TRANSFER']:
        anomalies.append('Unexpected destination balance difference')
    if transaction['isLargeTransaction']:
        anomalies.append('Large transaction')
    if transaction['amount'] > 10000:
        anomalies.append('Very high transaction amount')
    if transaction['amount'] < 10:
        anomalies.append('Very low transaction amount')
    if transaction['oldbalanceOrg'] < transaction['amount']:
        anomalies.append('Origin balance less than transaction amount')
    if transaction['newbalanceDest'] < transaction['oldbalanceDest']:
        anomalies.append('Destination balance decreased after transaction')
    return anomalies

def generate_warning(anomalies):
    if anomalies:
        return f"Warning: {'; '.join(anomalies)}"
    else:
        return "No anomalies detected"
    

In [99]:
sampled_data['anomalies'] = sampled_data.apply(detect_anomalies, axis=1)
#sampled_data['warning'] = sampled_data['anomalies'].apply(generate_warning)

sampled_data
#print(sampled_data['warning'].head())

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,transaction_difference,dest_balance_difference,isLargeTransaction,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER,anomalies
3737323,278,330218.42,20866.00,351084.42,452419.57,122201.15,-330218.42,330218.42,True,False,False,False,False,"[Negative transaction difference, Large transa..."
264914,15,11647.08,30370.00,18722.92,0.00,0.00,11647.08,0.00,False,False,False,True,False,[Very high transaction amount]
85647,10,152264.21,106589.00,258853.21,201303.01,49038.80,-152264.21,152264.21,False,False,False,False,False,"[Negative transaction difference, Very high tr..."
5899326,403,1551760.63,0.00,0.00,3198359.45,4750120.08,0.00,-1551760.63,True,False,False,False,True,"[Unexpected destination balance difference, La..."
2544263,206,78172.30,2921331.58,2999503.88,415821.90,337649.60,-78172.30,78172.30,False,False,False,False,False,"[Negative transaction difference, Very high tr..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1172200,132,415721.93,0.00,0.00,932883.24,1348605.16,0.00,-415721.92,True,True,False,False,False,"[Large transaction, Very high transaction amou..."
4791099,345,1268.70,0.00,0.00,4417140.07,4418408.76,0.00,-1268.69,False,True,False,False,False,[Origin balance less than transaction amount]
3150498,236,11321.01,0.00,0.00,0.00,0.00,0.00,0.00,False,False,False,True,False,"[Very high transaction amount, Origin balance ..."
2361946,190,2791.68,58727.00,55935.32,0.00,0.00,2791.68,0.00,False,False,False,True,False,[]


In [100]:
from sklearn.preprocessing import MultiLabelBinarizer
X = sampled_data.drop(['anomalies'], axis=1)
mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(sampled_data['anomalies'])

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
categorical_features_indices = np.where(X.dtypes == 'object')[0]


models = {}
results = {}


for idx, anomaly in enumerate(mlb.classes_):
    print(f'Training model for anomaly: {anomaly}')
    
    Y_anomaly = Y[:, idx]
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y_anomaly, test_size=0.3, random_state=42)
    
    model = CatBoostClassifier(iterations=1000, 
                               learning_rate=0.1, 
                               depth=6, 
                               loss_function='Logloss', 
                               eval_metric='AUC',
                               verbose=100)
    
    model.fit(X_train, Y_train, cat_features=categorical_features_indices, eval_set=(X_test, Y_test), early_stopping_rounds=50)
    
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(Y_test, y_pred)
    conf_matrix = confusion_matrix(Y_test, y_pred)
    class_report = classification_report(Y_test, y_pred)
    
    models[anomaly] = model
    results[anomaly] = {
        'accuracy': accuracy,
        'confusion_matrix': conf_matrix,
        'classification_report': class_report
    }
    
    print(f'Accuracy for {anomaly}: {accuracy}')
    print(f'Confusion Matrix for {anomaly}:')
    print(conf_matrix)
    print(f'Classification Report for {anomaly}:')
    print(class_report)

anomaly_labels = mlb.classes_

Training model for anomaly: Destination balance decreased after transaction
0:	test: 1.0000000	best: 1.0000000 (0)	total: 145ms	remaining: 2m 24s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 1
bestIteration = 0

Shrink model to first 1 iterations.
Accuracy for Destination balance decreased after transaction: 0.9999895221580163
Confusion Matrix for Destination balance decreased after transaction:
[[153820      0]
 [     2  37057]]
Classification Report for Destination balance decreased after transaction:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    153820
           1       1.00      1.00      1.00     37059

    accuracy                           1.00    190879
   macro avg       1.00      1.00      1.00    190879
weighted avg       1.00      1.00      1.00    190879

Training model for anomaly: Large transaction
0:	test: 1.0000000	best: 1.0000000 (0)	total: 173ms	remaining: 2m 52s
Stopped by overfitting dete

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy for Very low transaction amount: 0.999811398844294
Confusion Matrix for Very low transaction amount:
[[190843      0]
 [    36      0]]
Classification Report for Very low transaction amount:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    190843
           1       0.00      0.00      0.00        36

    accuracy                           1.00    190879
   macro avg       0.50      0.50      0.50    190879
weighted avg       1.00      1.00      1.00    190879



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [101]:
'''model = CatBoostClassifier(iterations=1000, 
                           learning_rate=0.1, 
                           depth=6, 
                           loss_function='Logloss', 
                           eval_metric='AUC',
                           verbose=100)


model.fit(X_train, Y_train, cat_features=categorical_features_indices, eval_set=(X_test, Y_test), early_stopping_rounds=50)


y_pred = model.predict(X_test)


accuracy = accuracy_score(Y_test, y_pred)
conf_matrix = confusion_matrix(Y_test, y_pred)
class_report = classification_report(Y_test, y_pred)
print(f'Accuracy: {accuracy}')
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)'''

"model = CatBoostClassifier(iterations=1000, \n                           learning_rate=0.1, \n                           depth=6, \n                           loss_function='Logloss', \n                           eval_metric='AUC',\n                           verbose=100)\n\n\nmodel.fit(X_train, Y_train, cat_features=categorical_features_indices, eval_set=(X_test, Y_test), early_stopping_rounds=50)\n\n\ny_pred = model.predict(X_test)\n\n\naccuracy = accuracy_score(Y_test, y_pred)\nconf_matrix = confusion_matrix(Y_test, y_pred)\nclass_report = classification_report(Y_test, y_pred)\nprint(f'Accuracy: {accuracy}')\nprint('Confusion Matrix:')\nprint(conf_matrix)\nprint('Classification Report:')\nprint(class_report)"

In [109]:
'''anomaly_labels = [
    'Negative transaction difference', 
    'Unexpected destination balance difference',
    'Large transaction',
    'Very high transaction amount',
    'Very low transaction amount',
    'Origin balance less than transaction amount',
    'Destination balance decreased after transaction'
]'''

def process_and_display_transaction(new_transaction, models, X_train_columns):
    new_data = pd.DataFrame([new_transaction])
    new_data = pd.get_dummies(new_data, columns=['type'], drop_first=True)

    new_data['transaction_difference'] = new_data['oldbalanceOrg'] - new_data['newbalanceOrig']
    new_data['dest_balance_difference'] = new_data['newbalanceDest'] - new_data['oldbalanceDest']
    new_data['isLargeTransaction'] = new_data['amount'] > 200000

    for column in X_train_columns:
        if column not in new_data.columns:
            new_data[column] = 0
    new_data = new_data.reindex(columns=X_train_columns, fill_value=0)

    warnings = []
    detailed_status = []
    
    for anomaly in anomaly_labels:
        model = models[anomaly]
        prediction = model.predict(new_data)[0]
        detailed_status.append(f"{anomaly}: {prediction}")
        
        if prediction == 1:
            warnings.append(f"Warning: {anomaly} detected!")

    print("Transaction Details:")
    print(new_transaction)
    print("\nAnomaly Status:")
    for status in detailed_status:
        print(status)
    
    if warnings:
        print("\nWarnings:")
        for warning in warnings:
            print(warning)
    else:
        print("\nNo anomalies detected. Transaction appears normal.")


In [110]:
example_transaction = {
    'step': 1,
    'type': 'PAYMENT',
    'amount': 1234.56,
    'nameOrig': 'C1234567890',
    'oldbalanceOrg': 5000.00,
    'newbalanceOrig': 3765.44,
    'nameDest': 'C0987654321',
    'oldbalanceDest': 2000.00,
    'newbalanceDest': 2000.00
}
process_and_display_transaction(example_transaction, models, X_train.columns)

Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 1234.56, 'nameOrig': 'C1234567890', 'oldbalanceOrg': 5000.0, 'newbalanceOrig': 3765.44, 'nameDest': 'C0987654321', 'oldbalanceDest': 2000.0, 'newbalanceDest': 2000.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 0
Negative transaction difference: 0
Origin balance less than transaction amount: 0
Unexpected destination balance difference: 0
Very high transaction amount: 0
Very low transaction amount: 0

No anomalies detected. Transaction appears normal.


In [104]:
example_transaction_1 = {
    'step': 1,
    'type': 'TRANSFER',
    'amount': 215310.3,
    'nameOrig': 'C200000001',
    'oldbalanceOrg': 500.00,
    'newbalanceOrig': 0.00,
    'nameDest': 'C200000002',
    'oldbalanceDest': 0.00,
    'newbalanceDest': 0.00
}
process_and_display_transaction(example_transaction_1, models, X_train.columns)

Transaction Details:
{'step': 1, 'type': 'TRANSFER', 'amount': 215310.3, 'nameOrig': 'C200000001', 'oldbalanceOrg': 500.0, 'newbalanceOrig': 0.0, 'nameDest': 'C200000002', 'oldbalanceDest': 0.0, 'newbalanceDest': 0.0}

Anomaly Status:
Destination balance decreased after transaction: 0
Large transaction: 1
Negative transaction difference: 0
Origin balance less than transaction amount: 1
Unexpected destination balance difference: 0
Very high transaction amount: 1
Very low transaction amount: 0



In [105]:
example_transaction_2 = {
    'step': 1,
    'type': 'PAYMENT',
    'amount': 500.00,
    'nameOrig': 'C100000003',
    'oldbalanceOrg': 1000.00,
    'newbalanceOrig': 500.00,
    'nameDest': 'C100000004',
    'oldbalanceDest': 1000.00,
    'newbalanceDest': 1500.00
}
process_and_display_transaction(example_transaction_2, models, X_train.columns)

Transaction Details:
{'step': 1, 'type': 'PAYMENT', 'amount': 500.0, 'nameOrig': 'C100000003', 'oldbalanceOrg': 1000.0, 'newbalanceOrig': 500.0, 'nameDest': 'C100000004', 'oldbalanceDest': 1000.0, 'newbalanceDest': 1500.0}

Anomaly Status:
Destination balance decreased after transaction: 1
Large transaction: 0
Negative transaction difference: 0
Origin balance less than transaction amount: 0
Unexpected destination balance difference: 0
Very high transaction amount: 0
Very low transaction amount: 0

