In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
data = pd.read_csv('Fraud.csv')

In [3]:
data

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.00,160296.36,M1979787155,0.00,0.00,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.00,19384.72,M2044282225,0.00,0.00,0,0
2,1,TRANSFER,181.00,C1305486145,181.00,0.00,C553264065,0.00,0.00,1,0
3,1,CASH_OUT,181.00,C840083671,181.00,0.00,C38997010,21182.00,0.00,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.00,29885.86,M1230701703,0.00,0.00,0,0
...,...,...,...,...,...,...,...,...,...,...,...
6362615,743,CASH_OUT,339682.13,C786484425,339682.13,0.00,C776919290,0.00,339682.13,1,0
6362616,743,TRANSFER,6311409.28,C1529008245,6311409.28,0.00,C1881841831,0.00,0.00,1,0
6362617,743,CASH_OUT,6311409.28,C1162922333,6311409.28,0.00,C1365125890,68488.84,6379898.11,1,0
6362618,743,TRANSFER,850002.52,C1685995037,850002.52,0.00,C2080388513,0.00,0.00,1,0


In [4]:
sampled_data = data.sample(frac=0.1, random_state=42)


sampled_data['transaction_difference'] = sampled_data['oldbalanceOrg'] - sampled_data['newbalanceOrig']
sampled_data['dest_balance_difference'] = sampled_data['oldbalanceDest'] - sampled_data['newbalanceDest']
sampled_data['isLargeTransaction'] = sampled_data['amount'] > sampled_data['amount'].mean()


sampled_data = pd.get_dummies(sampled_data, columns=['type'], drop_first=True)


In [5]:
sampled_data.head()

Unnamed: 0,step,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud,transaction_difference,dest_balance_difference,isLargeTransaction,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER
3737323,278,330218.42,C632336343,20866.0,351084.42,C834976624,452419.57,122201.15,0,0,-330218.42,330218.42,True,False,False,False,False
264914,15,11647.08,C1264712553,30370.0,18722.92,M215391829,0.0,0.0,0,0,11647.08,0.0,False,False,False,True,False
85647,10,152264.21,C1746846248,106589.0,258853.21,C1607284477,201303.01,49038.8,0,0,-152264.21,152264.21,False,False,False,False,False
5899326,403,1551760.63,C333676753,0.0,0.0,C1564353608,3198359.45,4750120.08,0,0,0.0,-1551760.63,True,False,False,False,True
2544263,206,78172.3,C813403091,2921331.58,2999503.88,C1091768874,415821.9,337649.6,0,0,-78172.3,78172.3,False,False,False,False,False


In [6]:
X = sampled_data.drop(columns=['isFraud', 'nameOrig', 'nameDest', 'isFlaggedFraud'])
y = sampled_data['isFraud']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

In [8]:
y_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [9]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[190636      5]
 [    56    182]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    190641
           1       0.97      0.76      0.86       238

    accuracy                           1.00    190879
   macro avg       0.99      0.88      0.93    190879
weighted avg       1.00      1.00      1.00    190879



In [10]:
def detect_anomalies(transaction):
    anomalies = []
    if transaction['transaction_difference'] < 0:
        anomalies.append('Negative transaction difference')
    if transaction['dest_balance_difference'] != 0 and 'type_TRANSFER' in transaction and transaction['type_TRANSFER']:
        anomalies.append('Unexpected destination balance difference')
    if transaction['isLargeTransaction']:
        anomalies.append('Large transaction')
    if transaction['amount'] > 10000:
        anomalies.append('Very high transaction amount')
    if transaction['amount'] < 10:
        anomalies.append('Very low transaction amount')
    if transaction['oldbalanceOrg'] < transaction['amount']:
        anomalies.append('Origin balance less than transaction amount')
    if transaction['newbalanceDest'] < transaction['oldbalanceDest']:
        anomalies.append('Destination balance decreased after transaction')
    return anomalies

In [11]:
def generate_warning(anomalies):
    if anomalies:
        return f"Warning: {'; '.join(anomalies)}"
    else:
        return "No anomalies detected"

In [12]:
sampled_data['anomalies'] = sampled_data.apply(detect_anomalies, axis=1)
sampled_data['warning'] = sampled_data['anomalies'].apply(generate_warning)

resultsample=sampled_data[['step', 'amount', 'type_CASH_OUT', 'type_DEBIT', 'type_PAYMENT', 'type_TRANSFER', 'isFraud', 'anomalies', 'warning']]
resultsample.to_csv('transaction_anomalies.csv', index=False)

print("Results saved to transaction_anomalies.csv")

Results saved to transaction_anomalies.csv


In [1]:
resultsample.head(100)

NameError: name 'resultsample' is not defined

In [14]:
def test_new_transaction(new_transaction):
    new_transaction['transaction_difference'] = new_transaction['oldbalanceOrg'] - new_transaction['newbalanceOrig']
    new_transaction['dest_balance_difference'] = new_transaction['oldbalanceDest'] - new_transaction['newbalanceDest']
    new_transaction['isLargeTransaction'] = new_transaction['amount'] > data['amount'].mean()

    new_transaction = pd.get_dummies(new_transaction, columns=['type'], drop_first=True)
    
    missing_cols = set(X_train.columns) - set(new_transaction.columns)
    for col in missing_cols:
        new_transaction[col] = 0
    new_transaction = new_transaction[X_train.columns]
    
    prediction = model.predict(new_transaction)[0]
    anomalies = detect_anomalies(new_transaction.iloc[0])
    warning = generate_warning(anomalies)
    
    return prediction, warning

In [15]:
# fraud
new_fraudulent_transaction_data = pd.DataFrame({
    'transactionstep': [1],
    'type': ['TRANSFER'],
    'amount': [20000],  
    'nameOrig': ['C1234567890'],
    'oldbalanceOrg': [5000],  
    'newbalanceOrig': [0],
    'nameDest': ['C0987654321'],
    'oldbalanceDest': [0],
    'newbalanceDest': [0],
    'isFlaggedFraud': [0]
})

prediction, warning = test_new_transaction(new_fraudulent_transaction_data)
print(prediction)
# print(f"Prediction: {'Fraudulent' if prediction == 1 else 'Non-Fraudulent'}")
print(f"Warning: {warning}")


0


In [16]:
# non fraud
new_transaction_data = pd.DataFrame({
    'transactionstep': [1],
    'type': ['TRANSFER'],
    'amount': [2000],
    'nameOrig': ['C1234567890'],
    'oldbalanceOrg': [5000],
    'newbalanceOrig': [3000],
    'nameDest': ['C0987654321'],
    'oldbalanceDest': [0],
    'newbalanceDest': [2000],
    'isFlaggedFraud': [0]
})

prediction, warning = test_new_transaction(new_transaction_data)
print(prediction)
# print(f"Prediction: {'Fraudulent' if prediction == 1 else 'Non-Fraudulent'}")
print(f"Warning: {warning}")

0


In [17]:
new_fraudulent_transaction_data_2 = pd.DataFrame({
    'transactionstep': [2],
    'type': ['CASH_OUT'],
    'amount': [50],  
    'nameOrig': ['C2345678901'],
    'oldbalanceOrg': [100], 
    'newbalanceOrig': [0],
    'nameDest': ['C9876543210'],
    'oldbalanceDest': [5000],
    'newbalanceDest': [4950],  
    'isFlaggedFraud': [0]
})

prediction, warning = test_new_transaction(new_fraudulent_transaction_data_2)
# print(f"Prediction: {'Fraudulent' if prediction == 1 else 'Non-Fraudulent'}")
print(f"Warning: {warning}")



In [18]:
new_fraudulent_transaction_data_3 = pd.DataFrame({
    'transactionstep': [3],
    'type': ['CASH_OUT'],
    'amount': [15000],  # Large amount
    'nameOrig': ['C3456789012'],
    'oldbalanceOrg': [5000],  
    'newbalanceOrig': [0],
    'nameDest': ['C8765432109'],
    'oldbalanceDest': [20000],
    'newbalanceDest': [5000],  
    'isFlaggedFraud': [0]
})

prediction_3, warning_3 = test_new_transaction(new_fraudulent_transaction_data_3)
# print(f"Prediction: {'Fraudulent' if prediction_3 == 1 else 'Non-Fraudulent'}")
print(f"Warning: {warning_3}")




In [19]:
from collections import Counter

counter = Counter(y)
print(f"Original class distribution: {counter}")

Original class distribution: Counter({0: 635445, 1: 817})


In [20]:
!pip install -U scikit-learn imbalanced-learn




[notice] A new release of pip is available: 24.0 -> 24.1
[notice] To update, run: C:\Users\Varun\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [21]:
print(sampled_data['isFraud'].value_counts())

isFraud
0    635445
1       817
Name: count, dtype: int64


In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import resample

In [23]:
data1 = pd.read_csv('Fraud.csv')

In [24]:
data = data1.sample(frac=0.1, random_state=42)

data['transaction_difference'] = data['oldbalanceOrg'] - data['newbalanceOrig']
data['dest_balance_difference'] = data['oldbalanceDest'] - data['newbalanceDest']
data['isLargeTransaction'] = data['amount'] > data['amount'].mean()


data = pd.get_dummies(data, columns=['type'], drop_first=True)

In [25]:
data

Unnamed: 0,step,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud,transaction_difference,dest_balance_difference,isLargeTransaction,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER
3737323,278,330218.42,C632336343,20866.00,351084.42,C834976624,452419.57,122201.15,0,0,-330218.42,330218.42,True,False,False,False,False
264914,15,11647.08,C1264712553,30370.00,18722.92,M215391829,0.00,0.00,0,0,11647.08,0.00,False,False,False,True,False
85647,10,152264.21,C1746846248,106589.00,258853.21,C1607284477,201303.01,49038.80,0,0,-152264.21,152264.21,False,False,False,False,False
5899326,403,1551760.63,C333676753,0.00,0.00,C1564353608,3198359.45,4750120.08,0,0,0.00,-1551760.63,True,False,False,False,True
2544263,206,78172.30,C813403091,2921331.58,2999503.88,C1091768874,415821.90,337649.60,0,0,-78172.30,78172.30,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1172200,132,415721.93,C1050495198,0.00,0.00,C547650791,932883.24,1348605.16,0,0,0.00,-415721.92,True,True,False,False,False
4791099,345,1268.70,C1093037836,0.00,0.00,C628982495,4417140.07,4418408.76,0,0,0.00,-1268.69,False,True,False,False,False
3150498,236,11321.01,C1477386440,0.00,0.00,M805812799,0.00,0.00,0,0,0.00,0.00,False,False,False,True,False
2361946,190,2791.68,C398656338,58727.00,55935.32,M1075339200,0.00,0.00,0,0,2791.68,0.00,False,False,False,True,False


In [26]:

print("Class distribution before resampling:")
print(data['isFraud'].value_counts())


df_majority = data[data['isFraud'] == 0]
df_minority = data[data['isFraud'] == 1]


df_minority_upsampled = resample(df_minority, 
                                 replace=True,    # sample with replacement
                                 n_samples=len(df_majority), # to match majority class
                                 random_state=42) # reproducible results



Class distribution before resampling:
isFraud
0    635445
1       817
Name: count, dtype: int64


In [27]:

data_upsampled = pd.concat([df_majority, df_minority_upsampled])


print("Class distribution after resampling:")
print(data_upsampled['isFraud'].value_counts())


Class distribution after resampling:
isFraud
0    635445
1    635445
Name: count, dtype: int64


In [28]:


X = data_upsampled.drop(columns=['isFraud', 'nameOrig', 'nameDest', 'isFlaggedFraud'])
y = data_upsampled['isFraud']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)


model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


y_pred = model.predict(X_test)




In [29]:

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


def detect_anomalies(transaction):
    anomalies = []
    if transaction['transaction_difference'] < 0:
        anomalies.append('Negative transaction difference')
    if transaction['dest_balance_difference'] != 0 and 'type_TRANSFER' in transaction and transaction['type_TRANSFER']:
        anomalies.append('Unexpected destination balance difference')
    if transaction['isLargeTransaction']:
        anomalies.append('Large transaction')
    if transaction['amount'] > 10000:
        anomalies.append('Very high transaction amount')
    if transaction['amount'] < 10:
        anomalies.append('Very low transaction amount')
    if transaction['oldbalanceOrg'] < transaction['amount']:
        anomalies.append('Origin balance less than transaction amount')
    if transaction['newbalanceDest'] < transaction['oldbalanceDest']:
        anomalies.append('Destination balance decreased after transaction')
    return anomalies


data['anomalies'] = data.apply(detect_anomalies, axis=1)


def generate_warning(anomalies):
    if anomalies:
        return f"Warning: {'; '.join(anomalies)}"
    else:
        return "No anomalies detected"

data['warning'] = data['anomalies'].apply(generate_warning)


resultsample = data[['step', 'amount', 'type_CASH_OUT', 'type_DEBIT', 'type_PAYMENT', 'type_TRANSFER', 'isFraud', 'anomalies', 'warning']]


resultsample.to_csv('transaction_anomalies.csv', index=False)

print("Results saved to transaction_anomalies.csv")


def test_new_transaction(new_transaction):

    new_transaction['transaction_difference'] = new_transaction['oldbalanceOrg'] - new_transaction['newbalanceOrig']
    new_transaction['dest_balance_difference'] = new_transaction['oldbalanceDest'] - new_transaction['newbalanceDest']
    new_transaction['isLargeTransaction'] = new_transaction['amount'] > data['amount'].mean()
    
    new_transaction = pd.get_dummies(new_transaction, columns=['type'], drop_first=True)
    

    missing_cols = set(X_train.columns) - set(new_transaction.columns)
    for col in missing_cols:
        new_transaction[col] = 0
    new_transaction = new_transaction[X_train.columns]
    

    prediction = model.predict(new_transaction)[0]
    anomalies = detect_anomalies(new_transaction.iloc[0])
    warning = generate_warning(anomalies)
    
    return prediction, warning


new_fraudulent_transaction_data = pd.DataFrame({
    'transactionstep': [1],
    'type': ['TRANSFER'],
    'amount': [20000],  
    'nameOrig': ['C1234567890'],
    'oldbalanceOrg': [5000], 
    'newbalanceOrig': [0],
    'nameDest': ['C0987654321'],
    'oldbalanceDest': [0],
    'newbalanceDest': [0],
    'isFlaggedFraud': [0]
})


prediction, warning = test_new_transaction(new_fraudulent_transaction_data)
print(f"Prediction: {'Fraudulent' if prediction == 1 else 'Non-Fraudulent'}")
print(f"Warning: {warning}")


new_fraudulent_transaction_data_2 = pd.DataFrame({
    'transactionstep': [2],
    'type': ['CASH_OUT'],
    'amount': [50],  
    'nameOrig': ['C2345678901'],
    'oldbalanceOrg': [100],  
    'newbalanceOrig': [0],
    'nameDest': ['C9876543210'],
    'oldbalanceDest': [5000],
    'newbalanceDest': [4950],  
    'isFlaggedFraud': [0]
})

prediction_2, warning_2 = test_new_transaction(new_fraudulent_transaction_data_2)
print(f"Prediction: {'Fraudulent' if prediction_2 == 1 else 'Non-Fraudulent'}")
print(f"Warning: {warning_2}")

[[317640     37]
 [     0 317768]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    317677
           1       1.00      1.00      1.00    317768

    accuracy                           1.00    635445
   macro avg       1.00      1.00      1.00    635445
weighted avg       1.00      1.00      1.00    635445

Results saved to transaction_anomalies.csv
Prediction: Non-Fraudulent
Prediction: Non-Fraudulent
