<a href="https://colab.research.google.com/github/dspydev/python-projects/blob/main/fraud_detection_using_machine_learning_algorithms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Import necessary libraries
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [7]:
# Load the transaction records from the CSV file into a DataFrame
data = pd.read_csv(filepath_or_buffer="synthetic_financial_datasets_for_fraud_detection.csv")

In [8]:
# Preview the first five rows to check that the columns loaded as expected
print(data.head(n=5))

   step      type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1   PAYMENT   9839.64  C1231006815       170136.0       160296.36   
1     1   PAYMENT   1864.28  C1666544295        21249.0        19384.72   
2     1  TRANSFER    181.00  C1305486145          181.0            0.00   
3     1  CASH_OUT    181.00   C840083671          181.0            0.00   
4     1   PAYMENT  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0      0.0             0.0  
1  M2044282225             0.0             0.0      0.0             0.0  
2   C553264065             0.0             0.0      1.0             0.0  
3    C38997010         21182.0             0.0      1.0             0.0  
4  M1230701703             0.0             0.0      0.0             0.0  


In [9]:
# Report how many rows were loaded (first entry of the frame's shape)
print("Number of rows in the dataset:", data.shape[0])

Number of rows in the dataset: 14247


In [10]:
# Count the missing entries in every column
print(data.isna().sum())

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     1
newbalanceOrig    1
nameDest          1
oldbalanceDest    1
newbalanceDest    1
isFraud           1
isFlaggedFraud    1
dtype: int64


In [11]:
# Discard every row that has at least one missing value
# NOTE(review): the null counts are 1 per affected column, which looks like a
# single incomplete row -- confirm the CSV was not truncated while loading
data = data.dropna(axis=0, how="any")

In [12]:
# Show how many transactions there are of each type
print(data["type"].value_counts())

PAYMENT     7869
CASH_IN     2472
CASH_OUT    2054
TRANSFER    1438
DEBIT        413
Name: type, dtype: int64


In [13]:
# Visualize the distribution of transaction types, then encode the types as
# integers. The counts live in `type_counts` so the builtin `type` is not shadowed.
type_counts = data["type"].value_counts()
transactions = type_counts.index
quantity = type_counts.values
figure = px.pie(
    names=transactions,
    values=quantity,
    hole=0.5,
    title="Transaction Type",
)
figure.show()

# Integer code assigned to each transaction type
TYPE_CODES = {"CASH_OUT": 1, "PAYMENT": 2, "CASH_IN": 3, "TRANSFER": 4, "DEBIT": 5}
encoded_type = data["type"].map(TYPE_CODES)
# Series.map turns every value missing from the mapping into NaN. Validate
# before overwriting the column so an unexpected type (or running this cell a
# second time on already-encoded data) fails loudly instead of wiping the column.
if encoded_type.isna().any():
    unmapped = data.loc[encoded_type.isna(), "type"].unique().tolist()
    raise ValueError(
        f"Cannot encode transaction type(s) {unmapped}; "
        "unknown value, or this cell has already been run"
    )
data["type"] = encoded_type

In [14]:
# Replace the numeric "isFraud" flag with readable class labels
FRAUD_LABELS = {0: "No Fraud", 1: "Fraud"}
labelled_fraud = data["isFraud"].map(FRAUD_LABELS)
# Series.map yields NaN for any value missing from the mapping. Validate before
# overwriting the target so an unexpected flag (or running this cell twice on
# already-labelled data) raises instead of silently producing NaN labels.
if labelled_fraud.isna().any():
    unmapped = data.loc[labelled_fraud.isna(), "isFraud"].unique().tolist()
    raise ValueError(
        f"Cannot label isFraud value(s) {unmapped}; "
        "unknown value, or this cell has already been run"
    )
data["isFraud"] = labelled_fraud
print(data.head())

   step  type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1     2   9839.64  C1231006815       170136.0       160296.36   
1     1     2   1864.28  C1666544295        21249.0        19384.72   
2     1     4    181.00  C1305486145          181.0            0.00   
3     1     1    181.00   C840083671          181.0            0.00   
4     1     2  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest   isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0  No Fraud             0.0  
1  M2044282225             0.0             0.0  No Fraud             0.0  
2   C553264065             0.0             0.0     Fraud             0.0  
3    C38997010         21182.0             0.0     Fraud             0.0  
4  M1230701703             0.0             0.0  No Fraud             0.0  


In [15]:
# Split data into training and testing sets
# Features: encoded transaction type, amount, and the sender's balances
feature_columns = ["type", "amount", "oldbalanceOrg", "newbalanceOrig"]
x = data[feature_columns].to_numpy()
# Select the target as a Series (single brackets) so y is 1-D with shape
# (n_samples,), rather than an (n_samples, 1) column vector
y = data["isFraud"].to_numpy()
# Hold out 10% of the rows for testing; the seed keeps the split reproducible
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.10, random_state=42)

In [16]:
# Train a decision tree classifier on the training data.
# scikit-learn randomly permutes the candidate features at each split, so the
# seed is pinned to make the fitted tree (and its score) reproducible.
model = DecisionTreeClassifier(random_state=42)
model.fit(xtrain, ytrain)

In [17]:
# Report the classifier's accuracy on the held-out test split
# NOTE(review): if fraud cases are rare, accuracy alone can look high --
# consider checking per-class metrics as well
print(model.score(X=xtest, y=ytest))

0.9985964912280701


In [48]:
# Make a prediction for a new transaction
# The features of the new transaction are stored in a NumPy array with one row
# and four columns, in the same order used for training:
#   transaction type: 4 (TRANSFER) (1 = CASH_OUT, 2 = PAYMENT, 3 = CASH_IN, 4 = TRANSFER, 5 = DEBIT)
#   transaction amount: 5000
#   sender's old account balance: 5000
#   sender's new account balance: 0.0
new_transaction = np.array([[4, 5000, 5000, 0.0]])

# Use the model to make a prediction for the new transaction
prediction = model.predict(new_transaction)

# The model was trained on the string labels "No Fraud" / "Fraud", so the
# prediction is an array holding one of those strings. Comparing it with the
# integer 0 is never true and would always report "Fraud"; print the predicted
# label itself instead.
predicted_label = prediction[0]
print(f"Prediction for the new transaction: {predicted_label}")

Prediction for the new transaction: Fraud
