In [None]:
!pip3 install sklearn

#### Necessary Imports

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Read the dataset from the CSV file stored relative to the notebook.
dataset_path = r"./datasets/dataset.csv"
data = pd.read_csv(dataset_path)

#### Exploratory Data Analysis (EDA)

In [None]:
# Report the number of missing values per column. The result is printed
# explicitly because a bare expression is only rendered when it is the last
# statement of a cell, so the original check produced no visible output.
print(data.isnull().sum())

# Drop every row that contains at least one missing value.
data = data.dropna()

# Summarise the remaining columns, their dtypes and non-null counts.
data.info()

##### Distribution of Transaction Types

In [5]:
# Count how many rows fall under each transaction type.
# The counts are stored as `type_counts` so the built-in type() is not
# shadowed for the rest of the kernel session.
type_counts = data["type"].value_counts()
transactions = type_counts.index    # category labels
quantity = type_counts.values       # number of rows per label

# Donut chart (hole=0.5) showing the share of each transaction type.
figure = px.pie(data,
                values=quantity,
                names=transactions,
                hole=0.5,
                title="Distribution of Transaction Type")
figure.show()

In [6]:
# Pairwise correlation between the numeric columns only. At this point the
# "type" column still holds strings (it is integer-encoded further down), and
# pandas >= 2.0 raises on non-numeric columns unless numeric_only=True is set.
correlation = data.corr(numeric_only=True)
# Observation: the output is not really correlated with any single input.
#print(correlation["isFraud"].sort_values(ascending=False))

In [7]:
# Encode the categorical columns: each transaction type becomes an integer
# code and the 0/1 values of "isFraud" become readable labels.
type_codes = {"CASH_OUT": 1, "PAYMENT": 2, "CASH_IN": 3, "TRANSFER": 4, "DEBIT": 5}
fraud_labels = {0: "No Fraud", 1: "Fraud"}

data["type"] = data["type"].map(type_codes)
data["isFraud"] = data["isFraud"].map(fraud_labels)
#print(data.head())

#### Machine Learning on the Payments

##### Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix built from the four selected input columns.
x = np.array(data[["type", "amount", "oldbalanceOrg", "newbalanceOrig"]])

# Target as a 1-D array: selecting with a single column name (not a list)
# avoids the (n, 1) column vector that scikit-learn estimators warn about.
y = np.array(data["isFraud"])

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.10, random_state=42)

##### Logistic Regression Classifier

In [48]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression with default hyper-parameters. The target is
# flattened with ravel() so the estimator receives a 1-D label array, the
# same way the KNN and gradient-boosting cells of this notebook do.
lr = LogisticRegression()
lr.fit(xtrain, ytrain.ravel())

# score() reports the mean accuracy on the held-out split.
print(f"Accuracy is: {lr.score(xtest, ytest)}")

Accuracy is: 0.9995049209287997


In [49]:
# One hand-built sample, in the training column order:
# [type, amount, oldbalanceOrg, newbalanceOrig]
features = np.array([[4, 9000.60, 9000.60, 0.0]])
lr_prediction = lr.predict(features)
print(f"Prediction is: {lr_prediction}")

Prediction is: ['Fraud']


##### K-Nearest Neighbors Classifier Method

In [50]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default settings, trained on the
# flattened (1-D) training labels.
knn = KNeighborsClassifier()
knn.fit(xtrain, ytrain.ravel())

# Mean accuracy on the held-out split.
knn_accuracy = knn.score(xtest, ytest)
print(f"Accuracy is: {knn_accuracy}")

Accuracy is: 0.9996652322470931


In [51]:
# One hand-built sample, in the training column order:
# [type, amount, oldbalanceOrg, newbalanceOrig]
features = np.array([[4, 9000.60, 9000.60, 0.0]])
knn_prediction = knn.predict(features)
print(f"Prediction is: {knn_prediction}")

Prediction is: ['No Fraud']


##### Gradient Boosting Classifier Method

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees with a fixed seed, trained on the flattened (1-D)
# training labels.
gb = GradientBoostingClassifier(random_state=0)
gb.fit(xtrain, ytrain.ravel())

# Mean accuracy on the held-out split.
gb_accuracy = gb.score(xtest, ytest)
print(f"Accuracy is: {gb_accuracy}")

In [60]:
# One hand-built sample, in the training column order:
# [type, amount, oldbalanceOrg, newbalanceOrig]
features = np.array([[4, 9000.60, 9000.60, 0.0]])
gb_prediction = gb.predict(features)
print(f"Prediction is: {gb_prediction}")

Prediction is: ['No Fraud']


##### Random Forest Classifier Method

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forests are stochastic (bootstrap samples and feature sub-sampling),
# so the seed is fixed to make the reported accuracy and predictions
# reproducible; 0 matches the seed used for the gradient-boosting model.
rf = RandomForestClassifier(random_state=0)

# ravel() hands the estimator a 1-D label array, consistent with the KNN and
# gradient-boosting cells of this notebook.
rf.fit(xtrain, ytrain.ravel())

# Mean accuracy on the held-out split.
print(f"Accuracy is: {rf.score(xtest, ytest)}")

In [61]:
# One hand-built sample, in the training column order:
# [type, amount, oldbalanceOrg, newbalanceOrig]
features = np.array([[4, 9000.60, 9000.60, 0.0]])
rf_prediction = rf.predict(features)
print(f"Prediction is: {rf_prediction}")

Prediction is: ['Fraud']


##### Naive Bayes Classifier Method

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings. ravel() hands the estimator a
# 1-D label array, consistent with the KNN and gradient-boosting cells of
# this notebook.
nb = GaussianNB()
nb.fit(xtrain, ytrain.ravel())

# Mean accuracy on the held-out split.
print(f"Accuracy is: {nb.score(xtest, ytest)}")

In [None]:
# One hand-built sample, in the training column order:
# [type, amount, oldbalanceOrg, newbalanceOrig]
features = np.array([[4, 9000.60, 9000.60, 0.0]])
nb_prediction = nb.predict(features)
print(f"Prediction is: {nb_prediction}")

##### Decision Tree Classifier Method

In [None]:
from sklearn.tree import DecisionTreeClassifier

# The tree builder permutes features when searching for splits, so ties can
# be broken differently between runs; fixing the seed makes the fitted tree
# reproducible. 0 matches the seed used for the gradient-boosting model.
dt = DecisionTreeClassifier(random_state=0)

# ravel() hands the estimator a 1-D label array, consistent with the KNN and
# gradient-boosting cells of this notebook.
dt.fit(xtrain, ytrain.ravel())

# Mean accuracy on the held-out split.
print(f"Accuracy is: {dt.score(xtest, ytest)}")

In [None]:
# One hand-built sample, in the training column order:
# [type, amount, oldbalanceOrg, newbalanceOrig]
features = np.array([[4, 9000.60, 9000.60, 0.0]])
dt_prediction = dt.predict(features)
print(f"Prediction is: {dt_prediction}")