# Online Payments Fraud Detection

In [3]:
# Importing the Dependencies

import pandas as pd
import numpy as np

In [4]:
# Load the transactions CSV into a DataFrame and preview the first rows

csv_path = "/content/Online Fraud Detection.csv"
data = pd.read_csv(csv_path)
print(data.head())

   step      type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1   PAYMENT   9839.64  C1231006815       170136.0       160296.36   
1     1   PAYMENT   1864.28  C1666544295        21249.0        19384.72   
2     1  TRANSFER    181.00  C1305486145          181.0            0.00   
3     1  CASH_OUT    181.00   C840083671          181.0            0.00   
4     1   PAYMENT  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0      0.0             0.0  
1  M2044282225             0.0             0.0      0.0             0.0  
2   C553264065             0.0             0.0      1.0             0.0  
3    C38997010         21182.0             0.0      1.0             0.0  
4  M1230701703             0.0             0.0      0.0             0.0  


In [5]:
# Count the missing values in each column

null_counts = data.isna().sum()
print(null_counts)

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          1
oldbalanceDest    1
newbalanceDest    1
isFraud           1
isFlaggedFraud    1
dtype: int64


In [6]:
# Remove, in place, every row that has at least one missing value

data.dropna(axis=0, how="any", inplace=True)

In [7]:
# Frequency of each transaction type, most common first

type_counts = data["type"].value_counts()
print(type_counts)

PAYMENT     14480
CASH_OUT     6471
CASH_IN      5517
TRANSFER     2859
DEBIT         528
Name: type, dtype: int64


In [8]:
# NOTE(review): repeats the transaction-type count printed by the previous cell
print(data["type"].value_counts())

PAYMENT     14480
CASH_OUT     6471
CASH_IN      5517
TRANSFER     2859
DEBIT         528
Name: type, dtype: int64


In [9]:
# Checking correlation
# Correlate only the numeric columns. The frame also holds string columns
# (type, nameOrig, nameDest); DataFrame.corr() silently skipped those on older
# pandas but raises a ValueError on pandas >= 2.0, where numeric_only
# defaults to False. Selecting the numeric columns first works on both.

correlation = data.select_dtypes(include="number").corr()
print(correlation["isFraud"].sort_values(ascending=False))

isFraud           1.000000
amount            0.078878
oldbalanceOrg    -0.005706
newbalanceDest   -0.008500
oldbalanceDest   -0.013686
newbalanceOrig   -0.017815
step             -0.049139
isFlaggedFraud         NaN
Name: isFraud, dtype: float64


In [10]:
# Encode the categorical "type" column as integer codes, and replace the
# numeric "isFraud" flag (0 / 1) with the labels "No Fraud" / "Fraud".
# Series.map yields NaN for any value missing from the mapping, so this cell
# is not safe to run twice on the same frame.

type_codes = {"CASH_OUT": 1, "PAYMENT": 2, "CASH_IN": 3, "TRANSFER": 4, "DEBIT": 5}
fraud_labels = {0: "No Fraud", 1: "Fraud"}

data["type"] = data["type"].map(type_codes)
data["isFraud"] = data["isFraud"].map(fraud_labels)
print(data.head())

   step  type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1     2   9839.64  C1231006815       170136.0       160296.36   
1     1     2   1864.28  C1666544295        21249.0        19384.72   
2     1     4    181.00  C1305486145          181.0            0.00   
3     1     1    181.00   C840083671          181.0            0.00   
4     1     2  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest   isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0  No Fraud             0.0  
1  M2044282225             0.0             0.0  No Fraud             0.0  
2   C553264065             0.0             0.0     Fraud             0.0  
3    C38997010         21182.0             0.0     Fraud             0.0  
4  M1230701703             0.0             0.0  No Fraud             0.0  


## Online Payments Fraud Detection Model

In [12]:
# Build the feature matrix X and the target y as NumPy arrays
# (the actual train/test split happens in the training cell)

from sklearn.model_selection import train_test_split

feature_columns = ["type", "amount", "oldbalanceOrg", "newbalanceOrig"]
X = data[feature_columns].to_numpy()
# Double brackets select a one-column frame, so y has shape (n_rows, 1)
y = data[["isFraud"]].to_numpy()

In [15]:
# Training a machine learning model
# A fixed seed makes both the train/test split and the tree reproducible, so
# the printed score no longer changes from run to run. Stratifying on y keeps
# the Fraud / No Fraud proportion the same in the training and test sets.
# NOTE(review): plain accuracy can look high on an imbalanced label just by
# favouring the majority class — consider reporting precision/recall as well.

from sklearn.tree import DecisionTreeClassifier

SEED = 42
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.10, random_state=SEED, stratify=y
)
model = DecisionTreeClassifier(random_state=SEED)
model.fit(Xtrain, ytrain)
print(model.score(Xtest, ytest))

0.9966510381781648


In [19]:
# Prediction
# Feature order must match the training columns:
# [type, amount, oldbalanceOrg, newbalanceOrig]; type 4 is the TRANSFER code.
sample_transaction = [4, 9000.60, 9000.60, 0.0]
features = np.array([sample_transaction])
print(model.predict(features))

['No Fraud']


## Pickle

In [20]:
# Importing the pickle 

import pickle

In [21]:
# Serialize the trained model to model_online.pkl.
# The context manager closes the file handle (flushing the bytes to disk) even
# if pickling fails; a bare open() call leaves that to the garbage collector.

with open("model_online.pkl", "wb") as model_file:
    pickle.dump(model, model_file)