# Online Payments Fraud Detection using ML 

In [10]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns 
import plotly.express as px

In [11]:
data= pd.read_csv("detection.csv")

In [12]:
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [13]:
data.tail()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
6362615,743,CASH_OUT,339682.13,C786484425,339682.13,0.0,C776919290,0.0,339682.13,1,0
6362616,743,TRANSFER,6311409.28,C1529008245,6311409.28,0.0,C1881841831,0.0,0.0,1,0
6362617,743,CASH_OUT,6311409.28,C1162922333,6311409.28,0.0,C1365125890,68488.84,6379898.11,1,0
6362618,743,TRANSFER,850002.52,C1685995037,850002.52,0.0,C2080388513,0.0,0.0,1,0
6362619,743,CASH_OUT,850002.52,C1280323807,850002.52,0.0,C873221189,6510099.11,7360101.63,1,0


In [14]:
data.isnull().sum()

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64

In [15]:
numeric_df = data.select_dtypes(include=['float64', 'int64'])
corr = numeric_df.corr()
print(corr["isFraud"].sort_values(ascending=False))


isFraud           1.000000
amount            0.076688
isFlaggedFraud    0.044109
step              0.031578
oldbalanceOrg     0.010154
newbalanceDest    0.000535
oldbalanceDest   -0.005885
newbalanceOrig   -0.008148
Name: isFraud, dtype: float64


In [16]:
data.type.value_counts()

type
CASH_OUT    2237500
PAYMENT     2151495
CASH_IN     1399284
TRANSFER     532909
DEBIT         41432
Name: count, dtype: int64

In [17]:
type=data["type"].value_counts()
trans=type.index 
quantity=type.values 

figure= px.pie(data,
               values=quantity,
               names=trans,
               title="distribution of transction types")
figure.show()

In [18]:
data["type"]=data["type"].replace({"CASH_OUT":1,
                                   "PAYMENT":2,
                                   "CASH_IN":3, 
                                   "TRANSFER":4, 
                                   "DEBIT":5})


Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



In [19]:
data["isFraud"]=data["isFraud"].replace({0:"NoFraud", 1:"Fraud"})

In [20]:
data.columns

Index(['step', 'type', 'amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig',
       'nameDest', 'oldbalanceDest', 'newbalanceDest', 'isFraud',
       'isFlaggedFraud'],
      dtype='object')

In [21]:
from sklearn.model_selection import train_test_split
x= np.array(data[['step', 'type', 'amount','oldbalanceOrg', 'newbalanceOrig']])
y=np.array(data[['isFraud']])
x_train, x_test, y_train, y_test=train_test_split(x, y, test_size=0.10, random_state=42)

In [22]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
model1= DecisionTreeClassifier()
model2=LogisticRegression()
model1.fit(x_train, y_train.ravel())
model2.fit(x_train, y_train.ravel())



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



In [23]:
print("DecisionTreeClassifier:", model1.score(x_test, y_test))
print("LogisticRegression:", model2.score(x_test, y_test))

DecisionTreeClassifier: 0.9996463720919998
LogisticRegression: 0.9990915691963373


In [24]:
data.tail()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
6362615,743,1,339682.13,C786484425,339682.13,0.0,C776919290,0.0,339682.13,Fraud,0
6362616,743,4,6311409.28,C1529008245,6311409.28,0.0,C1881841831,0.0,0.0,Fraud,0
6362617,743,1,6311409.28,C1162922333,6311409.28,0.0,C1365125890,68488.84,6379898.11,Fraud,0
6362618,743,4,850002.52,C1685995037,850002.52,0.0,C2080388513,0.0,0.0,Fraud,0
6362619,743,1,850002.52,C1280323807,850002.52,0.0,C873221189,6510099.11,7360101.63,Fraud,0


In [25]:
fetures= np.array([[1, 2, 9839.64,339682.13, 0]])
print(model1.predict(fetures))

['NoFraud']


In [26]:
feture= np.array([[743,1,339682.13,170136.0, 0]])
print(model1.predict(feture))

['Fraud']
