In [52]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn import utils
from sklearn.metrics import log_loss
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix

In [53]:
# Read the transactions table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='Fraud.csv')

In [54]:
# One-hot encode the categorical `type` column and append the indicator
# columns to df.
onehot = OneHotEncoder(handle_unknown='ignore')
type_matrix = onehot.fit_transform(df['type'].to_numpy().reshape(-1, 1)).toarray()

# Reuse df's own index so the join below lines up row-for-row even when df
# does not carry the default 0..n-1 index.
df_onehot = pd.DataFrame(type_matrix, index=df.index)

# Drop indicator columns left behind by an earlier run of this cell; without
# this, re-running the cell makes join() raise on the overlapping column names.
df = df.drop(columns=list(df_onehot.columns), errors='ignore').join(df_onehot)

# The indicator columns keep their integer names (0, 1, ...). The list is built
# from the encoder output instead of being hard-coded, so it follows however
# many categories were actually found in `type`.
features = [
    'amount',
    'oldbalanceOrg',
    'newbalanceOrig',
    'oldbalanceDest',
    'newbalanceDest',
    'isFlaggedFraud',
    *df_onehot.columns,
]

In [55]:
# Target vector: the fraud label column.
y = df['isFraud']

# Feature matrix: the selected columns as a plain NumPy array, standardised
# column-by-column with StandardScaler.
# NOTE(review): the scaler is fitted on every row of df, i.e. before any
# train/test partitioning, so held-out rows contribute to the scaling
# statistics. Confirm this is acceptable for the models trained downstream.
scaler = StandardScaler()
X = scaler.fit_transform(df[features].to_numpy())

In [56]:
# Partition features and labels into train and test sets. No test_size or
# train_size is passed, so the library defaults apply; the seed is fixed so
# the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=3000,
)

In [57]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Fit a decision tree on the training partition and predict the test partition.
# random_state is fixed (same seed as the train/test split) because the tree
# permutes features at every split; without a seed, ties between equally good
# splits can resolve differently from run to run and the metrics below drift.
treeModel = DecisionTreeClassifier(min_samples_split=3, random_state=3000).fit(X_train, y_train)
treePred = treeModel.predict(X_test)

# labels=[0, 1] pins the matrix to a 2x2 layout (row = actual, column =
# predicted) even if one class never appears in y_test or treePred, so the
# four-way unpacking below cannot fail on a degenerate split.
cm = confusion_matrix(y_test, treePred, labels=[0, 1])
print("Confusion Matrix")
print(cm)
print()

# Row-major flattening of the 2x2 matrix yields TN, FP, FN, TP in that order.
tn, fp, fn, tp = cm.ravel()
print("TN (True Negative) predictions:", tn)
print("FP (False Positive) predictions:", fp)
print("FN (False Negative) predictions:", fn)
print("TP (True Positive) predictions:", tp)

Confusion Matrix
[[1588441     197]
 [    213    1804]]

TN (True Negative) predictions: 1588441
FP (False Positive) predictions: 197
FN (False Negative) predictions: 213
TP (True Positive) predictions: 1804


In [58]:
# Store the rendered report under its own name. Assigning it back to
# `classification_report` would replace the imported function with a string,
# and running this cell a second time would then fail with
# "TypeError: 'str' object is not callable".
report = classification_report(y_test, treePred)
print("classification Report")
print(report)
print()

classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1588638
           1       0.90      0.89      0.90      2017

    accuracy                           1.00   1590655
   macro avg       0.95      0.95      0.95   1590655
weighted avg       1.00      1.00      1.00   1590655




In [59]:
# Accuracy on the test set and its complement (the error rate).
accuracy = accuracy_score(y_test, treePred)
error = 1 - accuracy

# Precision, recall and F1 score on the test set, each computed with the
# metric function's default arguments.
precision = precision_score(y_test, treePred)
recall = recall_score(y_test, treePred)
f1 = f1_score(y_test, treePred)

# Print the metrics in two groups, each preceded by a blank line.
metric_groups = (
    (("Accuracy=", accuracy), ("Error=", error)),
    (("Precision=", precision), ("Recall=", recall), ("F1_score=", f1)),
)
for group in metric_groups:
    print()
    for label, value in group:
        print(label, value)


Accuracy= 0.9997422445470576
Error= 0.00025775545294237645

Precision= 0.9015492253873063
Recall= 0.8943976202280615
F1_score= 0.8979591836734693
