In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn import utils
from sklearn.metrics import log_loss
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
import pickle

In [2]:
df = pd.read_csv('modeldata.csv')

In [3]:
df

Unnamed: 0,is_transfer,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFlaggedFraud,isFraud
0,0,229133.94,15325.00,0.0,5083.00,51513.44,0,0
1,0,181.00,181.00,0.0,21182.00,0.00,0,1
2,1,181.00,181.00,0.0,0.00,0.00,0,1
3,1,215310.30,705.00,0.0,22425.00,0.00,0,0
4,1,311685.89,10835.00,0.0,6267.00,2719172.89,0,0
...,...,...,...,...,...,...,...,...
2770404,1,6311409.28,6311409.28,0.0,0.00,0.00,0,1
2770405,0,339682.13,339682.13,0.0,0.00,339682.13,0,1
2770406,0,6311409.28,6311409.28,0.0,68488.84,6379898.11,0,1
2770407,1,850002.52,850002.52,0.0,0.00,0.00,0,1


In [4]:
X = df[['is_transfer', 'amount', 'oldbalanceOrg', 'newbalanceOrig','oldbalanceDest', 'newbalanceDest','isFlaggedFraud']]
y = df['isFraud']
## Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=3000)

In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

treeModel = DecisionTreeClassifier(min_samples_split=3).fit(X_train, y_train)
treePred = treeModel.predict(X_test)

cm = confusion_matrix(y_test, treePred)
print("Confusion Matrix")
print(cm)
print()
print("TN (True Negative) predictions:", cm[0][0])
print("FP (False Positive) predictions:", cm[0][1])
print("FN (False Negative) predictions:", cm[1][0])
print("TP (True Positive) predictions:", cm[1][1])

Confusion Matrix
[[690347    203]
 [   260   1793]]

TN (True Negative) predictions: 690347
FP (False Positive) predictions: 203
FN (False Negative) predictions: 260
TP (True Positive) predictions: 1793


In [9]:
classification_report = classification_report(y_test, treePred)
print("classification Report")
print(classification_report)
print()

classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    690550
           1       0.90      0.87      0.89      2053

    accuracy                           1.00    692603
   macro avg       0.95      0.94      0.94    692603
weighted avg       1.00      1.00      1.00    692603




In [10]:
#accuracy and error of testing set
accuracy = accuracy_score(y_test, treePred)
error = 1-accuracy
print()
print("Accuracy=",accuracy)
print("Error=",error)

#Precsion, Recall, and F1_score
precision = precision_score(y_test, treePred)
recall = recall_score(y_test, treePred)
f1 = f1_score(y_test, treePred)
print()
print("Precision=", precision)
print("Recall=", recall)
print("F1_score=", f1)


Accuracy= 0.9993315073714668
Error= 0.0006684926285331905

Precision= 0.8982965931863728
Recall= 0.873356064296152
F1_score= 0.8856507779698691


In [9]:
filename = 'RFFraudModel'
pickle.dump(treeModel, open(filename, 'wb'))

### Making a new prediction using loaded model

In [10]:
# load the model from disk
loaded_model = pickle.load(open(filename, 'rb'))

In [11]:
#Pick sample record from database
import pymongo

In [12]:
client = pymongo.MongoClient()
db = client["Fraud"]
fraud_data = db["FraudData"]

In [13]:
X_1 = fraud_data.find_one()

In [16]:
X_1['_id']

ObjectId('6259eb9bf3d4ffef6a8bdf11')

In [17]:
X_1

{'_id': ObjectId('6259eb9bf3d4ffef6a8bdf11'),
 'step': 1,
 'type': 'PAYMENT',
 'amount': 7861.64,
 'nameOrig': 'C1912850431',
 'oldbalanceOrg': 176087.23,
 'newbalanceOrig': 168225.59,
 'nameDest': 'M633326333',
 'oldbalanceDest': None,
 'newbalanceDest': None,
 'isFraud': 0,
 'isFlaggedFraud': 0}

In [18]:
#Takes a dictionary from pymongo
#Fixes features
#If type !=CASH_OUT or TRANSFER, isFraud is returned as automatically 0.
#Otherwise, transform data point to be fed into model.predict and get a prediction.
#Then, upload new data point into database. 