**Importing libraries**

In [None]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
import scipy as sp
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

**Data loading**

In [None]:
# Load the dataset from 'creditcard.csv' (path is relative to the working directory).
datainput = pd.read_csv('creditcard.csv')  

In [None]:
# Preview the first rows of the loaded table.
datainput.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


**Print the complete shape of the dataset**

In [None]:
# Report the dataset dimensions as a (rows, columns) tuple.
dataset_shape = datainput.shape
print("Shape of Complete Dataset: ")
print(dataset_shape, "\n")

Shape of Complete Dataset: 
(284807, 31) 



**Check for missing values**

In [None]:
# Flag every column that holds at least one missing value, then collapse those
# per-column flags into a single answer for the whole table.
columns_with_missing = datainput.isna().any()
columns_with_missing.any()

False

In [None]:
# Split the rows by label. The variable names follow the notebook's own wording:
# `false` holds the Class == 1 rows and `true` holds the Class == 0 rows.
class_labels = datainput['Class']
false = datainput.loc[class_labels == 1]
true = datainput.loc[class_labels == 0]

# Count each subset once and reuse the counts below.
false_count = len(false)
true_count = len(true)

# Ratio of Class == 1 rows to Class == 0 rows.
n = false_count / float(true_count)
print(n)
print('False Detection : {}'.format(false_count))
print('True Detection:{}'.format(true_count), "\n")

0.0017304750013189597
False Detection : 492
True Detection:284315 



In [None]:
# Relative frequency of each class, shown as a percentage of all rows.
class_share = datainput['Class'].value_counts(normalize=True)
class_share * 100

0    99.827251
1     0.172749
Name: Class, dtype: float64

The data are heavily imbalanced: the ratio of Class = 1 rows to Class = 0 rows is about 0.0017, with 492 transactions reported as "False Detection" (Class = 1) against 284,315 reported as "True Detection" (Class = 0).

Next, we compute summary statistics of the transaction amount for each class: the count, mean, standard deviation, minimum and maximum values, and the 25th, 50th and 75th percentiles.

In [None]:
# Summary statistics of the Amount column, reported once per class subset.
# Each entry pairs the printed heading with the subset it describes.
for heading, subset in (
    ("False Detection Transaction", false),
    ("True Detection Transaction", true),
):
    print(heading)
    print("___________________________")
    print(subset.Amount.describe(), "\n")

False Detection Transaction
___________________________
count     492.000000
mean      122.211321
std       256.683288
min         0.000000
25%         1.000000
50%         9.250000
75%       105.890000
max      2125.870000
Name: Amount, dtype: float64 

True Detection Transaction
___________________________
count    284315.000000
mean         88.291022
std         250.105092
min           0.000000
25%           5.650000
50%          22.000000
75%          77.050000
max       25691.160000
Name: Amount, dtype: float64 



**Feature Selection and Label**

In [None]:
# Features: every column except the last one, for all rows.
X = datainput.iloc[:, :-1].to_numpy()
# Labels: only the last column, for all rows.
Y = datainput.iloc[:, -1].to_numpy()

# Show both shapes, separated by a divider line.
print(X.shape, '----------------------', Y.shape, sep='\n')

(284807, 30)
----------------------
(284807,)


**Model Training**

Now we split the dataset into two parts: one for training and one for testing.

In [None]:
# Hold out 30% of the rows for testing.
# - random_state pins the shuffle, so Restart & Run All reproduces the same
#   split and therefore the same downstream metrics.
# - stratify=Y keeps the class proportions the same in the train and test
#   parts; with a minority class of roughly 0.17% of the rows, an unstratified
#   split leaves the number of test positives to chance.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=42, stratify=Y
)

In [None]:
# Dimensions of the training feature matrix.
X_train.shape

(199364, 30)

In [None]:
# Dimensions of the training label vector.
Y_train.shape

(199364,)

**Applying Decision Tree classifier**

In [None]:
# Fit a shallow decision tree (depth capped at 4) and predict the test labels.
# random_state is fixed because scikit-learn randomly permutes the candidate
# features at every split, so equally good splits could otherwise yield a
# different tree (and different metrics) on each run.
classifier = DecisionTreeClassifier(max_depth=4, random_state=42)
classifier.fit(X_train, Y_train)
predicted = classifier.predict(X_test)
print("\n Predicted value:\n", predicted)


 Predicted value:
 [0 0 0 ... 0 0 0]


**Accuracy Decision Tree Classifier**

In [None]:
# Accuracy: fraction of test rows whose predicted label matches the true label.
# NOTE(review): about 99.83% of the rows are Class 0, so a model that always
# predicts 0 would score similarly; read this number together with the
# precision / recall / F1 values rather than on its own.
DecisionTree = metrics.accuracy_score(y_true=Y_test, y_pred=predicted)
print("\n The Accuracy Score Using Algorithm Decision Tree Classifier: ", DecisionTree)


 The Accuracy Score Using Algorithm Decision Tree Classifier:  0.9991456292499094


**Validation and Evaluation Parameters**

**Using precision, recall, and F1 score as evaluation metrics.**

In [None]:
# Precision = TP / (TP + FP)
# (TP = True Positive, TN = True Negative, FP = False Positive, FN = False Negative)
# Class 1 is treated as the positive label.
print('Precision')
precision = precision_score(y_true=Y_test, y_pred=predicted, pos_label=1)
print('\n Score Precision :\n', precision)

Precision

 Score Precision :
 0.7553956834532374


In [None]:
# Recall = TP / (TP + FN), with class 1 treated as the positive label.
print("Recall")
recall = recall_score(y_true=Y_test, y_pred=predicted, pos_label=1)
print("\n Recall Score :\n", recall)

Recall

 Recall Score :
 0.7291666666666666


In [None]:
# F1 score = harmonic mean of precision and recall, with class 1 as the positive label.
print('F1-Score')
fscore = f1_score(y_true=Y_test, y_pred=predicted, pos_label=1)
print("\n F1 Score :\n", fscore)

F1-Score

 F1 Score :
 0.7420494699646643
