# Imports

In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix

# Readings 

In [15]:
columns = ["trustLevel","totalScanTimeInSeconds","grandTotal","lineItemVoids","scansWithoutRegistration","quantityModifications","scannedLineItemsPerSecond","valuePerSecond","lineItemVoidsPerPosition","fraud"]
df_train = pd.read_csv("train.csv", delimiter="|")
df_test = pd.read_csv("test.csv", delimiter="|")

# Preparing Data

In [16]:
# X = np.array(df_train.loc[:,["trustLevel","scansWithoutRegistration","lineItemVoids","quantityModifications"]])
X = np.array(df_train.loc[:,["trustLevel","totalScanTimeInSeconds","grandTotal","lineItemVoids","scansWithoutRegistration","quantityModifications","scannedLineItemsPerSecond","valuePerSecond","lineItemVoidsPerPosition"]
])
y = np.array(df_train["fraud"])

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3)

# Training the Model 

In [18]:
model = MLPClassifier(max_iter = 500)
model.fit(X_train,y_train)
pred = model.predict(X_test)

In [19]:
print("Accuracy score: ")
print(accuracy_score(y_test,pred))
print("Confusion Matrix: ")
print(confusion_matrix(y_test,pred))

Accuracy score: 
0.9414893617021277
Confusion Matrix: 
[[531   0]
 [ 33   0]]


In [20]:
print('Index\tPredicted\t\tActual')
for i in range(len(pred)):
    if pred[i] != y_test[i]:
        print(i,'\t',pred[i],'\t',y_test[i],'******')

Index	Predicted		Actual
33 	 0 	 1 ******
58 	 0 	 1 ******
70 	 0 	 1 ******
74 	 0 	 1 ******
84 	 0 	 1 ******
85 	 0 	 1 ******
88 	 0 	 1 ******
97 	 0 	 1 ******
136 	 0 	 1 ******
140 	 0 	 1 ******
143 	 0 	 1 ******
149 	 0 	 1 ******
178 	 0 	 1 ******
184 	 0 	 1 ******
186 	 0 	 1 ******
266 	 0 	 1 ******
308 	 0 	 1 ******
310 	 0 	 1 ******
371 	 0 	 1 ******
377 	 0 	 1 ******
380 	 0 	 1 ******
382 	 0 	 1 ******
401 	 0 	 1 ******
415 	 0 	 1 ******
429 	 0 	 1 ******
444 	 0 	 1 ******
450 	 0 	 1 ******
489 	 0 	 1 ******
513 	 0 	 1 ******
533 	 0 	 1 ******
536 	 0 	 1 ******
549 	 0 	 1 ******
558 	 0 	 1 ******


# Predictions of the Model for the test.csv

In [21]:
dataToPredict = np.array(df_test.loc[:,["trustLevel","totalScanTimeInSeconds","grandTotal","lineItemVoids","scansWithoutRegistration","quantityModifications","scannedLineItemsPerSecond","valuePerSecond","lineItemVoidsPerPosition"]])
pred = model.predict(dataToPredict)

# Extracting the Rows which are Associated with Fraud

In [22]:
new_df = pd.DataFrame(columns=columns[0:9])

In [23]:
#print("\nPredicted Results")
for i in range(len(pred)):
    if(pred[i] == 1):
        new_df = new_df.append(df_test.iloc[i], ignore_index=True)
         

In [24]:
new_df

Unnamed: 0,trustLevel,totalScanTimeInSeconds,grandTotal,lineItemVoids,scansWithoutRegistration,quantityModifications,scannedLineItemsPerSecond,valuePerSecond,lineItemVoidsPerPosition


# Exporting frauds to CSV file

In [106]:
new_df.to_csv('frauds.csv')

# Preperation for specifying fraud column in the test.csv file

In [107]:
new_test_dataset = pd.DataFrame(columns=columns)
new_test_dataset = df_test
new_test_dataset

Unnamed: 0,trustLevel,totalScanTimeInSeconds,grandTotal,lineItemVoids,scansWithoutRegistration,quantityModifications,scannedLineItemsPerSecond,valuePerSecond,lineItemVoidsPerPosition
0,4,467,88.48,4,8,4,0.014989,0.189465,0.571429
1,3,1004,58.99,7,6,1,0.026892,0.058755,0.259259
2,1,162,14.00,4,5,4,0.006173,0.086420,4.000000
3,5,532,84.79,9,3,4,0.026316,0.159380,0.642857
4,5,890,42.16,4,0,0,0.021348,0.047371,0.210526
...,...,...,...,...,...,...,...,...,...
498116,4,783,59.10,2,2,0,0.012771,0.075479,0.200000
498117,1,278,98.90,9,5,4,0.050360,0.355755,0.642857
498118,3,300,5.41,6,6,4,0.030000,0.018033,0.666667
498119,2,1524,33.97,2,5,3,0.005906,0.022290,0.222222


In [108]:
new_test_dataset["fraud"] = 0
new_test_dataset

Unnamed: 0,trustLevel,totalScanTimeInSeconds,grandTotal,lineItemVoids,scansWithoutRegistration,quantityModifications,scannedLineItemsPerSecond,valuePerSecond,lineItemVoidsPerPosition,fraud
0,4,467,88.48,4,8,4,0.014989,0.189465,0.571429,0
1,3,1004,58.99,7,6,1,0.026892,0.058755,0.259259,0
2,1,162,14.00,4,5,4,0.006173,0.086420,4.000000,0
3,5,532,84.79,9,3,4,0.026316,0.159380,0.642857,0
4,5,890,42.16,4,0,0,0.021348,0.047371,0.210526,0
...,...,...,...,...,...,...,...,...,...,...
498116,4,783,59.10,2,2,0,0.012771,0.075479,0.200000,0
498117,1,278,98.90,9,5,4,0.050360,0.355755,0.642857,0
498118,3,300,5.41,6,6,4,0.030000,0.018033,0.666667,0
498119,2,1524,33.97,2,5,3,0.005906,0.022290,0.222222,0


# Finding frauds in test.csv file 

In [109]:
df_1 = pd.merge(df_test,new_df,  how='left', indicator='is_fraud')
df_1.drop('fraud', inplace=True, axis=1)
df_1['is_fraud'] = np.where(df_1.is_fraud == 'both', 1, 0)

In [110]:
df_1

Unnamed: 0,trustLevel,totalScanTimeInSeconds,grandTotal,lineItemVoids,scansWithoutRegistration,quantityModifications,scannedLineItemsPerSecond,valuePerSecond,lineItemVoidsPerPosition,is_fraud
0,4,467,88.48,4,8,4,0.014989,0.189465,0.571429,0
1,3,1004,58.99,7,6,1,0.026892,0.058755,0.259259,0
2,1,162,14.00,4,5,4,0.006173,0.086420,4.000000,0
3,5,532,84.79,9,3,4,0.026316,0.159380,0.642857,0
4,5,890,42.16,4,0,0,0.021348,0.047371,0.210526,0
...,...,...,...,...,...,...,...,...,...,...
498116,4,783,59.10,2,2,0,0.012771,0.075479,0.200000,0
498117,1,278,98.90,9,5,4,0.050360,0.355755,0.642857,0
498118,3,300,5.41,6,6,4,0.030000,0.018033,0.666667,0
498119,2,1524,33.97,2,5,3,0.005906,0.022290,0.222222,0


In [113]:
for i in range(len(df_1)):
    if(df_1.iloc[i]["is_fraud"] == 1):
        print(df_1.iloc[i])

trustLevel                     1.000000
totalScanTimeInSeconds       587.000000
grandTotal                     3.830000
lineItemVoids                  0.000000
scansWithoutRegistration       9.000000
quantityModifications          0.000000
scannedLineItemsPerSecond      0.022147
valuePerSecond                 0.006525
lineItemVoidsPerPosition       0.000000
is_fraud                       1.000000
Name: 108, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       583.000000
grandTotal                    86.070000
lineItemVoids                  9.000000
scansWithoutRegistration       9.000000
quantityModifications          5.000000
scannedLineItemsPerSecond      0.029160
valuePerSecond                 0.147633
lineItemVoidsPerPosition       0.529412
is_fraud                       1.000000
Name: 139, dtype: float64
trustLevel                    1.000000
totalScanTimeInSeconds       77.000000
grandTotal                   68.760000
lineItemVoids                11

Name: 1263, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       177.000000
grandTotal                    86.100000
lineItemVoids                  3.000000
scansWithoutRegistration      10.000000
quantityModifications          0.000000
scannedLineItemsPerSecond      0.101695
valuePerSecond                 0.486441
lineItemVoidsPerPosition       0.166667
is_fraud                       1.000000
Name: 1330, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1279.000000
grandTotal                     36.710000
lineItemVoids                   2.000000
scansWithoutRegistration        9.000000
quantityModifications           1.000000
scannedLineItemsPerSecond       0.014855
valuePerSecond                  0.028702
lineItemVoidsPerPosition        0.105263
is_fraud                        1.000000
Name: 1379, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1802.000000
grandTotal                 

Name: 2380, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       530.000000
grandTotal                    75.970000
lineItemVoids                 11.000000
scansWithoutRegistration       9.000000
quantityModifications          3.000000
scannedLineItemsPerSecond      0.032075
valuePerSecond                 0.143340
lineItemVoidsPerPosition       0.647059
is_fraud                       1.000000
Name: 2404, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1266.000000
grandTotal                     72.180000
lineItemVoids                   8.000000
scansWithoutRegistration        8.000000
quantityModifications           5.000000
scannedLineItemsPerSecond       0.019747
valuePerSecond                  0.057014
lineItemVoidsPerPosition        0.320000
is_fraud                        1.000000
Name: 2411, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1453.000000
grandTotal                 

Name: 3668, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       571.000000
grandTotal                    21.840000
lineItemVoids                  8.000000
scansWithoutRegistration       9.000000
quantityModifications          4.000000
scannedLineItemsPerSecond      0.015762
valuePerSecond                 0.038249
lineItemVoidsPerPosition       0.888889
is_fraud                       1.000000
Name: 3716, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1025.000000
grandTotal                      3.870000
lineItemVoids                  11.000000
scansWithoutRegistration        8.000000
quantityModifications           3.000000
scannedLineItemsPerSecond       0.000976
valuePerSecond                  0.003776
lineItemVoidsPerPosition       11.000000
is_fraud                        1.000000
Name: 3723, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       371.000000
grandTotal                   

trustLevel                      1.000000
totalScanTimeInSeconds       1768.000000
grandTotal                     39.810000
lineItemVoids                   4.000000
scansWithoutRegistration       10.000000
quantityModifications           4.000000
scannedLineItemsPerSecond       0.005656
valuePerSecond                  0.022517
lineItemVoidsPerPosition        0.400000
is_fraud                        1.000000
Name: 4902, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       533.000000
grandTotal                    75.910000
lineItemVoids                 10.000000
scansWithoutRegistration      10.000000
quantityModifications          0.000000
scannedLineItemsPerSecond      0.043152
valuePerSecond                 0.142420
lineItemVoidsPerPosition       0.434783
is_fraud                       1.000000
Name: 4906, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       872.000000
grandTotal                    58.170000
lineItemVoids   

Name: 6300, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       448.000000
grandTotal                    14.480000
lineItemVoids                  0.000000
scansWithoutRegistration       9.000000
quantityModifications          2.000000
scannedLineItemsPerSecond      0.040179
valuePerSecond                 0.032321
lineItemVoidsPerPosition       0.000000
is_fraud                       1.000000
Name: 6341, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       254.000000
grandTotal                    23.480000
lineItemVoids                  7.000000
scansWithoutRegistration      10.000000
quantityModifications          5.000000
scannedLineItemsPerSecond      0.003937
valuePerSecond                 0.092441
lineItemVoidsPerPosition       7.000000
is_fraud                       1.000000
Name: 6439, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       339.000000
grandTotal                    65.820000

trustLevel                      1.000000
totalScanTimeInSeconds       1758.000000
grandTotal                     33.380000
lineItemVoids                  11.000000
scansWithoutRegistration       10.000000
quantityModifications           5.000000
scannedLineItemsPerSecond       0.013083
valuePerSecond                  0.018987
lineItemVoidsPerPosition        0.478261
is_fraud                        1.000000
Name: 7407, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1015.000000
grandTotal                     78.250000
lineItemVoids                   8.000000
scansWithoutRegistration       10.000000
quantityModifications           3.000000
scannedLineItemsPerSecond       0.026601
valuePerSecond                  0.077094
lineItemVoidsPerPosition        0.296296
is_fraud                        1.000000
Name: 7417, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       592.000000
grandTotal                    32.080000
lineIt

Name: 8704, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1244.000000
grandTotal                     93.960000
lineItemVoids                   8.000000
scansWithoutRegistration       10.000000
quantityModifications           5.000000
scannedLineItemsPerSecond       0.008039
valuePerSecond                  0.075531
lineItemVoidsPerPosition        0.800000
is_fraud                        1.000000
Name: 8713, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       698.000000
grandTotal                    29.730000
lineItemVoids                  7.000000
scansWithoutRegistration       9.000000
quantityModifications          5.000000
scannedLineItemsPerSecond      0.012894
valuePerSecond                 0.042593
lineItemVoidsPerPosition       0.777778
is_fraud                       1.000000
Name: 8760, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       831.000000
grandTotal                   

Name: 9917, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       573.000000
grandTotal                    17.790000
lineItemVoids                  8.000000
scansWithoutRegistration      10.000000
quantityModifications          3.000000
scannedLineItemsPerSecond      0.015707
valuePerSecond                 0.031047
lineItemVoidsPerPosition       0.888889
is_fraud                       1.000000
Name: 9926, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1650.000000
grandTotal                      0.760000
lineItemVoids                   6.000000
scansWithoutRegistration       10.000000
quantityModifications           5.000000
scannedLineItemsPerSecond       0.002424
valuePerSecond                  0.000461
lineItemVoidsPerPosition        1.500000
is_fraud                        1.000000
Name: 9928, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       826.000000
grandTotal                   

trustLevel                      1.000000
totalScanTimeInSeconds       1354.000000
grandTotal                     30.440000
lineItemVoids                   8.000000
scansWithoutRegistration        9.000000
quantityModifications           5.000000
scannedLineItemsPerSecond       0.022157
valuePerSecond                  0.022482
lineItemVoidsPerPosition        0.266667
is_fraud                        1.000000
Name: 10925, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       367.000000
grandTotal                    73.630000
lineItemVoids                  0.000000
scansWithoutRegistration      10.000000
quantityModifications          2.000000
scannedLineItemsPerSecond      0.008174
valuePerSecond                 0.200627
lineItemVoidsPerPosition       0.000000
is_fraud                       1.000000
Name: 10926, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       858.000000
grandTotal                    60.560000
lineItemVoids 

trustLevel                      1.000000
totalScanTimeInSeconds       1158.000000
grandTotal                     88.220000
lineItemVoids                   7.000000
scansWithoutRegistration       10.000000
quantityModifications           5.000000
scannedLineItemsPerSecond       0.000864
valuePerSecond                  0.076183
lineItemVoidsPerPosition        7.000000
is_fraud                        1.000000
Name: 12088, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1781.000000
grandTotal                     45.960000
lineItemVoids                   2.000000
scansWithoutRegistration       10.000000
quantityModifications           2.000000
scannedLineItemsPerSecond       0.000561
valuePerSecond                  0.025806
lineItemVoidsPerPosition        2.000000
is_fraud                        1.000000
Name: 12098, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       877.000000
grandTotal                    41.280000
line

Name: 13559, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1390.000000
grandTotal                     99.050000
lineItemVoids                   6.000000
scansWithoutRegistration        9.000000
quantityModifications           4.000000
scannedLineItemsPerSecond       0.011511
valuePerSecond                  0.071259
lineItemVoidsPerPosition        0.375000
is_fraud                        1.000000
Name: 13565, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1718.000000
grandTotal                     16.760000
lineItemVoids                  10.000000
scansWithoutRegistration       10.000000
quantityModifications           4.000000
scannedLineItemsPerSecond       0.015716
valuePerSecond                  0.009756
lineItemVoidsPerPosition        0.370370
is_fraud                        1.000000
Name: 13627, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       481.000000
grandTotal      

Name: 14566, dtype: float64
trustLevel                     1.000000
totalScanTimeInSeconds       808.000000
grandTotal                    23.940000
lineItemVoids                 11.000000
scansWithoutRegistration       8.000000
quantityModifications          5.000000
scannedLineItemsPerSecond      0.014851
valuePerSecond                 0.029629
lineItemVoidsPerPosition       0.916667
is_fraud                       1.000000
Name: 14589, dtype: float64
trustLevel                      1.000000
totalScanTimeInSeconds       1825.000000
grandTotal                     69.250000
lineItemVoids                  11.000000
scansWithoutRegistration       10.000000
quantityModifications           5.000000
scannedLineItemsPerSecond       0.000548
valuePerSecond                  0.037945
lineItemVoidsPerPosition       11.000000
is_fraud                        1.000000
Name: 14595, dtype: float64
trustLevel                    1.000000
totalScanTimeInSeconds       92.000000
grandTotal                  

KeyboardInterrupt: 

# Exporting the New test.csv File as test_with_frauds.csv

In [112]:
df_1.to_csv('test_with_frauds.csv')