In [1]:
import numpy as np
import pandas as pd
import csv
import sys
import os

import matplotlib.pyplot as plt    
    
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
import warnings
import pickle

# Exclusive upper bound of the class-weight sweep in svm(): the sweep starts
# at weight 1 and stops before max_iters, so weights 1..500 are tried.
max_iters = 501


def svm(x,y,filename):
    """Sweep the class-1 weight of a LinearSVC and log validation metrics.

    The data is split 70/15/15 into train/validation/test, stratified on the
    labels. For every integer weight ``w`` in ``[1, max_iters)`` a
    ``LinearSVC`` with ``class_weight={0: 1, 1: w}`` is fitted on the training
    split, pickled to ``./models/svm_<stem>/<w>.sav`` and scored on the
    validation split. Precision, recall and F-score of class 1 (the fraud
    class) are written as one CSV row per weight to
    ``./prf/svm_<stem>_prf.txt``. ``<stem>`` is ``filename`` without its
    extension.

    Parameters
    ----------
    x : array-like
        Feature matrix, forwarded to ``train_test_split``.
    y : array-like
        Labels aligned with ``x``; used for stratification. The class weights
        are keyed on labels 0 and 1.
    filename : str
        Name of the input data file; only its stem is used to name outputs.

    Returns
    -------
    None
        Results are printed and written to disk.

    Notes
    -----
    Reads the module-level ``max_iters``. The 15% test split is produced but
    not evaluated in this function.
    """
    # Output locations derived from the input file's stem.
    stem = os.path.splitext(filename)[0]
    model_dir = './models/svm_' + stem + '/'
    prf_path = './prf/svm_' + stem + '_prf' + '.txt'

    # Create the output directories up front; without this a fresh checkout
    # fails with FileNotFoundError on the first open().
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(os.path.dirname(prf_path), exist_ok=True)

    # Stratified sampling based on Y
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, stratify=y, test_size=0.30, random_state=42)

    # Create 15% validation set and 15% test set split
    X_val, X_test, y_val, y_test = train_test_split(
        X_test, y_test, stratify=y_test, test_size=0.50, random_state=42)

    # The context manager guarantees the metrics file is flushed and closed
    # even if fitting, pickling or scoring raises part-way through the sweep.
    with open(prf_path, 'w') as prf_file:
        prf_file.write('precision,recall,f-score \n')

        # Run training algorithm for multiple class weights
        for weight in range(1, max_iters):
            cw = {0: 1, 1: weight}

            # Train
            print('**************************************')
            print("Iteration number  " , weight)
            # Named `clf` so the estimator does not shadow this function.
            clf = LinearSVC(class_weight = cw, dual = False ,tol=1e-05,max_iter = 1000)
            print('Class weights ', cw)
            clf.fit(X_train, y_train)

            # Save trained model to disk; close the handle right away instead
            # of leaking one open file per iteration.
            with open(model_dir + str(weight) + '.sav', 'wb') as model_file:
                pickle.dump(clf, model_file)

            # Predict on validation data
            y_val_pred = clf.predict(X_val)
            print('Performance on validation data - Confusion matrix')
            print(confusion_matrix(y_val, y_val_pred))

            precision, recall, fscore, support = score(y_val, y_val_pred, average=None)
            print('Precision, Recall, F-score, Support on validation data' )
            print("Precision" , precision)
            print("Recall" , recall)
            print("F-score" , fscore)
            print("Support" , support)

            # Index 1 selects the metrics of the second class (label 1).
            prf_file.write(
                ','.join(str(m) for m in (precision[1], recall[1], fscore[1])) + '\n')

def run():
    """Load the expense data, standardise the features and start the SVM sweep.

    Reads columns 9-14 of ``m3.csv``, prints the sum of the ``Anomaly``
    column, scales the five expense features with ``StandardScaler`` and
    passes the scaled matrix together with the flattened labels to ``svm``.
    """
    filename = 'm3.csv'
    feature_cols = ['Airfare', 'Lodging', 'Meals', 'Other_Transportation', 'Other_Expenses']
    target_cols = ['Anomaly']

    df = pd.read_csv(filename, header=0, usecols=[9, 10, 11, 12, 13, 14])

    # Summing the integer-cast flags gives the number of flagged rows.
    fraud_total = sum(int(flag) for flag in df['Anomaly'])
    print('Number of fraudulent transactions ' , fraud_total)

    # Separating out the features and target variables
    raw_features = df.loc[:, feature_cols].values
    raw_targets = df.loc[:, target_cols].values

    # NOTE(review): the scaler is fitted on the full dataset before svm()
    # splits it, so validation rows influence the scaling - confirm intended.
    x = preprocessing.StandardScaler().fit_transform(raw_features)

    # Collapse the (n, 1) target array into a flat list of labels.
    y = list(raw_targets.ravel())

    # Ignore warnings
    warnings.filterwarnings("ignore", category=FutureWarning)

    print("**************** SVM *******************")
    svm(x, y, filename)
  
# Execute the full experiment (load data, scale, sweep class weights) when this cell runs.
run()

Number of fraudulent transactions  4097
**************** SVM *******************
**************************************
Iteration number   1
Class weights  {0: 1, 1: 1}
Performance on validation data - Confusion matrix
[[79310    12]
 [  207   407]]
Precision, Recall, F-score, Support on validation data
Precision [0.99739678 0.97136038]
Recall [0.99984872 0.66286645]
F-score [0.99862125 0.78799613]
Support [79322   614]
**************************************
Iteration number   2
Class weights  {0: 1, 1: 2}
Performance on validation data - Confusion matrix
[[79302    20]
 [  185   429]]
Precision, Recall, F-score, Support on validation data
Precision [0.99767258 0.95545657]
Recall [0.99974786 0.69869707]
F-score [0.99870914 0.80714958]
Support [79322   614]
**************************************
Iteration number   3
Class weights  {0: 1, 1: 3}
Performance on validation data - Confusion matrix
[[79293    29]
 [  162   452]]
Precision, Recall, F-score, Support on validation data
Precision

Performance on validation data - Confusion matrix
[[79061   261]
 [    7   607]]
Precision, Recall, F-score, Support on validation data
Precision [0.99991147 0.69930876]
Recall [0.99670961 0.98859935]
F-score [0.99830797 0.81916329]
Support [79322   614]
**************************************
Iteration number   26
Class weights  {0: 1, 1: 26}
Performance on validation data - Confusion matrix
[[79051   271]
 [    3   611]]
Precision, Recall, F-score, Support on validation data
Precision [0.99996205 0.69274376]
Recall [0.99658355 0.99511401]
F-score [0.99826994 0.81684492]
Support [79322   614]
**************************************
Iteration number   27
Class weights  {0: 1, 1: 27}
Performance on validation data - Confusion matrix
[[78884   438]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998732 0.58325404]
Recall [0.9944782  0.99837134]
F-score [0.99722515 0.73633634]
Support [79322   614]
**************************************
Iteration number

Performance on validation data - Confusion matrix
[[77542  1780]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.9999871  0.25616381]
Recall [0.97755982 0.99837134]
F-score [0.98864629 0.40771533]
Support [79322   614]
**************************************
Iteration number   50
Class weights  {0: 1, 1: 50}
Performance on validation data - Confusion matrix
[[77505  1817]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.9999871  0.25226337]
Recall [0.97709337 0.99837134]
F-score [0.98840768 0.40275953]
Support [79322   614]
**************************************
Iteration number   51
Class weights  {0: 1, 1: 51}
Performance on validation data - Confusion matrix
[[77467  1855]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998709 0.24837925]
Recall [0.97661431 0.99837134]
F-score [0.98816251 0.39779364]
Support [79322   614]
**************************************
Iteration number

Performance on validation data - Confusion matrix
[[76732  2590]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998697 0.19138308]
Recall [0.96734828 0.99837134]
F-score [0.98339688 0.32119466]
Support [79322   614]
**************************************
Iteration number   74
Class weights  {0: 1, 1: 74}
Performance on validation data - Confusion matrix
[[76700  2622]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998696 0.18948995]
Recall [0.96694486 0.99837134]
F-score [0.98318838 0.31852429]
Support [79322   614]
**************************************
Iteration number   75
Class weights  {0: 1, 1: 75}
Performance on validation data - Confusion matrix
[[76666  2656]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998696 0.18751912]
Recall [0.96651623 0.99837134]
F-score [0.98296675 0.31573526]
Support [79322   614]
**************************************
Iteration number

Performance on validation data - Confusion matrix
[[76059  3263]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998685 0.15815273]
Recall [0.95886387 0.99837134]
F-score [0.97899371 0.27305122]
Support [79322   614]
**************************************
Iteration number   98
Class weights  {0: 1, 1: 98}
Performance on validation data - Confusion matrix
[[76039  3283]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998685 0.15734086]
Recall [0.95861173 0.99837134]
F-score [0.97886227 0.27184035]
Support [79322   614]
**************************************
Iteration number   99
Class weights  {0: 1, 1: 99}
Performance on validation data - Confusion matrix
[[76012  3310]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998684 0.15625797]
Recall [0.95827135 0.99837134]
F-score [0.97868478 0.27022261]
Support [79322   614]
**************************************
Iteration number

Performance on validation data - Confusion matrix
[[75488  3834]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998675 0.13784574]
Recall [0.95166536 0.99837134]
F-score [0.97522786 0.24224462]
Support [79322   614]
**************************************
Iteration number   122
Class weights  {0: 1, 1: 122}
Performance on validation data - Confusion matrix
[[75460  3862]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998675 0.13698324]
Recall [0.95131237 0.99837134]
F-score [0.97504248 0.24091177]
Support [79322   614]
**************************************
Iteration number   123
Class weights  {0: 1, 1: 123}
Performance on validation data - Confusion matrix
[[75434  3888]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998674 0.13619196]
Recall [0.95098459 0.99837134]
F-score [0.97487028 0.23968719]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[74960  4362]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998666 0.12321608]
Recall [0.94500895 0.99837134]
F-score [0.9717208  0.21935946]
Support [79322   614]
**************************************
Iteration number   146
Class weights  {0: 1, 1: 146}
Performance on validation data - Confusion matrix
[[74946  4376]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998666 0.12287031]
Recall [0.94483246 0.99837134]
F-score [0.97162748 0.21881135]
Support [79322   614]
**************************************
Iteration number   147
Class weights  {0: 1, 1: 147}
Performance on validation data - Confusion matrix
[[74926  4396]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998665 0.12237972]
Recall [0.94458032 0.99837134]
F-score [0.97149414 0.21803308]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[74516  4806]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998658 0.1131205 ]
Recall [0.93941151 0.99837134]
F-score [0.96875305 0.20321565]
Support [79322   614]
**************************************
Iteration number   170
Class weights  {0: 1, 1: 170}
Performance on validation data - Confusion matrix
[[74496  4826]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998658 0.11270454]
Recall [0.93915938 0.99837134]
F-score [0.96861896 0.20254419]
Support [79322   614]
**************************************
Iteration number   171
Class weights  {0: 1, 1: 171}
Performance on validation data - Confusion matrix
[[74488  4834]
 [    1   613]]
Precision, Recall, F-score, Support on validation data
Precision [0.99998658 0.11253901]
Recall [0.93905852 0.99837134]
F-score [0.96856532 0.20227685]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[74083  5239]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.10490347]
Recall [0.93395275 1.        ]
F-score [0.96584857 0.18988712]
Support [79322   614]
**************************************
Iteration number   194
Class weights  {0: 1, 1: 194}
Performance on validation data - Confusion matrix
[[74061  5261]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.10451064]
Recall [0.9336754 1.       ]
F-score [0.96570024 0.18924333]
Support [79322   614]
**************************************
Iteration number   195
Class weights  {0: 1, 1: 195}
Performance on validation data - Confusion matrix
[[74043  5279]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.10419141]
Recall [0.93344848 1.        ]
F-score [0.96557885 0.18871984]
Support [79322   614]
**************************************
Iteration numb

Performance on validation data - Confusion matrix
[[73679  5643]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.09813009]
Recall [0.92885958 1.        ]
F-score [0.96311789 0.17872217]
Support [79322   614]
**************************************
Iteration number   218
Class weights  {0: 1, 1: 218}
Performance on validation data - Confusion matrix
[[73664  5658]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.09789541]
Recall [0.92867048 1.        ]
F-score [0.96301622 0.17833285]
Support [79322   614]
**************************************
Iteration number   219
Class weights  {0: 1, 1: 219}
Performance on validation data - Confusion matrix
[[73643  5679]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.09756873]
Recall [0.92840574 1.        ]
F-score [0.96287386 0.17779065]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[73310  6012]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.09266526]
Recall [0.92420766 1.        ]
F-score [0.96061114 0.16961326]
Support [79322   614]
**************************************
Iteration number   242
Class weights  {0: 1, 1: 242}
Performance on validation data - Confusion matrix
[[73297  6025]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.09248381]
Recall [0.92404377 1.        ]
F-score [0.96052261 0.16930925]
Support [79322   614]
**************************************
Iteration number   243
Class weights  {0: 1, 1: 243}
Performance on validation data - Confusion matrix
[[73278  6044]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.09221989]
Recall [0.92380424 1.        ]
F-score [0.96039318 0.16886689]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[72940  6382]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.08776444]
Recall [0.91954313 1.        ]
F-score [0.95808541 0.16136662]
Support [79322   614]
**************************************
Iteration number   266
Class weights  {0: 1, 1: 266}
Performance on validation data - Confusion matrix
[[72924  6398]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.08756418]
Recall [0.91934142 1.        ]
F-score [0.95797591 0.16102806]
Support [79322   614]
**************************************
Iteration number   267
Class weights  {0: 1, 1: 267}
Performance on validation data - Confusion matrix
[[72917  6405]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.08747685]
Recall [0.91925317 1.        ]
F-score [0.95792799 0.16088039]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[72651  6671]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.08428277]
Recall [0.91589975 1.        ]
F-score [0.95610404 0.15546272]
Support [79322   614]
**************************************
Iteration number   290
Class weights  {0: 1, 1: 290}
Performance on validation data - Confusion matrix
[[72641  6681]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.08416724]
Recall [0.91577368 1.        ]
F-score [0.95603535 0.15526615]
Support [79322   614]
**************************************
Iteration number   291
Class weights  {0: 1, 1: 291}
Performance on validation data - Confusion matrix
[[72627  6695]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.08400602]
Recall [0.91559719 1.        ]
F-score [0.95593916 0.1549918 ]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[72374  6948]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.08119545]
Recall [0.91240765 1.        ]
F-score [0.95419787 0.15019569]
Support [79322   614]
**************************************
Iteration number   314
Class weights  {0: 1, 1: 314}
Performance on validation data - Confusion matrix
[[72367  6955]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.08112036]
Recall [0.91231941 1.        ]
F-score [0.95414961 0.15006721]
Support [79322   614]
**************************************
Iteration number   315
Class weights  {0: 1, 1: 315}
Performance on validation data - Confusion matrix
[[72352  6970]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.08095992]
Recall [0.9121303 1.       ]
F-score [0.95404618 0.14979263]
Support [79322   614]
**************************************
Iteration numb

Performance on validation data - Confusion matrix
[[72113  7209]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07848651]
Recall [0.90911727 1.        ]
F-score [0.95239542 0.14554937]
Support [79322   614]
**************************************
Iteration number   338
Class weights  {0: 1, 1: 338}
Performance on validation data - Confusion matrix
[[72101  7221]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07836631]
Recall [0.90896599 1.        ]
F-score [0.9523124  0.14534264]
Support [79322   614]
**************************************
Iteration number   339
Class weights  {0: 1, 1: 339}
Performance on validation data - Confusion matrix
[[72091  7231]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07826641]
Recall [0.90883992 1.        ]
F-score [0.9522432  0.14517082]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[71840  7482]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07583992]
Recall [0.9056756 1.       ]
F-score [0.95050343 0.14098737]
Support [79322   614]
**************************************
Iteration number   362
Class weights  {0: 1, 1: 362}
Performance on validation data - Confusion matrix
[[71829  7493]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07573702]
Recall [0.90553693 1.        ]
F-score [0.95042706 0.14080954]
Support [79322   614]
**************************************
Iteration number   363
Class weights  {0: 1, 1: 363}
Performance on validation data - Confusion matrix
[[71816  7506]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07561576]
Recall [0.90537304 1.        ]
F-score [0.95033678 0.14059995]
Support [79322   614]
**************************************
Iteration numb

Performance on validation data - Confusion matrix
[[71593  7729]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07359463]
Recall [0.90256171 1.        ]
F-score [0.94878574 0.13709948]
Support [79322   614]
**************************************
Iteration number   386
Class weights  {0: 1, 1: 386}
Performance on validation data - Confusion matrix
[[71585  7737]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07352413]
Recall [0.90246086 1.        ]
F-score [0.94873001 0.13697713]
Support [79322   614]
**************************************
Iteration number   387
Class weights  {0: 1, 1: 387}
Performance on validation data - Confusion matrix
[[71580  7742]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07348013]
Recall [0.90239782 1.        ]
F-score [0.94869518 0.13690078]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[71351  7971]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07152009]
Recall [0.89951085 1.        ]
F-score [0.94709736 0.13349277]
Support [79322   614]
**************************************
Iteration number   410
Class weights  {0: 1, 1: 410}
Performance on validation data - Confusion matrix
[[71342  7980]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07144519]
Recall [0.89939739 1.        ]
F-score [0.94703446 0.13336229]
Support [79322   614]
**************************************
Iteration number   411
Class weights  {0: 1, 1: 411}
Performance on validation data - Confusion matrix
[[71331  7991]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.07135386]
Recall [0.89925872 1.        ]
F-score [0.94695758 0.13320317]
Support [79322   614]
**************************************
Iteration nu

Performance on validation data - Confusion matrix
[[71130  8192]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.06972519]
Recall [0.89672474 1.        ]
F-score [0.94555074 0.13036093]
Support [79322   614]
**************************************
Iteration number   434
Class weights  {0: 1, 1: 434}
Performance on validation data - Confusion matrix
[[71119  8203]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.        0.0696382]
Recall [0.89658607 1.        ]
F-score [0.94547364 0.13020889]
Support [79322   614]
**************************************
Iteration number   435
Class weights  {0: 1, 1: 435}
Performance on validation data - Confusion matrix
[[71111  8211]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.06957507]
Recall [0.89648521 1.        ]
F-score [0.94541756 0.13009853]
Support [79322   614]
**************************************
Iteration numb

Performance on validation data - Confusion matrix
[[70897  8425]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.06792787]
Recall [0.89378735 1.        ]
F-score [0.94391522 0.12721434]
Support [79322   614]
**************************************
Iteration number   458
Class weights  {0: 1, 1: 458}
Performance on validation data - Confusion matrix
[[70887  8435]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.        0.0678528]
Recall [0.89366128 1.        ]
F-score [0.94384491 0.12708269]
Support [79322   614]
**************************************
Iteration number   459
Class weights  {0: 1, 1: 459}
Performance on validation data - Confusion matrix
[[70878  8444]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.06778538]
Recall [0.89354782 1.        ]
F-score [0.94378162 0.12696443]
Support [79322   614]
**************************************
Iteration numb

Performance on validation data - Confusion matrix
[[70684  8638]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.06636403]
Recall [0.89110209 1.        ]
F-score [0.94241564 0.12446787]
Support [79322   614]
**************************************
Iteration number   482
Class weights  {0: 1, 1: 482}
Performance on validation data - Confusion matrix
[[70669  8653]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.06625661]
Recall [0.89091299 1.        ]
F-score [0.94230987 0.12427892]
Support [79322   614]
**************************************
Iteration number   483
Class weights  {0: 1, 1: 483}
Performance on validation data - Confusion matrix
[[70663  8659]
 [    0   614]]
Precision, Recall, F-score, Support on validation data
Precision [1.         0.06621374]
Recall [0.89083735 1.        ]
F-score [0.94226756 0.1242035 ]
Support [79322   614]
**************************************
Iteration nu