# Credit Card Fraud Detection

In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [32]:
credit_data = pd.read_csv(".\\input\\creditcard.csv")

## Data Exploration

In [33]:
credit_data.head(n=10)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0
5,2.0,-0.425966,0.960523,1.141109,-0.168252,0.420987,-0.029728,0.476201,0.260314,-0.568671,...,-0.208254,-0.559825,-0.026398,-0.371427,-0.232794,0.105915,0.253844,0.08108,3.67,0
6,4.0,1.229658,0.141004,0.045371,1.202613,0.191881,0.272708,-0.005159,0.081213,0.46496,...,-0.167716,-0.27071,-0.154104,-0.780055,0.750137,-0.257237,0.034507,0.005168,4.99,0
7,7.0,-0.644269,1.417964,1.07438,-0.492199,0.948934,0.428118,1.120631,-3.807864,0.615375,...,1.943465,-1.015455,0.057504,-0.649709,-0.415267,-0.051634,-1.206921,-1.085339,40.8,0
8,7.0,-0.894286,0.286157,-0.113192,-0.271526,2.669599,3.721818,0.370145,0.851084,-0.392048,...,-0.073425,-0.268092,-0.204233,1.011592,0.373205,-0.384157,0.011747,0.142404,93.2,0
9,9.0,-0.338262,1.119593,1.044367,-0.222187,0.499361,-0.246761,0.651583,0.069539,-0.736727,...,-0.246914,-0.633753,-0.120794,-0.38505,-0.069733,0.094199,0.246219,0.083076,3.68,0


In [34]:
credit_data[["Class", "Time"]].groupby("Class").count()

Unnamed: 0_level_0,Time
Class,Unnamed: 1_level_1
0,284315
1,492


- unbalanced dataset, most transactions are legitimate

In [35]:
credit_data.shape

(284807, 31)

In [36]:
credit_data["Amount"].describe()

count    284807.000000
mean         88.349619
std         250.120109
min           0.000000
25%           5.600000
50%          22.000000
75%          77.165000
max       25691.160000
Name: Amount, dtype: float64

## Data Manipulation
### Standardize `Amount`

In [37]:
from sklearn import preprocessing
## https://www.askpython.com/python/examples/standardize-data-in-python
credit_data["Amount"] = preprocessing.scale(credit_data["Amount"])
credit_data.drop("Time", axis=1, inplace=True)
credit_data.head(10)

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0.244964,0
1,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,-0.342475,0
2,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,1.160686,0
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0.140534,0
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,-0.073403,0
5,-0.425966,0.960523,1.141109,-0.168252,0.420987,-0.029728,0.476201,0.260314,-0.568671,-0.371407,...,-0.208254,-0.559825,-0.026398,-0.371427,-0.232794,0.105915,0.253844,0.08108,-0.338556,0
6,1.229658,0.141004,0.045371,1.202613,0.191881,0.272708,-0.005159,0.081213,0.46496,-0.099254,...,-0.167716,-0.27071,-0.154104,-0.780055,0.750137,-0.257237,0.034507,0.005168,-0.333279,0
7,-0.644269,1.417964,1.07438,-0.492199,0.948934,0.428118,1.120631,-3.807864,0.615375,1.249376,...,1.943465,-1.015455,0.057504,-0.649709,-0.415267,-0.051634,-1.206921,-1.085339,-0.190107,0
8,-0.894286,0.286157,-0.113192,-0.271526,2.669599,3.721818,0.370145,0.851084,-0.392048,-0.41043,...,-0.073425,-0.268092,-0.204233,1.011592,0.373205,-0.384157,0.011747,0.142404,0.019392,0
9,-0.338262,1.119593,1.044367,-0.222187,0.499361,-0.246761,0.651583,0.069539,-0.736727,-0.366846,...,-0.246914,-0.633753,-0.120794,-0.38505,-0.069733,0.094199,0.246219,0.083076,-0.338516,0


## Data Modeling
### Split the dataset into training and testing sets

In [38]:
from sklearn.model_selection import train_test_split
x = credit_data.drop("Class", axis=1)
y = credit_data["Class"]
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2)
X_train

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
5780,-1.481485,1.648191,0.362125,0.073766,-0.343862,-0.138544,-0.445086,1.092799,0.880826,-0.726386,...,-0.170956,-0.307634,-0.927267,0.100968,-0.479200,-0.227998,0.067975,-0.059619,-0.000335,-0.317287
85541,1.171247,0.213456,0.245969,1.153716,-0.347627,-0.965100,0.238373,-0.218010,0.045159,0.010477,...,-0.095702,0.045252,0.070722,-0.126744,0.387770,0.658744,-0.330972,0.003737,0.025974,-0.185310
50513,-0.770995,0.266648,2.468239,0.098532,0.088182,1.626714,-0.315100,0.759066,0.305221,-0.899141,...,-0.172012,0.148030,0.681261,0.061847,-0.602893,-0.644149,0.378655,0.179318,0.115925,-0.307251
247127,2.098061,-0.069564,-1.585582,0.182761,0.557697,-0.352135,0.222716,-0.256575,0.481362,0.019655,...,-0.094560,-0.283494,-0.661770,0.211979,0.122866,-0.056420,0.278911,-0.075603,-0.059218,-0.292259
185652,-0.206180,0.575146,-2.495452,-1.133237,2.509649,3.312664,0.438192,1.184692,-0.446644,-1.272051,...,0.122842,0.417801,0.861464,0.206757,0.618609,-0.254105,-0.136235,-0.028312,0.041369,0.189711
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182597,1.898130,-0.291303,-1.440007,0.238590,0.098193,-0.787103,0.235996,-0.270986,0.312201,0.129375,...,-0.030066,0.343698,0.973281,-0.127500,-0.279778,0.254657,-0.111600,-0.031962,-0.056122,-0.032503
112043,-0.736087,0.451076,2.730187,1.056095,-0.663598,0.737382,-0.458285,0.463286,-0.055005,-0.067634,...,0.084732,0.063486,0.312456,-0.178210,0.267861,-0.221520,0.592321,-0.010243,0.076750,-0.307251
138070,-1.840387,-0.127289,3.893024,4.865979,-0.086045,1.681919,0.180612,-0.552422,0.156433,2.699224,...,-0.475641,-0.349603,0.747680,0.331091,0.570345,-0.021174,0.333446,-0.979979,-0.823991,-0.208379
39974,1.132404,0.004940,0.386392,1.296521,-0.299556,0.078993,-0.204066,0.267471,0.393401,0.117323,...,-0.345485,-0.081418,-0.135131,-0.041027,-0.039648,0.554659,-0.320852,0.021768,-0.000521,-0.345233
