Three different re-sampling methods are used to balance the data, and a Random Forest classifier is trained on each re-sampled set to compare the predicted results.

In [None]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

In [None]:
from imblearn.under_sampling import ClusterCentroids #undersampling
from imblearn.over_sampling import SMOTE  #oversampling
from imblearn.combine import SMOTEENN

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the dataset from the relative input directory into a DataFrame.
df = pd.read_csv("../input/creditcard.csv")

In [None]:
# Display pandas' summary statistics for the loaded DataFrame.
df.describe()

In [None]:
# Print how many rows carry each distinct value of the 'Class' column.
print(df['Class'].value_counts())

In [None]:
# scikit-learn moved the splitting helpers to `model_selection` in 0.18 and
# removed the old `cross_validation` module in 0.20.
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions identical in both partitions, and the fixed seed makes the
# split (and every score computed from it) reproducible across runs.
train, test = train_test_split(
    df, train_size=0.8, stratify=df["Class"], random_state=42
)

In [None]:
# Preview the first rows of the training partition.
train.head()

In [None]:
# Separate the training partition into the target column and the features.
# (The stray bare `train_label` expression was dropped: it was not the last
# statement of the cell, so it neither displayed nor did anything.)
train_label = train["Class"]
train_feature = train.drop("Class", axis=1)

In [None]:
# Preview the first rows of the training features (the 'Class' column removed).
train_feature.head()

**Under-sampling method: ClusterCentroids**

In [None]:
# Under-sample the training data with ClusterCentroids (default settings).
CC = ClusterCentroids()
# imbalanced-learn 0.4 renamed `fit_sample` to `fit_resample` and later removed
# the old name; call whichever one this installation provides.
_cc_resample = CC.fit_resample if hasattr(CC, "fit_resample") else CC.fit_sample
ccx, ccy = _cc_resample(train_feature, train_label)

In [None]:
# Tally the samples per class after under-sampling and print the result as a
# two-column table: class label, number of samples.
unique, counts = np.unique(ccy, return_counts=True)
print(np.asarray([unique, counts]).T)

In [None]:
# Separate the held-out partition into the target column and the features.
# (The stray bare `test_label` expression was dropped: it was not the last
# statement of the cell, so it neither displayed nor did anything.)
test_label = test["Class"]
test_feature = test.drop("Class", axis=1)

In [None]:
# Train a Random Forest on the under-sampled data and predict the held-out
# test partition, which was not re-sampled.
rfc = RandomForestClassifier()
rfc.fit(ccx, ccy)
y_cc_pred = rfc.predict(test_feature)

# classification_report expects (y_true, y_pred). Passing them the other way
# round swaps precision and recall in the printed table.
print(classification_report(test_label, y_cc_pred))

In [None]:
# Confusion matrix of the under-sampled model on the held-out test partition.
from sklearn.metrics import confusion_matrix

confusion_matrix(y_true=test_label, y_pred=y_cc_pred)

**Over-sampling method: SMOTE**

In [None]:
# Over-sample the training data with regular SMOTE. The former keyword
# arguments `ratio='auto'` and `kind='regular'` only restated the library
# defaults and were removed in newer imbalanced-learn releases, so they are
# omitted to keep the cell working on both old and new versions.
smote = SMOTE()
# `fit_sample` was renamed to `fit_resample` in imbalanced-learn 0.4 and later
# removed; call whichever one this installation provides.
_smote_resample = (
    smote.fit_resample if hasattr(smote, "fit_resample") else smote.fit_sample
)
smox, smoy = _smote_resample(train_feature, train_label)

In [None]:
# Tally the samples per class after SMOTE and print the result as a
# two-column table: class label, number of samples.
unique_smote, counts_smote = np.unique(smoy, return_counts=True)
print(np.asarray([unique_smote, counts_smote]).T)

In [None]:
# Refit the Random Forest on the SMOTE-balanced data and predict the held-out
# test partition.
rfc.fit(smox, smoy)
y_smote_pred = rfc.predict(test_feature)
# classification_report expects (y_true, y_pred). Passing them the other way
# round swaps precision and recall in the printed table.
print(classification_report(test_label, y_smote_pred))

In [None]:
# Confusion matrix of the SMOTE-trained model on the held-out test partition.
confusion_matrix(y_true=test_label, y_pred=y_smote_pred)

**Combined method: SMOTEENN**

In [None]:
# Re-sample the training data with SMOTEENN. The former keyword argument
# `ratio='auto'` only restated the library default and was removed in newer
# imbalanced-learn releases, so it is omitted.
SENN = SMOTEENN()
# `fit_sample` was renamed to `fit_resample` in imbalanced-learn 0.4 and later
# removed; call whichever one this installation provides.
_senn_resample = (
    SENN.fit_resample if hasattr(SENN, "fit_resample") else SENN.fit_sample
)
ennx, enny = _senn_resample(train_feature, train_label)

# Tally the samples per class after re-sampling and print the result as a
# two-column table: class label, number of samples.
unique_enny, counts_enny = np.unique(enny, return_counts=True)
print(np.asarray((unique_enny, counts_enny)).T)

In [None]:
# Refit the Random Forest on the SMOTEENN-re-sampled data and predict the
# held-out test partition.
rfc.fit(ennx, enny)
y_senn_pred = rfc.predict(test_feature)
# classification_report expects (y_true, y_pred). Passing them the other way
# round swaps precision and recall in the printed table.
print(classification_report(test_label, y_senn_pred))

In [None]:
# Confusion matrix of the SMOTEENN-trained model on the held-out test partition.
confusion_matrix(y_true=test_label, y_pred=y_senn_pred)