<b>1) Import all libs</b>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

<b>2) Import data and select training / test samples</b>

In [None]:
# Load the transactions table from the CSV file shipped with the kernel input.
data_path = '../input/creditcard.csv'
df = pd.read_csv(data_path)

In [None]:
# Ratio of the sum of the 'Class' column to its non-null row count; used
# further down to derive the per-sample weights.
class_column = df['Class']
unbalance = class_column.sum() / class_column.count()

In [None]:
# Name of the label column.
Y = 'Class'
# Feature columns: positions 1 through 28 of the frame's column index.
X = df.columns[1:29]

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    df[X],
    df[Y],
    test_size=0.2,
    random_state=0,
)

<b>3.1) Classify using Random Forest with sample_weight</b>

In [None]:
# Per-sample weights for training: rows labelled 1 get 1/unbalance,
# every other row keeps a weight of 1.
positive_weight = 1/unbalance
weight = np.array([positive_weight if label == 1 else 1 for label in Y_train])

In [None]:
# Random forest fitted on the original training rows, with the class
# imbalance compensated through the per-sample weights.
rf = RandomForestClassifier(random_state=0)
rf.fit(X=X_train, y=Y_train, sample_weight=weight)

In [None]:
# Predicted class labels for the held-out rows.
Y_predict = rf.predict(X=X_test)

In [None]:
# Confusion matrix of true vs. predicted labels (shown as the cell output).
confusion_matrix(y_true=Y_test, y_pred=Y_predict)

In [None]:
# Text report of the per-class metrics for the weighted model.
print(classification_report(y_true=Y_test, y_pred=Y_predict))

In [None]:
# Build the ROC curve from the predicted probability of class 1 rather than
# from the hard 0/1 predictions: with hard labels the curve collapses to a
# single operating point and says nothing about the model's ranking quality.
# Column 1 of predict_proba is taken as the positive class (classes_ is
# sorted and the labels are assumed to be 0/1).
Y_score = rf.predict_proba(X_test)[:, 1]
# FP / TP hold the false-positive and true-positive rates per threshold.
FP, TP, thresholds = roc_curve(Y_test, Y_score)
roc_auc = auc(FP, TP)
print(roc_auc)

In [None]:
# ROC curve of the weighted random forest, drawn against the chance diagonal.
plt.plot(FP, TP, label='AUC = %0.2f'% roc_auc)
plt.title('ROC for Random Forest Classifier')
plt.plot([0,1],[0,1],'--')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
# Without an explicit legend the 'AUC = ...' label above is never rendered.
plt.legend(loc='lower right')

<b>3.2) Classify using Random Forest with oversampling (SMOTE)</b>

In [None]:
# Oversample the training split with SMOTE. Only the training rows are
# resampled; the test split is left untouched.
oversampler = SMOTE(random_state=0)
# fit_resample is the current imbalanced-learn API; fit_sample was a
# deprecated alias that newer releases removed.
X_train_os, Y_train_os = oversampler.fit_resample(X_train, Y_train)

In [None]:
# Random forest fitted on the SMOTE-oversampled training data (no sample
# weights are passed here).
rf_os = RandomForestClassifier(random_state=0)
rf_os.fit(X=X_train_os, y=Y_train_os)

In [None]:
# Predicted class labels for the held-out rows from the SMOTE-trained model.
Y_predict_os = rf_os.predict(X=X_test)

In [None]:
# Confusion matrix of true vs. predicted labels for the SMOTE-trained model
# (shown as the cell output).
confusion_matrix(y_true=Y_test, y_pred=Y_predict_os)

In [None]:
# Text report of the per-class metrics for the SMOTE-trained model.
print(classification_report(y_true=Y_test, y_pred=Y_predict_os))

In [None]:
# Build the ROC curve from the predicted probability of class 1 rather than
# from the hard 0/1 predictions: with hard labels the curve collapses to a
# single operating point and says nothing about the model's ranking quality.
# Column 1 of predict_proba is taken as the positive class (classes_ is
# sorted and the labels are assumed to be 0/1).
Y_score_os = rf_os.predict_proba(X_test)[:, 1]
# FP / TP hold the false-positive and true-positive rates per threshold.
FP, TP, thresholds = roc_curve(Y_test, Y_score_os)
roc_auc = auc(FP, TP)
print(roc_auc)

In [None]:
# ROC curve of the SMOTE-trained random forest, drawn against the chance
# diagonal.
plt.plot(FP, TP, label='AUC = %0.2f'% roc_auc)
plt.title('ROC for Random Forest Classifier, with SMOTE')
plt.plot([0,1],[0,1],'--')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
# Without an explicit legend the 'AUC = ...' label above is never rendered.
plt.legend(loc='lower right')