In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import gridspec

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, confusion_matrix


In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
# force_remount=True and a 120000 ms timeout are passed straight to
# drive.mount.
from google.colab import drive

drive.mount(
    '/content/gdrive',
    force_remount=True,
    timeout_ms=120000,
)


In [None]:
# Switch the working directory to the project folder on the mounted Drive
# and list its contents; the CSV is later read by a relative path.
%cd /content/gdrive/MyDrive/credit_fraud/
%ls

In [None]:
# Read the transactions CSV from the current working directory and preview
# the first five rows as the cell output.
data = pd.read_csv(filepath_or_buffer='creditcard.csv')
data.head(n=5)

In [None]:
# Show the frame's dimensions, then its per-column summary statistics,
# one after the other on separate lines.
print(data.shape, data.describe(), sep='\n')

In [None]:
# Partition the transactions by label: rows with Class == 1 go to `fraud`,
# rows with Class == 0 go to `valid`.
fraud = data[data['Class'] == 1]
valid = data[data['Class'] == 0]

# Ratio of fraud rows to valid rows (note: relative to the valid count,
# not to the total number of rows).
outlier_fraction = len(fraud) / len(valid)
print(outlier_fraction)

# Report the counts from the subsets computed above. Keeping quotes out of
# the replacement fields matters: reusing the outer quote character inside
# an f-string is a SyntaxError on Python versions before 3.12.
print(f'Fraud Cases: {len(fraud)}')
print(f'Valid Transactions: {len(valid)}')

In [None]:
# Summary statistics of the Amount column for the fraud subset; the
# describe() result is the cell's displayed output.
print('Fraudulent transaction details')
fraud['Amount'].describe()

In [None]:
# Summary statistics of the Amount column for the valid subset; the
# describe() result is the cell's displayed output.
print('Valid transaction details')
valid['Amount'].describe()

In [None]:
# Pairwise correlation between the columns of `data`, drawn as a heatmap
# with square cells and the colour scale capped at 0.8.
correlation = data.corr()
fig = plt.figure(figsize=(12, 9))
sns.heatmap(data=correlation, vmax=0.8, square=True)
plt.show()

In [None]:
# Features are every column except 'Class'; 'Class' itself is the target.
X = data.drop(columns=['Class'])
Y = data['Class']

# Print both shapes, one per line.
print(X.shape, Y.shape, sep='\n')


In [None]:
# Extract the underlying array values of the feature frame and the target
# series for the modelling cells that follow.
xData, yData = X.values, Y.values

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    xData, yData, test_size=0.2, random_state=42)

# Fit on the training arrays produced by the split above (x_train/y_train;
# the camelCase names xTrain/yTrain are never defined in this notebook).
# The forest is seeded too, so a Restart & Run All yields the same model.
forest = RandomForestClassifier(random_state=42)
forest.fit(x_train, y_train)

In [None]:
# Predict a class label for every row of the held-out test features.
y_pred = forest.predict(x_test)

In [None]:
# Bookkeeping counts: total fraud rows in the full dataset and the number
# of test rows where the prediction differs from the true label. Neither
# is printed in this cell.
n_outliers = len(fraud)
n_errors = (y_pred != y_test).sum()
print('Random Forest classifier model details.')

# Each metric compares the true test labels against the predictions.
acc = accuracy_score(y_test, y_pred)
print(f'The accuracy is {acc}')

prec = precision_score(y_test, y_pred)
print(f'The precision is {prec}')

recall = recall_score(y_test, y_pred)
print(f'The recall is {recall}')

f1 = f1_score(y_test, y_pred)
print(f'The F1-Score is {f1}')

# A space separates the label from the value, matching the lines above.
mcc = matthews_corrcoef(y_test, y_pred)
print(f'The Matthews correlation coefficient is {mcc}')

In [None]:
# Tick labels for the two classes, in the order 0 then 1.
LABELS = ['Normal', 'Fraud']

# Build the matrix from the test labels and predictions created earlier
# (y_test / y_pred; the camelCase names yTest / yPred are never defined).
conf_matrix = confusion_matrix(y_test, y_pred)

# Rows are true classes and columns are predicted classes; annotate each
# cell with its integer count.
plt.figure(figsize=(12, 12))
sns.heatmap(conf_matrix, xticklabels=LABELS, yticklabels=LABELS,
            annot=True, fmt='d')
plt.title('Confusion matrix')
plt.ylabel('True class')
plt.xlabel('Predicted class')
plt.show()