# Detecting Fraudulent Credit Card Transactions

In [None]:
# Imports
import math
import random
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

## Loading Data

In [None]:
# Load the transactions dataset from 'creditcard.csv' (the path is relative to
# the notebook's working directory) into the DataFrame used by the later cells.
cc_df = pd.read_csv('creditcard.csv')

In [None]:
# Preview the first rows of the data (DataFrame.head() returns 5 rows by default)
cc_df.head()

In [None]:
# Print a per-column overview: column names, non-null counts and dtypes
cc_df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
cc_df.describe()

In [None]:
# Class balance. 'Class' == 0 marks a genuine (non-fraud) transaction and
# 'Class' == 1 marks a fraudulent one.
is_genuine = cc_df['Class'] == 0
is_fraud = cc_df['Class'] == 1

# Summing a boolean mask counts the rows where the condition holds
non_fraud = int(is_genuine.sum())
fraud = int(is_fraud.sum())
fraud_percentage = fraud / len(cc_df) * 100

print(f"Number of genuine transactions: {non_fraud}")
print(f"Number of fraud transactions: {fraud}")
print(f"Percentage of fraud transactions: {fraud_percentage:.4f}")

## Visualizing Data

In [None]:
# Heat map of the null mask (cc_df.isnull()): a missing value would show up as
# a cell in a contrasting color. Row tick labels and the color bar are turned
# off because only the presence/absence pattern matters here.
plt.figure(figsize=(14, 8))
sns.heatmap(cc_df.isnull(), yticklabels=False, cbar=False, cmap='viridis')

In [None]:
# Count plot of the 'Class' column: one bar per class (0 = genuine, 1 = fraud).
# The title is set first on the current axes, which countplot then draws onto.
plt.title('Genuine vs Fraud Transactions')
sns.countplot(x='Class', data=cc_df)

In [None]:
# Histogram of the 'Amount' column, drawn on a 10x5 inch figure
plt.figure(figsize=(10, 5))
sns.histplot(x='Amount', data=cc_df)

In [None]:
# Histogram of the 'Time' column with a kernel density estimate curve
# overlaid (kde=True)
sns.displot(x='Time', data=cc_df, kde=True)

In [None]:
# Pairwise correlation matrix of the columns of cc_df, shown as an annotated
# heat map (annot=True writes each coefficient inside its cell). The large
# figure size leaves room for the annotations.
corr = cc_df.corr()
plt.figure(figsize=(30, 40))
sns.heatmap(corr, annot=True, cmap='coolwarm')

## Training Data

In [None]:
# Standardize the 'Amount' column. StandardScaler expects a 2-D input, so the
# column is reshaped to (n_rows, 1) before fitting.
# NOTE: the scaler is fit on the full dataset, i.e. before any train/test split.
amount_column = cc_df['Amount'].to_numpy().reshape(-1, 1)
scaler = StandardScaler()
cc_df['NormalizedAmount'] = scaler.fit_transform(amount_column)

# The raw 'Amount' (replaced by 'NormalizedAmount') and 'Time' columns are not
# used as features, so remove them in place.
cc_df.drop(columns=['Amount', 'Time'], inplace=True)

In [None]:
# Separate the feature matrix (every column except 'Class') from the target
X = cc_df.drop(columns='Class')
y = cc_df['Class']

# Hold out 30% of the rows for testing. stratify=y keeps the genuine/fraud
# ratio identical in both partitions, so the rare fraud class is represented
# proportionally in the test set instead of being left to chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42, stratify=y
)

In [None]:
# Random forest of 100 trees, trained on all available CPU cores (n_jobs=-1).
# random_state pins the randomness used while building the trees so that
# re-running the notebook reproduces the same model and the same metrics.
rfc = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)
rfc.fit(X_train, y_train)

In [None]:
# Predict the held-out rows once and reuse the result for every metric
predictions = rfc.predict(X_test)
# Accuracy as a percentage, computed from the predictions above.
# rfc.score(X_test, y_test) would run a second prediction pass over X_test
# only to arrive at the same number.
rfc_score = accuracy_score(y_test, predictions) * 100
rfc_score

In [None]:
# Helper that prints the performance metrics of the model
def display_metrics(y_test, predictions):
    """Print accuracy, precision, recall and F1-score for a set of predictions.

    Args:
        y_test: True class labels.
        predictions: Predicted class labels, aligned with ``y_test``.

    Each score is printed on its own line with five decimal places.
    """
    scorers = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1-score", f1_score),
    )
    for label, scorer in scorers:
        print(f"{label}: {scorer(y_test, predictions):.5f}")

In [None]:
# Confusion matrix of the test-set predictions, drawn as an annotated heat map.
# scikit-learn's confusion_matrix puts the true labels on the rows and the
# predicted labels on the columns. fmt=".0f" prints the counts without decimals.
cf_matrix = confusion_matrix(y_test, predictions)
plt.title('Confusion Matrix - Random Forest')
sns.heatmap(cf_matrix, annot=True, cmap='Blues', fmt=".0f")

In [None]:
# Print accuracy, precision, recall and F1-score for the test-set predictions
print('Random Forest Metrics')
display_metrics(y_test, predictions)

## Oversampling Data

In [None]:
# The classes are highly imbalanced, so the minority (fraud) class is
# oversampled with SMOTE.
#
# The data is split FIRST and SMOTE is fit on the training partition only.
# Resampling the whole dataset before splitting leaks information: synthetic
# points interpolated from rows that end up in the test set would be used for
# training, and the test set itself would contain synthetic samples, which
# inflates every metric. The test partition therefore stays untouched, real data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42, stratify=y
)

# X_resampled / y_resampled hold the balanced training data; random_state
# makes the generated synthetic samples reproducible between runs.
X_resampled, y_resampled = SMOTE(random_state=42).fit_resample(X_train, y_train)
X_train, y_train = X_resampled, y_resampled

In [None]:
# Count plot of the resampled labels (y_resampled) to check the class balance
# after oversampling
plt.title('Genuine vs Fraud Transactions')
sns.countplot(x=y_resampled)

In [None]:
# Retrain the random forest (100 trees, all CPU cores) on the training data
# produced by the oversampling cell. random_state pins the randomness used
# while building the trees so the results are reproducible between runs.
rfc = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)
rfc.fit(X_train, y_train)

In [None]:
# Predict the held-out rows once and reuse the result for every metric
predictions = rfc.predict(X_test)
# Accuracy as a percentage, computed from the predictions above.
# rfc.score(X_test, y_test) would run a second prediction pass over X_test
# only to arrive at the same number.
rfc_score = accuracy_score(y_test, predictions) * 100
rfc_score

In [None]:
# Confusion matrix of the test-set predictions made by the model trained after
# oversampling, drawn as an annotated heat map. scikit-learn's confusion_matrix
# puts the true labels on the rows and the predicted labels on the columns.
cf_matrix = confusion_matrix(y_test, predictions)
plt.title('Confusion Matrix - Random Forest')
sns.heatmap(cf_matrix, annot=True, cmap='Blues', fmt=".0f")

In [None]:
# Print accuracy, precision, recall and F1-score for the model trained after
# oversampling
display_metrics(y_test, predictions)

## User Input

In [None]:
# Method handles the user input and user interface portion
def display_ui():
    """Let the user pick a random transaction and show the model's prediction.

    Prints a short menu, reads one option from standard input and then samples
    a single row from the module-level ``cc_df``:

    * ``'g'`` - a random genuine transaction (``Class == 0``)
    * ``'f'`` - a random fraudulent transaction (``Class == 1``)
    * ``'r'`` - a random transaction of either class

    The sampled row (without its ``Class`` column) is passed to the
    module-level ``rfc`` model, and the predicted and actual classes are
    printed side by side. Any other input prints a hint and returns.

    NOTE: rows are drawn from the full ``cc_df``, which also contains rows the
    model was trained on, so this is a demonstration and not an evaluation.
    """
    class_names = {0: 'genuine', 1: 'fraudulent'}

    print("After training and testing the model, the program will now accurately predict a transaction as genuine or fraudulent\n")
    print("Enter 'g' to randomly select a genuine transaction from the list of transactions. The model will then predict the class")
    print("Enter 'f' to randomly select a fraudulent transaction from the list of transactions. The model will then predict the class")
    print("Enter 'r' to randomly select any type of transaction for the model to predict. The model will then predict the class")
    print("\n0 is a genuine transaction | 1 is a fraudulent transaction\n")
    user_input = input("Enter Option - ").strip().lower()

    # Select the pool of rows the random transaction is drawn from
    if user_input == 'g':  # genuine transaction
        candidates = cc_df[cc_df['Class'] == 0]
    elif user_input == 'f':  # fraud transaction
        candidates = cc_df[cc_df['Class'] == 1]
    elif user_input == 'r':  # random transaction
        candidates = cc_df[cc_df['Class'].isin([0, 1])]
    else:  # User entered an incorrect option
        print('Please run cell again an enter a correct option')
        return

    # DataFrame.sample() raises on an empty frame, so report that case instead
    if candidates.empty:
        print('No matching transactions are available to sample from')
        return

    rand_transaction = candidates.sample()
    features = rand_transaction.drop('Class', axis=1)

    # Read the true class by column name rather than by a hard-coded position,
    # so the lookup does not depend on the column layout of cc_df.
    actual_val = int(rand_transaction['Class'].iloc[0])

    predicted = rfc.predict(features)
    predicted_val = int(predicted[0])

    # Each label is derived from its own value: the word after the predicted
    # value describes the prediction, not the true class, so a wrong
    # prediction is visible in the output.
    predicted_name = class_names.get(predicted_val, str(predicted_val))
    actual_name = class_names.get(actual_val, str(actual_val))

    print(f"******Transaction Data******\n{features.iloc[0]}")
    print("****************************")
    print(f">Predicted Value - {predicted} '{predicted_name}' | Actual Value - [{actual_val}] '{actual_name}'")

In [None]:
# Run the interactive prompt: asks for an option, samples one transaction and
# prints the model's prediction next to the actual class
display_ui()
# RUN THIS CELL AGAIN FOR THE MODEL TO MAKE ANOTHER PREDICTION