# Credit Card Fraud Detection Using Machine Learning

In [None]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
import seaborn as sns
from pylab import rcParams
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.manifold import TSNE
from sklearn.metrics import classification_report, accuracy_score
from keras.models import Model, load_model
from keras.layers import Input, Dense
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import regularizers, Sequential

In [None]:
# Plot appearance: seaborn theme first, then a 14x8 default figure size.
sns.set(font_scale=1.5, palette='muted', style='whitegrid')
rcParams['figure.figsize'] = (14, 8)

# Shared constants: display names for Class 0 / Class 1 (used as tick labels)
# and the seed passed to random_state arguments.
LABELS = ["Normal", "Fraud"]
RANDOM_SEED = 42

In [None]:
# Path of the dataset CSV; intentionally left empty -- place the dataset path here.
dataset_path = ""
df = pd.read_csv(dataset_path)

In [None]:
# Bar chart of the number of transactions per class.
# Series.value_counts is used because the top-level pd.value_counts helper is
# deprecated in recent pandas releases.
count_classes = df['Class'].value_counts(sort=True)
count_classes.plot(kind='bar', rot=0)
plt.title("Transaction Class Distribution")
# Bars are ordered by frequency, so LABELS lines up with them only while
# class 0 is the more frequent class -- NOTE(review): confirm for the dataset used.
plt.xticks(range(2), LABELS)
plt.xlabel('Class')
plt.ylabel('Frequency')

In [None]:
# Separate the rows by label: Class == 1 goes to `frauds`, Class == 0 to `normal`.
frauds = df[df['Class'] == 1]
normal = df[df['Class'] == 0]
# Last expression of the cell: shows (rows, columns) of the fraud subset.
frauds.shape

# Checking the Amount of Money involved in each kind of transaction

In [None]:
# Summary statistics of the 'Amount' column for the fraud rows.
frauds['Amount'].describe()

In [None]:
# Summary statistics of the 'Amount' column for the normal rows.
normal['Amount'].describe()

In [None]:
# Two stacked histograms of the transaction amount (fraud on top, normal
# below) that share a single x-axis.
bins = 50
f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True)
f.suptitle('Amount per transaction by class')

ax1.set_title('Fraud')
ax1.hist(frauds['Amount'], bins=bins)

ax2.set_title('Normal')
ax2.hist(normal['Amount'], bins=bins)

# Labels, limits and the log scale are applied to the bottom panel (ax2, the
# current axes after plt.subplots); the shared x-axis carries the limits to ax1.
ax2.set_xlabel('Amount')
ax2.set_ylabel('Number of transactions')
ax2.set_xlim(0, 20000)
ax2.set_yscale('log')
plt.show()

# Plotting time of transaction to check for correlation

In [None]:
# Scatter plots of amount against transaction time, one panel per class,
# stacked vertically with a shared x-axis.
f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True)
f.suptitle('Time of Transaction Vs Amount by Class')

ax1.set_title('Fraud')
ax1.scatter(frauds['Time'], frauds['Amount'])

ax2.set_title('Normal')
ax2.scatter(normal['Time'], normal['Amount'])

# Axis labels go on the bottom panel (ax2, the current axes after plt.subplots).
ax2.set_xlabel('Time in Seconds')
ax2.set_ylabel('Amount')
plt.show()

In [None]:
# New frame without the 'Time' column; df itself is left untouched.
data = df.drop(columns=['Time'])

# Scaling the amount using StandardScaler

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise 'Amount' in place. The scaler expects a 2-D input, hence the
# reshape of the column into a single-feature matrix.
amount_scaler = StandardScaler()
amount_matrix = data['Amount'].values.reshape(-1, 1)
data['Amount'] = amount_scaler.fit_transform(amount_matrix)

# Building the Model

In [None]:
import pandas as pd

# Recombine the two classes and shuffle the rows. The shuffle is seeded with
# RANDOM_SEED so the row order is identical from run to run.
non_fraud = data[data['Class'] == 0]
fraud = data[data['Class'] == 1]
# NOTE: this rebinds `df` to the shuffled frame, which no longer has 'Time'.
df = (
    pd.concat([non_fraud, fraud])
    .sample(frac=1, random_state=RANDOM_SEED)
    .reset_index(drop=True)
)

# Feature matrix (every column except 'Class') and label vector as arrays.
x = df.drop(['Class'], axis=1).values
y = df["Class"].values

# Splitting the data

In [None]:
# Hold out 20% of the rows for testing (seeded split).
x_train, x_test = train_test_split(data, test_size=0.2, random_state=RANDOM_SEED)

# The training features contain only Class == 0 rows; the Class == 1 rows of
# the training split are kept aside in x_train_fraud.
x_train_fraud = x_train[x_train['Class'] == 1]
x_train = x_train[x_train['Class'] == 0].drop(columns=['Class']).values

# Test labels are read before 'Class' is removed from the test features.
y_test = x_test['Class']
x_test = x_test.drop(columns=['Class']).values

# Autoencoder Model

In [None]:
# Dense autoencoder: n_features -> 100 -> 50 -> 50 -> 100 -> n_features.
n_features = x.shape[1]
input_layer = Input(shape=(n_features,))

# Encoder; the first layer carries an L1 activity penalty of 10e-5.
encoded = Dense(
    100,
    activation='tanh',
    activity_regularizer=regularizers.l1(10e-5),
)(input_layer)
encoded = Dense(50, activation='relu')(encoded)

# Decoder mirrors the encoder widths.
decoded = Dense(50, activation='tanh')(encoded)
decoded = Dense(100, activation='tanh')(decoded)

# Reconstruction layer with one unit per input feature.
output_layer = Dense(n_features, activation='relu')(decoded)

# Compiling the Model

In [None]:
# Wire the input and reconstruction layers into one model and configure it
# with mean-squared-error loss and the Adadelta optimizer.
autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(loss="mse", optimizer="adadelta")

# Scaling the Values and Training the Autoencoder

In [None]:
# Features (all columns but 'Class') and labels taken from the unshuffled frame.
x = data.drop(columns=["Class"])
y = data["Class"].values

# Min-max scale every feature, then separate the rows by label.
x_scale = MinMaxScaler().fit_transform(x.values)
x_norm = x_scale[y == 0]
x_fraud = x_scale[y == 1]

# Train the autoencoder to reconstruct the first 2000 normal rows; 20% of them
# are held out for validation. The trailing semicolon is kept from the
# original cell (in a notebook it suppresses echoing the return value).
normal_head = x_norm[0:2000]
autoencoder.fit(
    normal_head,
    normal_head,
    batch_size=256,
    epochs=10,
    shuffle=True,
    validation_split=0.2,
);

# Obtain the Hidden Representation

In [None]:
# Reuse the first three layers of the autoencoder (indices 0, 1 and 2) as a
# standalone model that outputs the hidden representation.
hidden_representation = Sequential()
for encoder_layer in autoencoder.layers[:3]:
    hidden_representation.add(encoder_layer)

# Model Prediction

In [None]:
# Encode the first 3000 normal rows and all fraud rows with the hidden model.
normal_sample = x_norm[:3000]
x = hidden_representation.predict(normal_sample)
y = hidden_representation.predict(x_fraud)

In [None]:
# Stack the encoded normal rows (x) on top of the encoded fraud rows (y) and
# build the matching label vector: 0 for each row of x, 1 for each row of y.
rep_x = np.concatenate((x, y), axis=0)
y_n = np.zeros(x.shape[0])
y_f = np.ones(y.shape[0])
rep_y = np.concatenate((y_n, y_f))

In [None]:
# Split the encoded samples 75/25 into training and validation sets. The fixed
# random_state makes the split, and the scores computed from it, repeatable.
train_x, val_x, train_y, val_y = train_test_split(
    rep_x, rep_y, test_size=0.25, random_state=RANDOM_SEED
)

# Credit Card Fraud Detection Prediction Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the encoded training features and
# predict labels for the validation split.
clf = LogisticRegression(solver="lbfgs").fit(train_x, train_y)
pred_y = clf.predict(val_x)

print("")
print("Classification Report: ")
print(classification_report(val_y, pred_y))

print("")
# Score the predictions against the true labels; comparing val_y with itself
# would always report 1.0.
print("Accuracy_score:", accuracy_score(val_y, pred_y))